/usr/lib/python3/dist-packages/astroML/datasets/nasa_atlas.py is in python3-astroml 0.3-6.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""
NASA Sloan Atlas dataset size reduction
---------------------------------------
The NASA Sloan Atlas dataset is contained in a ~0.5GB file available at
http://www.nsatlas.org/data
This function fetches a ~50MB subset of that data. This subset is created
using the code that can be found at examples/datasets/truncate_nsa_data.py
"""
from __future__ import print_function, division
import os
import numpy as np
from .tools import download_with_progress_bar
from . import get_data_home
# Remote location of the reduced NASA Sloan Atlas array (a ``.npy`` file).
DATA_URL = (
    'http://www.astro.washington.edu/users/ivezic/'
    'DMbook/nsa_v0_1_2_reduced.npy'
)

# Name of the locally cached copy: the final path component of DATA_URL.
ARCHIVE_FILE = os.path.split(DATA_URL)[-1]
def fetch_nasa_atlas(data_home=None,
                     download_if_missing=True):
    """Load the reduced NASA galaxy atlas, downloading it on first use.

    Parameters
    ----------
    data_home : optional, default=None
        Download and cache folder for the dataset.  The value is passed
        through ``get_data_home`` to obtain the actual directory, which is
        created if it does not exist yet.  By default astroML data is
        stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError when the data is not available locally
        instead of trying to download it from the source site.

    Returns
    -------
    data : ndarray
        The data, in the form of a numpy record array.

    Raises
    ------
    IOError
        If the cached file is absent and ``download_if_missing`` is False.

    Notes
    -----
    This is the file created by the example script at
    examples/datasets/truncate_nsa_data.py

    For an explanation of the meaning of the fields, see the description at
    http://www.nsatlas.org/data
    """
    cache_dir = get_data_home(data_home)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    cached_path = os.path.join(cache_dir, ARCHIVE_FILE)

    # Fast path: a copy saved by an earlier call is already on disk.
    if os.path.exists(cached_path):
        return np.load(cached_path)

    if not download_if_missing:
        raise IOError('data not present on disk. '
                      'set download_if_missing=True to download')

    print("downloading NASA atlas data from %s to %s"
          % (DATA_URL, cache_dir))

    # Fetch into an in-memory buffer, decode it, then persist the decoded
    # array so that later calls can take the fast path above.
    payload = download_with_progress_bar(DATA_URL, return_buffer=True)
    catalog = np.load(payload)
    np.save(cached_path, catalog)
    return catalog
|