/usr/lib/python3/dist-packages/astroML/datasets/sdss_galaxy_colors.py is in python3-astroml 0.3-6.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from __future__ import print_function, division
import os
import numpy as np
from . import get_data_home
from .tools import sql_query
# Human-readable names for the integer ``specClass`` codes returned by the
# SDSS database (index in this list == specClass value).
SPECCLASS = ['UNKNOWN', 'STAR', 'GALAXY', 'QSO',
             'HIZ_QSO', 'SKY', 'STAR_LATE', 'GAL_EM']

# Number of objects requested from the SDSS server (used in "SELECT TOP %i").
NOBJECTS = 50000

# dtype of the structured array returned by fetch_sdss_galaxy_colors:
# the five SDSS magnitudes (u, g, r, i, z), the integer spectral class
# code, and the spectroscopic redshift with its error.
GAL_COLORS_DTYPE = [('u', float),
                    ('g', float),
                    ('r', float),
                    ('i', float),
                    ('z', float),
                    ('specClass', int),
                    ('redshift', float),
                    ('redshift_err', float)]

# File name (inside data_home) under which the query result is cached.
ARCHIVE_FILE = 'sdss_galaxy_colors.npy'
def fetch_sdss_galaxy_colors(data_home=None, download_if_missing=True):
    """Loader for SDSS galaxy colors.

    This function directly queries the sdss SQL database at
    http://cas.sdss.org/

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : recarray, shape = (NOBJECTS,)
        record array containing magnitudes and redshift for each galaxy;
        fields are given by GAL_COLORS_DTYPE.
    """
    data_home = get_data_home(data_home)
    if not os.path.exists(data_home):
        os.makedirs(data_home)

    archive_file = os.path.join(data_home, ARCHIVE_FILE)

    # CasJobs SQL query: the specClass cuts remove objects classified as
    # UNKNOWN, STAR, SKY, or STAR_LATE (see SPECCLASS for the code mapping).
    query_text = '\n'.join(
        ("SELECT TOP %i" % NOBJECTS,
         " p.u, p.g, p.r, p.i, p.z, s.specClass, s.z, s.zerr",
         "FROM PhotoObj AS p",
         " JOIN SpecObj AS s ON s.bestobjid = p.objid",
         "WHERE ",
         " p.u BETWEEN 0 AND 19.6",
         " AND p.g BETWEEN 0 AND 20",
         " AND s.specClass > 1 -- not UNKNOWN or STAR",
         " AND s.specClass <> 5 -- not SKY",
         " AND s.specClass <> 6 -- not STAR_LATE"))

    if not os.path.exists(archive_file):
        # No local cache: fetch from the SDSS server (unless forbidden).
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        print("querying for %i objects" % NOBJECTS)
        print(query_text)
        output = sql_query(query_text)
        print("finished.")

        # The server returns CSV with a single header row.
        data = np.loadtxt(output, delimiter=',',
                          skiprows=1, dtype=GAL_COLORS_DTYPE)
        np.save(archive_file, data)
    else:
        data = np.load(archive_file)

    return data
|