Пример #1
0
def get_catalog():
    """Load and return the pickled catalog.

    When ``running_on_cloud()`` is true, the catalog is unpickled from the
    object returned by ``bucket.getf('catalog.pkl')``. Otherwise it is read
    from ``data/catalog.pkl`` in the directory containing this module.

    Returns
    -------
    The object stored in the pickle file.
    """
    # NOTE: unpickling can execute arbitrary code; only load catalog
    # files that come from a trusted source.
    if running_on_cloud():
        return pickle.load(bucket.getf('catalog.pkl'))

    pth = os.path.join(os.path.dirname(__file__), 'data', 'catalog.pkl')
    # Pickle data must be read in binary mode, and the context manager
    # closes the file handle instead of leaking it.
    with open(pth, 'rb') as infile:
        return pickle.load(infile)
Пример #2
0
def get_catalog():
    """Load and return the pickled catalog.

    When ``running_on_cloud()`` is true, the catalog is unpickled from the
    object returned by ``bucket.getf('catalog.pkl')``. Otherwise it is read
    from ``data/catalog.pkl`` in the directory containing this module.

    Returns
    -------
    The object stored in the pickle file.
    """
    # NOTE: unpickling can execute arbitrary code; only load catalog
    # files that come from a trusted source.
    if running_on_cloud():
        return pickle.load(bucket.getf('catalog.pkl'))

    pth = os.path.join(os.path.dirname(__file__), 'data', 'catalog.pkl')
    # Pickle data must be read in binary mode, and the context manager
    # closes the file handle instead of leaking it.
    with open(pth, 'rb') as infile:
        return pickle.load(infile)
Пример #3
0
    def negatives_iterator(self, samples_per_field=None):
        """Generate an endless stream of offset stamp parameters

        Each pass picks a field longitude, draws random candidate
        positions and radii around it, and drops every candidate
        that ``overlap`` flags against the catalog entries.

        Parameters
        ----------
        samples_per_field : int (optional)
            Number of candidates drawn before a new field longitude
            is picked. When omitted, 30000 is used if
            running_on_cloud() is true and 500 otherwise. Tuning
            this can improve IO performance.

        Yields
        ------
        An infinite sequence of (field_lon, lcen, bcen, rad)
        """
        catalog = get_catalog()
        # Catalog columns 0, 1 and 3 -- presumably longitude, latitude
        # and radius (TODO confirm against the catalog layout).
        cat_l = catalog[:, 0]
        cat_b = catalog[:, 1]
        cat_r = catalog[:, 3]

        while True:
            field_lon = self._random_field()

            if samples_per_field is not None:
                count = samples_per_field
            else:
                count = 30000 if running_on_cloud() else 500

            # Candidates: longitude within +/-0.5 of the field longitude,
            # latitude within +/-0.8, radii resampled from the catalog.
            lcen = np.random.uniform(-.5, .5, count) + field_lon
            bcen = np.random.uniform(-.8, .8, count)
            rad = np.random.choice(cat_r, size=count)

            # Keep only the candidates that overlap() does not flag.
            keep = ~overlap(lcen, bcen, rad, cat_l, cat_b, cat_r)
            lcen, bcen, rad = lcen[keep], bcen[keep], rad[keep]

            for lc, bc, rd in zip(lcen, bcen, rad):
                yield field_lon, lc, bc, rd
Пример #4
0
    def negatives_iterator(self, samples_per_field=None):
        """Generate an endless stream of offset stamp parameters

        Each pass picks a field longitude, draws random candidate
        positions and radii around it, and drops every candidate
        that ``overlap`` flags against the catalog entries.

        Parameters
        ----------
        samples_per_field : int (optional)
            Number of candidates drawn before a new field longitude
            is picked. When omitted, 30000 is used if
            running_on_cloud() is true and 500 otherwise. Tuning
            this can improve IO performance.

        Yields
        ------
        An infinite sequence of (field_lon, lcen, bcen, rad)
        """
        catalog = get_catalog()
        # Catalog columns 0, 1 and 3 -- presumably longitude, latitude
        # and radius (TODO confirm against the catalog layout).
        cat_l = catalog[:, 0]
        cat_b = catalog[:, 1]
        cat_r = catalog[:, 3]

        while True:
            field_lon = self._random_field()

            if samples_per_field is not None:
                count = samples_per_field
            else:
                count = 30000 if running_on_cloud() else 500

            # Candidates: longitude within +/-0.5 of the field longitude,
            # latitude within +/-0.8, radii resampled from the catalog.
            lcen = np.random.uniform(-.5, .5, count) + field_lon
            bcen = np.random.uniform(-.8, .8, count)
            rad = np.random.choice(cat_r, size=count)

            # Keep only the candidates that overlap() does not flag.
            keep = ~overlap(lcen, bcen, rad, cat_l, cat_b, cat_r)
            lcen, bcen, rad = lcen[keep], bcen[keep], rad[keep]

            for lc, bc, rd in zip(lcen, bcen, rad):
                yield field_lon, lc, bc, rd
Пример #5
0
def _has_field(lon):
    """Check whether both image files exist for a given longitude

    The files are ``<lon>_i4.fits`` and ``<lon>_mips.fits``, with the
    longitude zero-padded to three digits. They are looked up in the
    bucket when running_on_cloud() is true, and otherwise under
    ``data/galaxy/registered`` next to this module.
    """
    i4_name = "%3.3i_i4.fits" % lon
    mips_name = "%3.3i_mips.fits" % lon

    if running_on_cloud():
        return bucket.exists(i4_name) and bucket.exists(mips_name)

    base = os.path.join(os.path.dirname(__file__), 'data', 'galaxy',
                        'registered')
    return (os.path.exists(os.path.join(base, i4_name)) and
            os.path.exists(os.path.join(base, mips_name)))
Пример #6
0
def _has_field(lon):
    """Check whether both image files exist for a given longitude

    The files are ``<lon>_i4.fits`` and ``<lon>_mips.fits``, with the
    longitude zero-padded to three digits. They are looked up in the
    bucket when running_on_cloud() is true, and otherwise under
    ``data/galaxy/registered`` next to this module.
    """
    i4_name = "%3.3i_i4.fits" % lon
    mips_name = "%3.3i_mips.fits" % lon

    if running_on_cloud():
        return bucket.exists(i4_name) and bucket.exists(mips_name)

    base = os.path.join(os.path.dirname(__file__), 'data', 'galaxy',
                        'registered')
    return (os.path.exists(os.path.join(base, i4_name)) and
            os.path.exists(os.path.join(base, mips_name)))
Пример #7
0
def get_field(lon):
    """Create and return a new field appropriate
    for running locally or on PiCloud

    The previous return value is cached,
    to avoid repeated I/O for repeated
    requests for the same field

    Parameters
    ----------
    lon : field longitude
        Compared against the ``lon`` attribute of the cached field
        and passed to the field constructor.

    Returns
    -------
    A CloudField if running_on_cloud() is true, otherwise a Field.
    """
    global _cached_field

    if _cached_field is not None and _cached_field.lon == lon:
        return _cached_field

    # Release the old field before building the new one so the two are
    # never referenced at the same time. Rebind to None rather than
    # `del`: deleting the global would leave the name undefined if the
    # constructor below raises, and every later call would then fail
    # with NameError at the cache check above.
    _cached_field = None

    logging.getLogger(__name__).debug("Loading a new field at l=%i", lon)

    if running_on_cloud():
        result = CloudField(lon)
    else:
        result = Field(lon)

    _cached_field = result
    return result
Пример #8
0
def get_field(lon):
    """Create and return a new field appropriate
    for running locally or on PiCloud

    The previous return value is cached,
    to avoid repeated I/O for repeated
    requests for the same field

    Parameters
    ----------
    lon : field longitude
        Compared against the ``lon`` attribute of the cached field
        and passed to the field constructor.

    Returns
    -------
    A CloudField if running_on_cloud() is true, otherwise a Field.
    """
    global _cached_field

    if _cached_field is not None and _cached_field.lon == lon:
        return _cached_field

    # Release the old field before building the new one so the two are
    # never referenced at the same time. Rebind to None rather than
    # `del`: deleting the global would leave the name undefined if the
    # constructor below raises, and every later call would then fail
    # with NameError at the cache check above.
    _cached_field = None

    logging.getLogger(__name__).debug("Loading a new field at l=%i", lon)

    if running_on_cloud():
        result = CloudField(lon)
    else:
        result = Field(lon)

    _cached_field = result
    return result
Пример #9
0
    def random_iterator(self, samples_per_field=None):
        """Generate an endless stream of random stamp parameters

        Each pass picks a field longitude and relays everything
        produced by the ``random_stamps`` method of the field
        returned by ``get_field``.

        Parameters
        ----------
        samples_per_field : int (optional)
            Number of samples requested from a field before a new
            field longitude is picked. When omitted, 30000 is used
            if running_on_cloud() is true and 500 otherwise. Tuning
            this can improve IO performance.

        Yields
        ------
        An infinite sequence of (field_lon, lcen, bcen, rad)
        """
        while True:
            field_lon = self._random_field()

            if samples_per_field is not None:
                count = samples_per_field
            else:
                count = 30000 if running_on_cloud() else 500

            field = get_field(field_lon)
            for stamp in field.random_stamps(count):
                yield stamp
Пример #10
0
    def random_iterator(self, samples_per_field=None):
        """Generate an endless stream of random stamp parameters

        Each pass picks a field longitude and relays everything
        produced by the ``random_stamps`` method of the field
        returned by ``get_field``.

        Parameters
        ----------
        samples_per_field : int (optional)
            Number of samples requested from a field before a new
            field longitude is picked. When omitted, 30000 is used
            if running_on_cloud() is true and 500 otherwise. Tuning
            this can improve IO performance.

        Yields
        ------
        An infinite sequence of (field_lon, lcen, bcen, rad)
        """
        while True:
            field_lon = self._random_field()

            if samples_per_field is not None:
                count = samples_per_field
            else:
                count = 30000 if running_on_cloud() else 500

            field = get_field(field_lon)
            for stamp in field.random_stamps(count):
                yield stamp
Пример #11
0
import csv
import json
import os
import pickle
from os.path import join as path_join
from os.path import isfile
import joblib
import numpy as np
from sklearn.preprocessing import LabelEncoder
from scipy.io import mmread
from sklearn.metrics import mean_absolute_error
from sklearn.base import clone
from scipy.sparse import coo_matrix, hstack, vstack
# Detect whether we are running on the cloud. The `cloud` package is
# optional, so this is a deliberate best-effort check: any failure to
# import or query it is treated as "not on the cloud".
try:
    import cloud
    on_cloud = cloud.running_on_cloud()
except Exception:
    on_cloud = False


#def read_column(filename, column_name):
    #"""returns generator with values in column_name in filename"""
    #csv_file = csv.reader(open(filename, 'r'))
    #header = csv_file.next()
    #print header
    #if column_name not in header:
        #raise Exception("Column '%s' is not in header! Header: %s" % (column_name, ",".join(header)))
    #column_index = header.index(column_name)
    #for line in csv_file:
        #yield line[column_index]
Пример #12
0
def data_path():
  """Return the directory prefix under which data files are found.

  This is "/bucket/" when cloud.running_on_cloud() is true; otherwise
  it is the "../../data/" path joined onto this module's directory.
  """
  if not cloud.running_on_cloud():
    return os.path.join(os.path.dirname(__file__), "../../data/")
  return "/bucket/"
Пример #13
0
import os

#import PyWiseRF
from sklearn.ensemble import RandomForestClassifier
import cloud
import numpy as np

# Import-time side effect: when running on the cloud, set the WISERF_ROOT
# environment variable (presumably the WiseRF install location on the
# PiCloud image -- TODO confirm).
if cloud.running_on_cloud():
    os.environ['WISERF_ROOT'] = '/home/picloud/WiseRF-1.5.9-linux-x86_64-rc2'


def test():
    """Smoke test: fit a WiseRF on a tiny random dataset

    Builds a 5x5 matrix of random features with labels
    [1, 1, 1, 0, 0] and returns the result of ``WiseRF().fit``.
    """
    features = np.random.random((5, 5))
    labels = np.array([1, 1, 1, 0, 0])
    return WiseRF().fit(features, labels)

#class WiseRF(PyWiseRF.WiseRF):
#    def decision_function(self, x):
#        p = self.predict_proba(x)
#        return p[:, 1] - p[:, 0]

class WiseRF(RandomForestClassifier):
    """RandomForestClassifier extended with a ``decision_function`` method"""

    def decision_function(self, x):
        """Return the column-1 probability minus the column-0 probability

        The probabilities come from ``self.predict_proba(x)``, so the
        result has one score per row of that output.
        """
        proba = self.predict_proba(x)
        second, first = proba[:, 1], proba[:, 0]
        return second - first
Пример #14
0
import os

import PyWiseRF
import cloud
import numpy as np

# Import-time side effect: when running on the cloud, set the WISERF_ROOT
# environment variable (presumably the WiseRF install location on the
# PiCloud image -- TODO confirm).
if cloud.running_on_cloud():
    os.environ['WISERF_ROOT'] = '/home/picloud/WiseRF-1.5.9-linux-x86_64-rc2'


def test():
    """Smoke test: fit a WiseRF on a tiny random dataset

    Builds a 5x5 matrix of random features with labels
    [1, 1, 1, 0, 0] and returns the result of ``WiseRF().fit``.
    """
    features = np.random.random((5, 5))
    labels = np.array([1, 1, 1, 0, 0])
    return WiseRF().fit(features, labels)

class WiseRF(PyWiseRF.WiseRF):
    """PyWiseRF.WiseRF extended with a ``decision_function`` method"""

    def decision_function(self, x):
        """Return the column-1 probability minus the column-0 probability

        The probabilities come from ``self.predict_proba(x)``, so the
        result has one score per row of that output.
        """
        proba = self.predict_proba(x)
        second, first = proba[:, 1], proba[:, 0]
        return second - first