def get_downsampled_data(name):
    """Return the downsampled version of sample ``name``, building it if needed.

    The result is cached in ``mmap_array.MmapArrayFile(name + "-downsampled")``.
    When that file does not exist yet, the full data set is fetched with
    ``get_sample_data(name)``, every ``stride``-th row is kept (starting with
    row 0), and the selection is written to the cache before being read back.

    :param name: Base name of the sample; passed to ``get_sample_data`` and
        used (with a ``-downsampled`` suffix) as the cache name.
    :return: Whatever ``MmapArrayFile.read()`` returns for the cached file.
    """
    cache = mmap_array.MmapArrayFile(name + "-downsampled")
    if cache.exists():
        return cache.read()

    full_data = get_sample_data(name)
    # Column 0 is the 'time' column as loaded by get_sample_data.
    time_span = np.ptp(full_data[:, 0])
    # NOTE(review): the stride is derived from the time span divided by 1.6
    # (the original local was called ``npt_lsst``, which reads like a point
    # count) -- confirm that using it as the slice step is intended.
    stride = np.ceil(time_span / 1.6).astype('int')
    cache.write(full_data[::stride, :])
    return cache.read()
def get_sample_data(name):
    """Return the sample data set ``name``, converting it from CSV on first use.

    The source file is ``<lc_data_dir>/<name>.csv``; it is parsed as a
    header-less CSV with the columns ``time``, ``value`` and ``error``. The
    parsed array is cached in ``mmap_array.MmapArrayFile(name)`` and all
    results are served from that cache.

    :param name: Base name of the sample (CSV file name without ``.csv``).
    :return: Whatever ``MmapArrayFile.read()`` returns for the cached file.
    :raises EnvironmentError: If the source CSV does not exist. This is
        checked on every call, even when a cached copy is already present.
    :raises AssertionError: If the parsed data does not have exactly 3
        columns, or its row count is not strictly between 100 and 1e9.
    """
    source_filename = os.path.join(lc_data_dir, name + '.csv')
    if not os.path.isfile(source_filename):
        raise EnvironmentError("Expected to find a file {!r}".format(source_filename))

    f = mmap_array.MmapArrayFile(name)
    if not f.exists():
        frame = pd.read_csv(source_filename, names=['time', 'value', 'error'])
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
        # yields the same ndarray and is available in old and new versions.
        data = frame.values
        entries, num_cols = data.shape
        assert num_cols == 3, "sanity check"
        assert 100 < entries < int(1e9), "sanity check"
        f.write(data)
    return f.read()
from __future__ import print_function import argparse import glob import os.path import sys import pickle import numpy as np import pandas as pd from justice import mmap_array sn_dir = os.path.join(mmap_array.default_array_dir, 'sn_phot_cc') index_filename = os.path.join(sn_dir, 'index_df.pickle') all_lc_data = mmap_array.MmapArrayFile('all', array_dir=sn_dir, order='C') def parse_file(filename): # Everything seems to be of the form "A: rest of line", so parse this first. lines_with_type_tag = [] with open(filename, 'r') as f: for line in f: line = line.strip() if not line or line.startswith("#"): continue typ, value = line.split(":", 1) lines_with_type_tag.append((typ, value.strip())) # Get the type tag, there should be only one. sn_type, = (int(l) for typ, l in lines_with_type_tag if typ == "SNTYPE")
import os.path import pickle import random import time import numpy as np import pandas as pd from justice import mmap_array pickle_file = os.path.abspath( os.path.join(os.path.abspath(__file__), "../../gaia-selection.pickle") ) gaia_dir = os.path.join(mmap_array.default_array_dir, 'gaia') source_id_to_ranges_index = mmap_array.MmapArrayFile( 'source_id_to_ranges_index', array_dir=gaia_dir, order='C') all_lc_data = mmap_array.MmapArrayFile('all', array_dir=gaia_dir, order='C') def write_mmap_file(): if not os.path.isfile(pickle_file): raise ValueError("Please download the GAIA pickle file (see module docstring).") with open(pickle_file, 'rb') as f: data = pickle.load(f) assert isinstance(data, pd.DataFrame) data = data.sort_values('source_id').reset_index() index_data = [ (source_id, min(group.index), max(group.index)) for source_id, group in data.groupby("source_id")