Exemplo n.º 1
0
    def __init__(self, df_cols=None):
        """Load the training light curves and group them by object id.

        Args:
            df_cols: time-series columns to keep alongside the object id
                column. Defaults to ['mjd', 'flux'].
        """
        # None-sentinel instead of a shared mutable default list.
        if df_cols is None:
            df_cols = ['mjd', 'flux']

        # BUG FIX: the original read only 'training_set' yet then accessed
        # self.df_meta, which was never assigned (AttributeError). Read the
        # metadata table too, mirroring the other loaders in this file.
        fns = ['training_set', 'training_set_metadata']
        self.raw, self.df_meta = m.read_multi(fns, fillna=True)
        self.df = self.raw[[m.ID] + df_cols]

        # Sorted list of the unique target labels.
        self.classes = self.df_meta['target'].unique()
        self.classes.sort()

        self.class_list = self.classes.tolist()

        # One group (and one entry in self.objs) per astronomical object.
        self.id_group = self.df.groupby(by=m.ID, as_index=False)
        self.objs = [elt for elt in self.id_group]
Exemplo n.º 2
0
    def __init__(self):
        """Load the training tables and expose a time-ordered series.

        Builds a single merged frame sorted by observation time ('mjd'),
        plus the flat time vector and the time-stripped data matrix.
        """
        self.fns = ['training_set', 'training_set_metadata']

        self.df, self.meta_df = m.read_multi(self.fns)
        self.meta_df = self.meta_df.fillna(0)

        # Join per-observation rows with per-object metadata, then order
        # every observation chronologically.
        self.merged = pd.merge(self.df, self.meta_df, on=m.ID)
        self.time_series = self.merged.sort_values(by='mjd')
        # BUG FIX: the original referenced bare `time_series` here (NameError);
        # the sorted frame lives on the instance as self.time_series.
        self.times = self.time_series[['mjd']].values.flatten()
        self.data = self.time_series.drop('mjd', axis=1)
        self.length = len(self.times)
Exemplo n.º 3
0
    def __init__(self, batch_size=16):
        """Load and merge the training tables for batched model training.

        Args:
            batch_size: number of samples per training batch (default 16).
        """
        self.batch_size = batch_size

        # Read light curves and per-object metadata, then join on object id.
        self.tr, self.tr_meta = m.read_multi(
            ['training_set', 'training_set_metadata'])

        merged = pd.merge(self.tr, self.tr_meta, on='object_id')
        self.raw = merged.fillna(0)

        # is it hacking to give the model obj_id?
        self.obj_ids = self.raw['object_id']

        # Features only: strip the identifier and the label column.
        self.df = self.raw.drop(['object_id', 'target'], axis=1)

        # 1D list of values to calculate Y for in ODE
        self.t = self.df['mjd']
        self.y = self.df.drop('mjd', axis=1)

        self.y_dim = len(self.y.columns)
        self.train_len = len(self.df)
Exemplo n.º 4
0
    def __init__(self, cols=None):
        """Load the training set and group the selected columns by object.

        Args:
            cols: columns to keep from the raw training frame. Defaults to
                [m.ID, 'mjd', 'flux'].
        """
        # None-sentinel instead of a mutable default list (which would also
        # have evaluated m.ID at class-definition time).
        if cols is None:
            cols = [m.ID, 'mjd', 'flux']

        # doesn't read the absurdly large test set sample
        self.fns = ['training_set', 'training_set_metadata']
        self.df, self.meta_df = m.read_multi(self.fns)
        self.raw = self.df

        self.df = self.df[cols]

        self.df_grouped = self.df.groupby(by=m.ID, as_index=False)

        # Keep only the per-object frames; obj[0] is the group key.
        self.unscaled_objs = [obj[1] for obj in self.df_grouped]
Exemplo n.º 5
0
    def __init__(self, conv=True, df_cols=None):
        """Load training data and labels and build per-object tuples.

        Args:
            conv: flag stored on the instance (consumed elsewhere; its exact
                meaning is not visible here — presumably toggles a
                convolutional path).
            df_cols: time-series columns to keep alongside the object id.
                Defaults to ['mjd', 'flux'].
        """
        # None-sentinel instead of a shared mutable default list.
        if df_cols is None:
            df_cols = ['mjd', 'flux']

        fns = ['training_set', 'training_set_metadata']
        self.raw, self.raw_meta = m.read_multi(fns, fillna=True)
        self.conv = conv

        self.df = self.raw[[m.ID] + df_cols]
        self.df_meta = self.raw_meta[['object_id',
                                      'target']].sort_values(by=m.ID)
        # Longest light curve determines the padded sequence length.
        val_counts = self.df['object_id'].value_counts()
        self.seq_max_len = val_counts.max()
        self.input_dim = len(df_cols)

        # Sorted list of the unique target labels.
        self.classes = self.df_meta['target'].unique()
        self.classes.sort()
        self.class_list = self.classes.tolist()

        # One group (and one entry in self.objs) per astronomical object.
        self.id_group = self.df.groupby(by=m.ID, as_index=False)
        self.objs = [elt for elt in self.id_group]

        self.obj_count = len(self.objs)
        self.tups = []
        self.create_tuples()
Exemplo n.º 6
0
    def __init__(self):
        self.fns = [
            'training_set', 'training_set_metadata', 'test_set_sample',
            'test_set_metadata'
        ]

        self.df, self.meta_df, self.test_df, self.test_meta = m.read_multi(
            self.fns)

        self.set_list = [self.df, self.meta_df, self.test_df, self.test_meta]

        # fill in Na

        self.tr_objs = [
            obj for obj in self.df.groupby(by=m.ID, as_index=False)
        ]
        self.te_objs = [
            obj for obj in self.df.groupby(by=m.ID, as_index=False)
        ]

        self.tr_objs_pb = [
            obj
            for obj in self.df.groupby(by=[m.ID, 'passband'], as_index=False)
        ]
        self.te_objs_pb = [
            obj
            for obj in self.df.groupby(by=[m.ID, 'passband'], as_index=False)
        ]

        self.seq_max_len = self.df[m.ID].value_counts().max()
        '''
		was asserting that df1.size + df2.size == merged
		this is not the case, and theres isnt some cool compression used to prevent
		duplicating metadata
		pd.merge will be fine for now
		'''

        self.merged = pd.merge(self.df, self.meta_df, on=m.ID)
        self.test_merged = pd.merge(self.test_df, self.test_meta, on=m.ID)

        self.merged = self.merged.fillna(0).astype(np.float32)
        self.test_merged = self.test_merged.fillna(0).astype(np.float32)

        self.grouped = self.merged.groupby(by=m.ID, as_index=False)
        self.test_grouped = self.test_merged.groupby(by=m.ID, as_index=False)

        self.unscaled_objs = [obj[1] for obj in self.grouped]
        self.test_unscaled_obj = [obj[1] for obj in self.test_grouped]

        self.merged = m.scale_df(self.merged)
        self.test_merged = m.scale_df(self.test_merged)

        self.merged_objs = [obj[1] for obj in self.grouped]
        self.test_merged_objs = [obj[1] for obj in self.test_grouped]

        # self.merged_pbs = [obj[1] for ]

        self.input_size = len(
            self.merged.columns) - 2  # -2 for the obj id and target

        self.target_classes = self.meta_df['target'].unique()
        self.target_classes.sort()

        self.class_list = self.target_classes.tolist()

        self.output_size = len(self.target_classes)

        print('demo initialized\n')