def attribute_test(message, length=3):
    assert len(message) == length
    assert message[0].tensors() == {
        'a': torch.Tensor([1]),
        'b': torch.Tensor([4]),
    }
    assert message[0].dataframe().equals(pd.DataFrame({
        'c': np.array([7]),
        'd': np.array([10]),
    }))
    assert message[0] == Message(
        {'a': torch.Tensor([1]), 'b': torch.Tensor([4])},
        pd.DataFrame({'c': np.array([7]), 'd': np.array([10])}))
    assert message[1:3].tensors() == {
        'a': torch.Tensor([2, 3]),
        'b': torch.Tensor([5, 6]),
    }
    assert message[1:3].dataframe().equals(pd.DataFrame({
        'c': np.array([8, 9]),
        'd': np.array([11, 12]),
    }))
    assert (message['a'] == torch.Tensor([1, 2, 3])).all()
    assert message[['a', 'c']] == Message(
        {'a': torch.Tensor([1, 2, 3]), 'c': np.array([7, 8, 9])})
    assert message[1:3] == Message(
        {'a': torch.Tensor([2, 3]), 'b': torch.Tensor([5, 6])},
        pd.DataFrame({'c': np.array([8, 9]), 'd': np.array([11, 12])}))
    # Test length
    assert len(message) == length
def test_getitem():
    m = Message(tensors, vectors)
    assert m[0] == Message(
        {'a': torch.Tensor([1]), 'b': torch.Tensor([4])},
        {'c': np.array([7]), 'd': np.array([10])})
    assert m[[0, 2]] == Message(
        {'a': torch.Tensor([1, 3]), 'b': torch.Tensor([4, 6])},
        {'c': np.array([7, 9]), 'd': np.array([10, 12])})
    # Check that out-of-bounds index calls raise errors.
    try:
        m[3]
        assert False
    except IndexError:
        assert True
    try:
        m[3:5]
        assert False
    except IndexError:
        assert True
def test_SQLFactory():
    trainer = dummy_trainer()
    metrics_dict = {'metric': DummyMetric()}
    generator = dummy_generator()
    dataloader = dummy_dataloader()
    params_table = create_table('parameters', columns=[Column('parameters', Integer)])
    metrics_table = {'metric': create_table('metrics', columns=[Column('metric', String)])}
    engine = create_engine('sqlite:///:memory:')
    sequel = factory.SQLFactory(components={
        'trainer': trainer,
        'metrics': metrics_dict,
        'parameterizer': generator,
        'eval_set': dataloader,
        'params_table': params_table,
        'metrics_tables': metrics_table,
        'engine': engine,
    })
    sequel.run()
    params, metrics = sequel.read()
    assert type(params) is Message
    assert type(metrics) is dict
    assert set(metrics.keys()) == set(['metric'])
    assert len(params) == 11
    assert len(metrics['metric']) == 11
    assert params[5] == Message({'id': [6], 'parameters': [5]})
    assert metrics['metric'][5] == Message({'id': [6], 'metric': ['hiiiii']})
    for mrow, prow in zip(metrics['metric'], params):
        assert mrow['id'][0] == prow['id'][0]
def test_LoopingPipe():
    dumbo = one_way_dummy()
    loopy = pl.LoopingPipe(dumbo)
    loopy[10]
    loopy[5]
    # Call __len__ twice to ensure both compute_length and retrieval of the
    # cached length work.
    numba1 = len(loopy)
    numba2 = len(loopy)
    assert numba1 == 20
    assert numba2 == 20
    x = loopy[0]
    assert x == Message({'count': [0]})
    loopy[10]
    loopy = pl.LoopingPipe(dumbo)
    x = loopy[0]
    assert x == Message({'count': [0]})
    # Check that the input Pipes were reset.
    assert loopy.length is None
    try:
        # Test that length is implicitly calculated whenever the input Pipes run out.
        loopy[21]
    except IndexError:
        assert True
    else:
        assert False
    assert loopy.length == 20
def test_load_experiment():
    # Clean up any experiment directories left over from previous runs.
    dirs = os.listdir()
    all_avengers = [x for x in dirs if x.startswith('avenger')]
    for man in all_avengers:
        rmtree(man)
    dirs = os.listdir()
    all_avengers = [x for x in dirs if x.startswith('avenger')]
    assert len(all_avengers) == 0
    avenger = exp.Experiment('avenger', os.getcwd(), description='ok')
    ironman = avenger.get_engine('ironman')
    saver = dummy_table(ironman)
    saver.insert(Message({
        'superpower': ['flying', 'walking', 'eating'],
        'name': ['flyman', 'walkergirl', 'bumbo'],
        'age': [2, 3, 4],
    }))
    saver.commit()
    marvel = exp.load_experiment(os.path.join(avenger.db_path, avenger.save_path))
    saver = dummy_table(ironman)
    rows = saver.query()
    for row in rows:
        assert type(row) is Message
    # After the loop, row holds the last record from the table.
    assert row == Message({
        'superpower': ['eating'],
        'name': ['bumbo'],
        'age': [4],
        'id': [3],
    })
    assert marvel.name == avenger.name
    assert marvel.iteration == avenger.iteration
    assert marvel.description == avenger.description
    assert marvel.timestamp == avenger.timestamp
def test_df():
    m = Message(tensors, vectors)
    df = m.dataframe()
    assert df.equals(pd.DataFrame(vectors))
    df = m.dataframe(keys=['c'])
    assert df.equals(pd.DataFrame({'c': vectors['c']}))
    df = m.dataframe(keys=['c', 'a'])
    assert (df == pd.DataFrame({'c': vectors['c'], 'a': np.array(tensors['a'])})).all().all()
def test_tensors():
    m = Message(tensors, vectors)
    t = m.tensors()
    assert t == TensorMessage(tensors)
    t = m.tensors(keys=['a'])
    assert t == TensorMessage({'a': tensors['a']})
    t = m.tensors(keys=['a', 'c'])
    assert t == TensorMessage({'a': tensors['a'], 'c': torch.Tensor(vectors['c'])})
def drop_df(batch, exceptions=['SampleID']):
    """
    Drops the dataframe component of a message, leaving only the tensor
    component plus the columns named in `exceptions`.
    """
    keep_df = batch[exceptions]
    new_batch = Message(keep_df)
    new_batch = new_batch.merge(batch.tensors())
    return new_batch
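# A minimal usage sketch for drop_df. It assumes the module-level imports used
# throughout these tests (torch, pandas as pd, and Message); the column names
# and data below are illustrative, not part of the real fixtures.
def demo_drop_df():
    batch = Message(
        {'examples': torch.rand(3, 5)},             # tensor component
        pd.DataFrame({'SampleID': [0, 1, 2],        # dataframe component
                      'notes': ['x', 'y', 'z']}))
    slim = drop_df(batch)
    # Only the exception columns survive from the dataframe component;
    # the tensor component is carried over unchanged.
    assert list(slim.dataframe().columns) == ['SampleID']
    assert (slim['examples'] == batch['examples']).all()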
def test_to_dict():
    m = Message(tensors, vectors)
    md = m.to_dict()
    assert type(md) is dict
    # Each column should round-trip to match the source fixtures.
    assert (md['c'] == vectors['c']).all()
    assert (md['d'] == vectors['d']).all()
    assert (md['a'] == np.array(tensors['a'])).all()
    assert (md['b'] == np.array(tensors['b'])).all()
def test_RandomHubJunction():
    a = Message({'x': np.random.rand(100)})
    b = Message({'x': np.random.rand(100)})
    rob = jn.RandomHubJunction(components={'a': a, 'b': b})
    i = 0
    for x in rob:
        i += 1
    assert i == 200
    for x in rob:
        i += 1
    assert i == 400
def load(self, query, filters_dict, keys=None, fill_value=0):
    """
    Loads from the database and filesystem the records corresponding to the
    provided query and filter dict.
    """
    filename = assign_file_name(query, filters_dict)
    self._path = os.path.join(self.file_directory, filename)
    try:
        # Reuse the file if it already exists.
        self.message = Message.read('csv', self._path)
    except Exception:
        # Otherwise build it from the database, then read it back.
        self._create_file(query, filters_dict, keys, fill_value)
        self.message = Message.read('csv', self._path)
    self.message['SampleID'] = self.message['Unnamed: 0']
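# Sketch of the intended call pattern for load(), assuming a saver object that
# defines the method above along with file_directory and _create_file; the
# query and filter values here are placeholders:
#
#   saver.load(query, filters_dict={'site': 'A'})
#   batch = saver.message   # cached on disk as CSV after the first call,
#                           # with SampleID recovered from the CSV index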
def test_to_dataframe():
    mo = Message(tensors, vectors)
    # no = mo.to_dataframe()
    # assert no.tensor_message == {}
    # assert (no['a'] == mo['a']).all()
    # assert (no['b'] == mo['b']).all()
    # for letter in ['a', 'b', 'c', 'd']:
    #     assert letter in no.df
    lo = Message(dtensors, vectors)
    ok = lo.to_dataframe()
    for i in range(3):
        assert (ok['a'][i] == dtensors['a'][i].numpy()).all()
        assert (ok['b'][i] == dtensors['b'][i].numpy()).all()
def test_Coffer():
    m = Message({'a': [1, 2, 3], 'b': torch.tensor([4, 5, 6])})
    art1 = artifacts.FireworksArtifact('test.fireworks', m)
    p = [1, 2, 3, 4, 'hii']
    art2 = artifacts.PickleArtifact('test.pickle', p)
    b = b'hohohooh'
    art3 = artifacts.BinaryArtifact('test.bin', b)
    coffee = coffer.DebugCoffer()
    art_gallery = [art1, art2, art3]
    coffee.upload(art_gallery)
    fart_gallery = coffee.download()
    art_gallery_dict = {
        'fireworks': art1.data,
        'pickle': art2.data,
        'binary': art3.data,
    }
    fart_gallery_dict = {}
    for artifact in fart_gallery:
        if artifact.key.endswith('fireworks'):
            fart_gallery_dict['fireworks'] = artifact.data
        if artifact.key.endswith('pickle'):
            fart_gallery_dict['pickle'] = artifact.data
        if artifact.key.endswith('binary'):
            fart_gallery_dict['binary'] = artifact.data
    for art, fart in zip(art_gallery_dict.values(), fart_gallery_dict.values()):
        assert art == fart
def test_LRUCache():
    m = cache.LRUCache(10, buffer_size=2)
    dummy_message = Message(tensors, vectors)
    m[2:5] = dummy_message
    assert m[2:5] == dummy_message
    assert m.rank_dict.keys() == m.pointers.keys()
    m[7:10] = dummy_message
    assert m[2:5] == dummy_message
    assert m[7:10] == dummy_message
    assert m.rank_dict.keys() == m.pointers.keys()
    m[12:15] = dummy_message
    assert m[2:5] == dummy_message
    assert m[7:10] == dummy_message
    assert m[12:15] == dummy_message
    assert m.rank_dict.keys() == m.pointers.keys()
    # At this point, the least recently used elements are at the beginning.
    m[15:18] = dummy_message
    assert m[15:18] == dummy_message
    assert len(m) == 10
    assert m.rank_dict.keys() == m.pointers.keys()
    assert 2 not in m
    assert 3 not in m
    assert set([4, 7, 8, 9, 12, 13, 14, 15, 16, 17]) == set(m.rank_dict.keys())
    # Trigger __getitem__; now 7, 12, and 17 should be queued for deletion.
    m[[4, 8, 9, 13, 14, 15, 16]]
    m[18:21] = dummy_message
    assert len(m) == 10
    for i in [7, 12, 17]:
        assert i not in m
    # Trigger a buffer clearance.
    m[29] = dummy_message[0]
    assert len(m) == 9
def iteration_completed(self, engine):
    iteration = engine.state.iteration - 1
    if iteration % self.log_interval == 0:
        current_state = Message.from_objects(deepcopy(engine.state.output['state']))
        current_state['iteration'] = [iteration]
        self.model_state = self.model_state.append(current_state)
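# Sketch of wiring iteration_completed() into a pytorch-ignite engine; the
# logger object owning the method is assumed, and the engine setup is omitted:
#
#   from ignite.engine import Events
#   engine.add_event_handler(Events.ITERATION_COMPLETED, logger.iteration_completed)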
def test_roc_trainer():
    data = get_test_examples()
    metanet = module.MetaphlanNet(components={'widths': [12365, 6000, 2000, 55]})
    classifier = module.DiseaseClassifier(
        components={'in_width': 55, 'out_width': 49}, input=metanet)
    if torch.cuda.is_available():
        data.cuda()
        metanet.cuda()
        classifier.cuda()
    bce = nn.BCELoss()

    def loss(batch):
        return bce(batch['predictions'], batch['label'])

    trainer = hyperfactory.roc_trainer(
        classifier, loss, components={'in_width': 55, 'out_width': 49}, input=metanet)
    evaluator = trainer(Message({'roc_bias': torch.ones(49)}))
    assert hasattr(evaluator, 'run')
    evaluator.run(data)
def test_LFUCache():
    m = cache.LFUCache(10, buffer_size=2)
    dummy_message = Message(tensors, vectors)
    m[2:5] = dummy_message
    assert m[2:5] == dummy_message
    assert m.rank_dict.keys() == m.pointers.keys()
    m[7:10] = dummy_message
    assert m[2:5] == dummy_message
    assert m[7:10] == dummy_message
    assert m.rank_dict.keys() == m.pointers.keys()
    m[12:15] = dummy_message
    assert m[2:5] == dummy_message
    assert m[7:10] == dummy_message
    assert m[12:15] == dummy_message
    assert m.rank_dict.keys() == m.pointers.keys()
    # At this point, 12:15 are the least frequently used elements.
    m[15:18] = dummy_message
    assert m[15:18] == dummy_message
    assert len(m) == 10
    assert m.rank_dict.keys() == m.pointers.keys()
    assert 12 not in m
    assert 13 not in m
    assert set([2, 3, 4, 7, 8, 9, 14, 15, 16, 17]) == set(m.rank_dict.keys())
    # Trigger a buffer clearance by inserting one more element.
    m[29] = dummy_message[0]
    assert len(m) == 9
def test_LocalMemoryFactory():
    trainer = dummy_trainer()
    metrics_dict = {'metric': DummyMetric()}
    generator = dummy_generator()
    dataloader = dummy_dataloader()
    memfactory = factory.LocalMemoryFactory(components={
        'trainer': trainer,
        'metrics': metrics_dict,
        'parameterizer': generator,
        'eval_set': dataloader,
    })
    memfactory.run()
    params, metrics = memfactory.read()
    assert type(params) is Message
    assert type(metrics) is defaultdict
    assert set(metrics.keys()) == set(['metric'])
    assert len(params) == 11
    assert len(metrics['metric']) == 11
    assert params[5] == Message({'parameters': [5]})
    assert metrics['metric'][5] == Message({'metric': ['hiiiii']})
def test_Model_inferencing():
    damura = DummyModel({'m': [2.]})
    x = Message({'x': torch.Tensor([1, 2, 3])})
    y = damura(x)
    assert y == x
    assert (y['x'] == torch.Tensor([1, 2, 3])).all()
    assert (y['y'] == torch.Tensor([2., 4., 6.])).all()
def read_data():
    sampleids = pd.read_csv('SampleID.csv')
    examples = torch.load('examples.torch')
    label = torch.load('label.torch')
    data = Message({'examples': examples, 'label': label}, sampleids)
    return data
def test_init_set():
    m = cache.UnlimitedCache()
    dummy_message = Message(tensors, vectors)
    assert len(m) == 0
    m[0] = dummy_message[0]
    assert len(m) == 1
    m[3:6] = dummy_message
    assert len(m) == 4
    assert m[0] == dummy_message[0]
    assert m[3:6] == dummy_message
    dummy_2 = Message(tensors2, vectors2)
    m[3:6] = dummy_2
    assert len(m) == 4
    assert m[0] == dummy_message[0]
    assert m[3:6] == dummy_2
    m[7:10] = dummy_message
    assert m[7:10] == dummy_message
def test_cat():
    m = Message(tensors, vectors)
    m0 = m[0]
    m1 = m[1]
    m2 = m[2]
    babaghanush = messi.cat([m0, m1, m2])
    assert babaghanush == m
def test_permute():
    m = cache.UnlimitedCache()
    dummy_message = Message(tensors, vectors)
    m[3:6] = dummy_message
    assert m.cache == dummy_message
    m._permute([2, 1, 0])
    assert m.cache != dummy_message
    # Applying the same permutation again restores the original order.
    m._permute([2, 1, 0])
    assert m.cache == dummy_message
def compute(self):
    if self.num_examples == 0:
        raise NotComputableError(
            "Metric must have at least one example before it can be computed.")
    return Message({'average-loss': [self.l2 / self.num_examples]}).to_dataframe()
def test_save_load():
    m = Message(tensors, vectors)
    test_path = 'test.fireworks'
    m.save(test_path)
    new_m = Message.load(test_path)
    assert new_m == m
    os.remove(test_path)
    # Saving to an in-memory buffer should round-trip as well.
    buffer = BytesIO()
    m.save(buffer)
    buffed_m = Message.load(buffer)
    assert buffed_m == m
def __getitem__(self, index):
    index = index_to_list(index)
    if index == []:
        return None
    elif max(index) < self.length and min(index) >= 0:
        return Message({'values': np.array(index)})
    else:
        raise IndexError(
            "Out of bounds for dummy pipe with length {0}.".format(self.length))
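# The indexing contract implemented above, assuming a dummy pipe instance with
# self.length == 20 (the length used by the tests in this file) and assuming
# index_to_list converts ints and slices to lists of indices:
#
#   pipe[[0, 5]]   ->  Message({'values': np.array([0, 5])})
#   pipe[0:3]      ->  Message({'values': np.array([0, 1, 2])})
#   pipe[25]       ->  raises IndexError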
def generate_linear_model_data(n=300):
    """
    Generates n samples from a linear model with small Gaussian noise.
    """
    m = randint(-3, 3)
    b = randint(-10, 10)
    x = np.random.rand(n) * 100
    errors = np.random.normal(0, .4, n)  # Gaussian noise on the targets
    y = m * x + b + errors
    # The second dict is returned for debugging.
    return Message({'x': x, 'y_true': y}), {'m': m, 'b': b, 'errors': errors}
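# A quick sketch of consuming generate_linear_model_data; recovering the slope
# by ordinary least squares below is illustrative and not part of the tests.
def demo_linear_model_data():
    data, params = generate_linear_model_data(n=300)
    x, y = data['x'], data['y_true']
    # With x spread over [0, 100) and noise sigma = 0.4, the least-squares
    # slope estimate should land very close to the true params['m'].
    slope = ((x - x.mean()) * (y - y.mean())).sum() / ((x - x.mean()) ** 2).sum()
    assert abs(slope - params['m']) < 0.1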
def test_RepeaterPipe():
    dumbo = one_way_iter_dummy()
    robert = pl.RepeaterPipe(dumbo)
    numbaz = Message()
    assert len(numbaz) == 0
    for numba in robert:
        numbaz = numbaz.append(numba)
    assert len(numbaz) == robert.repetitions * 20
    dumbo = one_way_dummy()
    robert = pl.RepeaterPipe(dumbo)
    numbaz = Message()
    robert.reset()
    i = 0
    assert len(numbaz) == 0
    while True:
        try:
            numbaz = numbaz.append(next(robert))
            i += 1
            if i > 1000:  # If something goes horribly wrong, cancel the test.
                assert False
        except StopIteration:
            break
    assert len(numbaz) == robert.repetitions * 20
def test_make_row():
    tab = dummy_table('bubsy')
    tom = tab(name='ok', values=33)
    assert tom.name == 'ok'
    assert tom.values == 33
    engine = create_engine('sqlite:///:memory:', echo=True)
    pipe = db.TablePipe(tab, engine)
    message = Message({'name': ['a', 'b'], 'values': [1, 2]})
    row = pipe.make_row(message[0])
    assert row.name == 'a'
    assert row.values == 1
def __getitem__(self, index):
    if type(index) is list:
        index = [i for i in index]
    if type(index) is slice:
        step = index.step or 1
        index = [i for i in range(index.start, index.stop, step)]
    if index == []:
        return None
    elif max(index) < self.length and min(index) >= 0:
        return Message({'name': 'johnny', 'values': np.array(index)})
    else:
        raise IndexError(
            "Out of bounds for dummy pipe with length {0}.".format(self.length))