def test_json_parser(self):
    """Run the JSON fixture through a 'json' parser and check each field.

    The parsed record must contain exactly the expected keys, each with
    the expected value.
    """
    from pprint import pprint

    self.override_annotation(
        'T1', None, ['Real', 'yes', 1], ['Bogus', 'no', 0])
    json_parser = parsers.ClassificationParser('json')

    pprint(self.test_json)
    parsed = json_parser.process(self.test_json)

    expected = {
        'classification_id': 60910323,
        'user_id': 1437100,
        'workflow': 1737,
        'time_stamp': datetime.datetime(2017, 6, 22, 11, 29, 50, 609000),
        'session_id': '53244efffa0eddf43255b5f37b6b462242f39141f3373215d45054da4b1d9066',
        'live_project': True,
        'seen_before': False,
        'annotation': 1,
        'subject_id': 1053214149494,
    }

    print(parsed)
    # Compare field by field so the printed trail shows which key failed.
    for field in expected:
        wanted = expected[field]
        print(field, wanted)
        assert parsed[field] == wanted

    # No extra keys beyond the expected ones.
    assert len(parsed) == len(expected)
def upload_project_dump(self, fname):
    """Reload the collection from a project classification csv dump.

    Calls ``self._rebuild()``, runs every row of the csv file through a
    'csv' ``ClassificationParser``, inserts the parsed records into
    ``self.collection`` in batches, and finally calls
    ``self._gen_stats()``.

    Args:
        fname: Path of the csv dump to read.
    """
    logger.info('dropping collection')
    self._rebuild()

    logger.info('parsing csv dump')
    batch = []
    parser = parsers.ClassificationParser('csv')

    with open(fname, 'r') as handle:
        # Count rows from 1 so the progress line reports how many rows
        # have been read rather than the zero-based index of the last one.
        for count, row in enumerate(csv.DictReader(handle), start=1):
            record = parser.process(row)
            if record is None:
                # process() returned nothing for this row; skip it.
                continue

            batch.append(record)

            # Write first, then flush, so the newest progress line is
            # actually pushed to the terminal.
            sys.stdout.write("%d records processed\r" % count)
            sys.stdout.flush()

            # Insert periodically to bound the size of the pending list.
            if len(batch) > 100000:
                self.collection.insert_many(batch)
                batch = []

    logger.critical('Parsers skipped %d rows', parser.skipped)

    # The remainder can be empty (empty file, every row skipped, or the
    # last row triggered a batch insert).
    # NOTE(review): assuming self.collection is a pymongo collection,
    # insert_many raises on an empty document list, so guard the call.
    if batch:
        self.collection.insert_many(batch)

    self._gen_stats()
    logger.debug('done')
def test_csv_parser_muon_complex(self):
    """Parse csv fixture row 1 with a nested annotation value key.

    The annotation override uses task 'T0' and the value key
    '0.details.0.value.0'; the parsed record must match the expected
    fields exactly.
    """
    from pprint import pprint

    self.override_annotation('T0', '0.details.0.value.0', [3], [-1])
    csv_parser = parsers.ClassificationParser('csv')

    pprint(self.test_csv[1])
    # Hand the parser a copy of the fixture row.
    parsed = csv_parser.process(self.test_csv[1].copy())

    expected = {
        'classification_id': 15935073,
        'user_id': 1460166,
        'workflow': 2473,
        'time_stamp': datetime.datetime(2016, 8, 22, 16, 10, 26),
        'session_id': '6049552d100cae2a9570c40bee1119cfbca4decae1e50bafffc3cba873793d6f',
        'live_project': False,
        'seen_before': False,
        'annotation': 1,
        'subject_id': 3354054,
    }

    print(parsed)
    for field in expected:
        wanted = expected[field]
        print(field, wanted)
        assert parsed[field] == wanted

    # No extra keys beyond the expected ones.
    assert len(parsed) == len(expected)
def test_csv_parser_elephant(self):
    """Parse csv fixture row 0 with string annotation values.

    The annotation override maps ['2'] and ['-1'] for task 'T1'; the
    parsed record must match the expected fields exactly.
    """
    from pprint import pprint

    self.override_annotation('T1', None, ['2'], ['-1'])
    csv_parser = parsers.ClassificationParser('csv')

    pprint(self.test_csv[0])
    # Hand the parser a copy of the fixture row.
    parsed = csv_parser.process(self.test_csv[0].copy())

    expected = {
        'classification_id': 25656085,
        'user_id': 1559523,
        'workflow': 3304,
        'time_stamp': datetime.datetime(2017, 1, 24, 17, 0, 38),
        'session_id': '3eb59fd166f9ace809fd1b611a52b14152e1a86f998625f7cf88f14627fa318d',
        'live_project': False,
        'seen_before': False,
        'annotation': 1,
        'subject_id': 458040,
    }

    print(parsed)
    for field in expected:
        wanted = expected[field]
        print(field, wanted)
        assert parsed[field] == wanted

    # No extra keys beyond the expected ones.
    assert len(parsed) == len(expected)
def test_csv_parser_seen_before(self):
    """Parse csv fixture row 3 and expect ``seen_before`` to be True.

    The remaining fields of the parsed record must also match the
    expected values exactly.
    """
    from pprint import pprint

    self.override_annotation(
        'T1', None, ['Real', 'yes', 1], ['Bogus', 'no', 0])
    csv_parser = parsers.ClassificationParser('csv')

    # Work on a copy of the fixture row; the copy is what gets printed
    # and parsed.
    row = self.test_csv[3].copy()
    pprint(row)
    parsed = csv_parser.process(row)

    expected = {
        'classification_id': 11423065,
        'user_id': 1437100,
        'workflow': 1737,
        'time_stamp': datetime.datetime(2016, 4, 18, 18, 50, 48),
        'session_id': 'b759eab8f4fe3436707edede3094cd5bd30c4812e2a701e48fa7cb13f0068f40',
        'live_project': False,
        'seen_before': True,
        'annotation': 1,
        'subject_id': 1935795,
    }

    print(parsed)
    for field in expected:
        wanted = expected[field]
        print(field, wanted)
        assert parsed[field] == wanted

    # No extra keys beyond the expected ones.
    assert len(parsed) == len(expected)
def test_type_mod_timestamp(self):
    """``_type`` with 'timestamp' converts both string layouts to datetimes."""
    parser = parsers.ClassificationParser(None)

    # (raw string, expected datetime) pairs, checked in order.
    cases = (
        ('2017-01-24T16:11:24.680Z',
         datetime.datetime(2017, 1, 24, 16, 11, 24, 680000)),
        ('2017-01-24 16:11:24 UTC',
         datetime.datetime(2017, 1, 24, 16, 11, 24)),
    )
    for raw, expected in cases:
        converted = parser._type(raw, 'timestamp')
        assert converted == expected
def test_remap(self):
    """``_remap`` finds the value under the field name or any listed alias.

    The field is looked up as 'a' with remap candidates 'b' through 'e';
    each single-key classification must yield its own value.
    """
    field = {'type': int, 'remap': ['b', 'c', 'd', 'e']}
    parser = parsers.ClassificationParser(None)

    # 'a' -> 1, 'b' -> 2, ... 'e' -> 5, one classification per key.
    for expected, key in enumerate('abcde', start=1):
        assert parser._remap({key: expected}, 'a', field) == expected
def test_remap_notsource(self):
    """A 'json' remap entry is not applied by a parser built with None.

    The classification holds both 'a' and the remap target 'b'; the
    value stored under 'a' must be returned.
    """
    parser = parsers.ClassificationParser(None)
    field_spec = {'type': int, 'remap': {'json': 'b'}}
    classification = {'a': 1, 'b': 2}

    result = parser._remap(classification, 'a', field_spec)
    assert result == 1
def test_type_mod_float(self):
    """``_type`` with ``float`` converts the string '0.1' to 0.1."""
    parser = parsers.ClassificationParser(None)

    converted = parser._type('0.1', float)
    assert converted == 0.1
def test_type_mod_bool(self):
    """``_type`` with ``bool`` yields True for 'true', True and 'True'."""
    parser = parsers.ClassificationParser(None)

    # Identity check: the result must be the True singleton, not merely truthy.
    for raw in ('true', True, 'True'):
        assert parser._type(raw, bool) is True
def test_type_mod_int(self):
    """``_type`` with ``int`` yields 1 for both the string '1' and the int 1."""
    parser = parsers.ClassificationParser(None)

    for raw in ('1', 1):
        assert parser._type(raw, int) == 1