def setUp(self):
    '''
    Fills `self.annots` with every annotation that the delicious/flickr
    line parser yields for `test.SMALL_DEL_FILE`.
    '''
    self.annots = []
    annot_parser = data_parser.Parser()
    with open(test.SMALL_DEL_FILE) as annot_file:
        parsed = annot_parser.iparse(annot_file,
                                     data_parser.delicious_flickr_parser)
        # Consumed while the file is still open.
        self.annots.extend(parsed)
def create_annots(self, fpath):
    '''
    Parses the file at `fpath` with the delicious/flickr line parser and
    returns all resulting annotations as a list.
    '''
    annot_parser = data_parser.Parser()
    with open(fpath) as annot_file:
        # Materialise the result before the file is closed.
        return list(annot_parser.iparse(annot_file,
                                        data_parser.delicious_flickr_parser))
def _write_ids(fpath, ids):
    '''
    Dumps the `ids` mapping to the text file `fpath`, one line per key,
    ordered by the mapped id value. Each line holds the element at
    position 1 of the key followed by its id.
    '''
    with open(fpath, 'w') as ids_f:
        for key in sorted(ids, key=ids.__getitem__):
            print(key[1], ids[key], file=ids_f)


def main(args=None):
    '''
    Parses an annotation file, stores the annotations in a database table
    and dumps the user, item and tag id mappings to text files.

    Arguments
    ---------
    args: sequence of command line arguments, laid out as
        args[0] - program name (only used in the usage message)
        args[1] - annotation file to parse
        args[2] - database file handed to `AnnotWriter`
        args[3] - folder that receives the `<ftype>.user`,
                  `<ftype>.items` and `<ftype>.tags` files
        args[4] - ftype, one of flickr, delicious, bibsonomy, connotea,
                  citeulike, lt. It is also used as the table name.

    Returns 1 (after printing a message to stderr) when arguments are
    missing or the ftype is unknown. On success nothing is returned.
    '''
    args = [] if args is None else args

    # Five entries are read below (args[0] .. args[4]); anything shorter
    # gets the usage message instead of an IndexError.
    if len(args) < 5:
        types = '{flickr, delicious, bibsonomy, connotea, citeulike, lt}'
        prog = args[0] if args else sys.argv[0]
        print('Usage %s %s %s %s %s' % (prog,
                                         '<annotation_file>',
                                         '<database_file>',
                                         '<ids folder>',
                                         '<ftype = %s>' % types),
              file=sys.stderr)
        return 1

    func_map = {
        'bibsonomy': data_parser.bibsonomy_parser,
        'citeulike': data_parser.citeulike_parser,
        'connotea': data_parser.connotea_parser,
        'delicious': data_parser.delicious_flickr_parser,
        'flickr': data_parser.delicious_flickr_parser,
        'lt': data_parser.library_thing_parser,
    }

    in_fpath, db_fpath, ids_folder, func_name = args[1:5]
    if func_name not in func_map:
        # Errors go to stderr, like the usage message above.
        print('ftype %s unknown' % func_name, file=sys.stderr)
        return 1
    parse_func = func_map[func_name]

    # Saving Table to PyTables
    parser = data_parser.Parser()
    with open(in_fpath) as annotf, AnnotWriter(db_fpath) as writer:
        writer.create_table(func_name)
        for annotation in parser.iparse(annotf, parse_func, sys.stderr):
            writer.append_row(annotation)

    # Saving IDs to text files. The parser's id mappings are read only
    # after the whole input has been consumed.
    id_dumps = (('.user', parser.user_ids),
                ('.items', parser.item_ids),
                ('.tags', parser.tag_ids))
    for suffix, ids in id_dumps:
        _write_ids(os.path.join(ids_folder, func_name + suffix), ids)
def write_good_annots(database, table, new_database, good_items):
    '''
    Copies into `new_database` the annotations of `table` in `database`
    whose 'item' field is one of `good_items`.

    The selected rows are passed through a fresh parser (using
    `data_parser.json_parser`) before being written to a table of the
    same name. Returns the parser's `user_ids`, `item_ids` and `tag_ids`,
    in that order.
    '''
    with AnnotReader(database) as source, AnnotWriter(new_database) as sink:
        source.change_table(table)
        sink.create_table(table)

        selected = source.iterate(query={'item': {'$in': good_items}})
        id_parser = data_parser.Parser()
        for annot in id_parser.iparse(selected, data_parser.json_parser):
            sink.append_row(annot)

    return id_parser.user_ids, id_parser.item_ids, id_parser.tag_ids
def test_metrics_small_file(self):
    '''
    Builds the item/tag and user/tag metric indexes for the annotations
    in `test.SMALL_DEL_FILE` and checks them against known frequencies.
    '''
    p = data_parser.Parser()
    with open(test.SMALL_DEL_FILE) as f:
        annots = list(p.iparse(f, data_parser.delicious_flickr_parser))

    item_tag_frequencies, collection_item_frequency, \
        collection_tag_frequency1 = create_metrics_index(annots,
                                                         'item', 'tag')
    user_tag_frequencies, collection_user_frequency, \
        collection_tag_frequency2 = create_metrics_index(annots,
                                                         'user', 'tag')

    def check(label, observed, expected):
        # Compares observed[0..len(expected) - 1] with the expected
        # values, naming the failing position in the message.
        for idx, value in enumerate(expected):
            got = observed[idx]
            self.assertEqual(got, value,
                             '%s[%d] is %r, expected %r' %
                             (label, idx, got, value))

    check('collection_item_frequency',
          collection_item_frequency, [5, 1, 2, 1, 1])
    check('collection_user_frequency',
          collection_user_frequency, [4, 4, 2])

    check('item_tag_frequencies[0]',
          item_tag_frequencies[0], [2, 1, 0, 1, 1, 0])
    check('user_tag_frequencies[2]',
          user_tag_frequencies[2], [0, 0, 0, 0, 1, 1])

    check('collection_tag_frequency1',
          collection_tag_frequency1, [3, 3, 1, 1, 1, 1])

    # The tag frequencies must not depend on the other indexed field.
    self.assertEqual(collection_tag_frequency1, collection_tag_frequency2)
def base_tfunc(self, fpath, parse_func):
    '''
    This simple test writes annotations to h5 file and reads them back.
    Comparing if both are equal.

    Arguments
    ---------
    fpath: path of the annotation file to parse
    parse_func: line parsing function handed to `Parser.iparse`

    Annotations are compared as tuples of their sorted items. The table
    is always named 'bibs', whatever `parse_func` is used.
    '''
    parser = data_parser.Parser()
    written_list = []
    n_lines = 0
    with open(fpath) as in_f:
        with annotations.AnnotWriter(self.h5_file) as writer:
            writer.create_table('bibs')
            for annot in parser.iparse(in_f, parse_func):
                written_list.append(tuple(sorted(annot.items())))
                writer.append_row(annot)
                n_lines += 1

    with annotations.AnnotReader(self.h5_file) as reader:
        reader.change_table('bibs')
        read_list = [tuple(sorted(annot.items()))
                     for annot in reader.iterate()]

    # NOTE(review): n_lines and written_list grow together in the loop
    # above, so this first check cannot fail; the comparison below is
    # what validates the round trip.
    self.assertEqual(n_lines, len(written_list))
    self.assertEqual(read_list, written_list)
def test_iparse(self):
    '''
    Feeds five delicious lines through `Parser.iparse` from an in-memory
    file and checks the user, item and tag ids given to each annotation,
    plus the converted date of the first two.
    '''
    lines = [DELICIOUS_LINE1, DELICIOUS_LINE2, DELICIOUS_LINE3,
             DELICIOUS_LINE4, DELICIOUS_LINE5]
    fakef = StringIO.StringIO()
    # Newline between lines, none after the last one.
    fakef.write('\n'.join(lines))
    fakef.seek(0)

    p = data_parser.Parser()
    annots = list(p.iparse(fakef, data_parser.delicious_flickr_parser))

    # (user, item, tag, date); a date of None means it is not checked.
    expected = [
        (0, 0, 0, '2003-01-01 01:00:00'),
        (0, 1, 1, '2011-02-17 11:10:20'),
        (1, 1, 2, None),
        (2, 1, 0, None),
        (3, 2, 3, None),
    ]
    for pos, (user, item, tag, when) in enumerate(expected):
        annot = annots[pos]
        self.assertEqual(user, annot['user'])
        self.assertEqual(item, annot['item'])
        self.assertEqual(tag, annot['tag'])
        if when is not None:
            self.assertEqual(when, convert_time(annot['date']))
def test_with_file(self):
    '''
    Parses `test.BIBSONOMY_FILE` with the bibsonomy parser and checks
    that exactly 10000 annotations come out.
    '''
    p = data_parser.Parser()
    with open(test.BIBSONOMY_FILE) as f:
        annots = list(p.iparse(f, data_parser.bibsonomy_parser))
    self.assertEqual(10000, len(annots))
def __init_test(self, annot_file):
    '''
    Parses `annot_file` with the delicious/flickr line parser and adds
    every resulting annotation to the end of `self.annots`. Whatever
    `self.annots` already holds is kept.
    '''
    annot_parser = data_parser.Parser()
    with open(annot_file) as source:
        parsed = annot_parser.iparse(source,
                                     data_parser.delicious_flickr_parser)
        self.annots.extend(parsed)