def test_format_person_various_places_appendix(self):
    """Check the place appendix with city+country, city only, country only."""
    person_id = '123'

    # City and country: every other parameter used by these tests is unset
    self.unset_kuenstler_params([
        'KueVorNameS', 'KueNameS', 'KudJahrVonL', 'KudJahrBisL',
        'KueFunktionS'])
    formatted = MakeInfo.format_person(
        self.mock_info, person_id, self.cat_meta)
    self.assertEqual(formatted, 'City, Country')

    # City only
    self.unset_kuenstler_params(['KudLandS'])
    formatted = MakeInfo.format_person(
        self.mock_info, person_id, self.cat_meta)
    self.assertEqual(formatted, 'City')

    # Country only
    self.unset_kuenstler_params(['KudOrtS'])
    self.reset_kuenstler_params(['KudLandS'])
    formatted = MakeInfo.format_person(
        self.mock_info, person_id, self.cat_meta)
    self.assertEqual(formatted, 'Country')

    # None of the variations may have added anything to cat_meta
    self.assertItemsEqual(self.cat_meta, [])
def test_format_person_various_places_appendix(self):
    """Exercise the place appendix for the three city/country combinations."""
    def formatted():
        # Re-run format_person against the current state of the mock
        return MakeInfo.format_person(self.mock_info, '123', self.cat_meta)

    # both city and country present
    self.unset_kuenstler_params(
        ['KueVorNameS', 'KueNameS', 'KudJahrVonL', 'KudJahrBisL',
         'KueFunktionS'])
    self.assertEqual(formatted(), 'City, Country')

    # country removed, leaving the city
    self.unset_kuenstler_params(['KudLandS'])
    self.assertEqual(formatted(), 'City')

    # city removed and country restored
    self.unset_kuenstler_params(['KudOrtS'])
    self.reset_kuenstler_params(['KudLandS'])
    self.assertEqual(formatted(), 'Country')

    # cat_meta must be untouched by all of the above
    no_meta_cats = []
    self.assertItemsEqual(self.cat_meta, no_meta_cats)
def run(in_path=None, log_file=None):
    """
    Run the photo, multimedia, year and photoAll analyses and log the result.

    The log file is opened for writing (truncating any previous content)
    and is always closed again, also if one of the analysis steps raises.

    :param in_path: directory holding photo.csv, multimedia.csv,
        ausstellung.csv and photoAll.csv. Defaults to CSV_DIR_CLEAN.
    :param log_file: path of the log file to write. Defaults to LOG_FILE.
    :return: None. Exits the interpreter (status 0) if in_path is not a
        directory.
    """
    # set defaults unless overridden
    in_path = in_path or CSV_DIR_CLEAN
    log_file = log_file or LOG_FILE

    # stop if in_path doesn't exist
    if not os.path.isdir(in_path):
        # single-argument print(...) behaves the same as the print statement
        print(u'%s is not a valid path' % in_path)
        exit(0)

    # make sure the log file name is a unicode string
    if isinstance(log_file, str):
        log_file = unicode(log_file)

    # create log file; the with-block guarantees it is flushed and closed
    with codecs.open(log_file, 'w', 'utf-8') as f:
        # Load known variables
        A = MakeInfo()

        # start analysis
        analysePhoto(
            A, f, file_in=os.path.join(in_path, u'%s.csv' % 'photo'))
        analyseMulti(
            f, file_in=os.path.join(in_path, u'%s.csv' % 'multimedia'))
        analyseYear(
            f, file_in=os.path.join(in_path, u'%s.csv' % 'ausstellung'))
        analysePhotoAll(
            f, file_in=os.path.join(in_path, u'%s.csv' % 'photoAll'))

    print(u'Created %s' % log_file)
def test_format_person_empty_returns_empty(self):
    """Expect '' after unset_kuenstler_params() is called without arguments."""
    self.unset_kuenstler_params()
    formatted = MakeInfo.format_person(
        self.mock_info, '123', self.cat_meta)
    self.assertEqual(formatted, '')
    # cat_meta is expected to hold no entries afterwards
    self.assertItemsEqual(self.cat_meta, [])
def test_format_person_matching_link(self):
    """A peopleLinkC entry for the id turns the name into a wikilink."""
    self.mock_info.peopleLinkC = {'123': ':sv:Some link'}
    formatted = MakeInfo.format_person(
        self.mock_info, '123', self.cat_meta)
    self.assertEqual(formatted, '[[:sv:Some link|FirstName LastName]]')
    # cat_meta is expected to hold no entries afterwards
    self.assertItemsEqual(self.cat_meta, [])
def test_format_person_full_info(self):
    """With the fixture untouched, name, bracket and place are all output."""
    formatted = MakeInfo.format_person(
        self.mock_info, '123', self.cat_meta)
    self.assertEqual(
        formatted,
        ('FirstName LastName (Profession, BirthYear-DeathYear) '
         'City, Country'))
    # cat_meta is expected to hold no entries afterwards
    self.assertItemsEqual(self.cat_meta, [])
def test_format_person_no_place(self):
    """Unsetting KudOrtS and KudLandS drops the place appendix."""
    place_params = ['KudOrtS', 'KudLandS']
    self.unset_kuenstler_params(place_params)
    formatted = MakeInfo.format_person(
        self.mock_info, '123', self.cat_meta)
    self.assertEqual(
        formatted, 'FirstName LastName (Profession, BirthYear-DeathYear)')
    # cat_meta is expected to hold no entries afterwards
    self.assertItemsEqual(self.cat_meta, [])
def catTest(path, data_dir, connections_dir, filename_file, nameToPho=None):
    """
    Check the category statistics for the files in a given directory.

    Collects the 'phoMull' value of every file in path whose extension-less
    name is a key of nameToPho and hands the list to
    MakeInfo.catTestBatch, logging to '¤catStats.log' inside path.

    :param path: directory whose files are checked; also receives the log
    :param data_dir: directory for data files, passed to catTestBatch
    :param connections_dir: directory for connection files, passed to
        catTestBatch
    :param filename_file: file passed to makeHitlist when nameToPho is
        missing or empty
    :param nameToPho: optional mapping from extension-less filename to an
        entry containing a 'phoMull' key
    :return: None
    """
    if not nameToPho:
        _, nameToPho = makeHitlist(filename_file)

    # logfile; the with-block closes it even if catTestBatch raises
    log_path = os.path.join(path, u'¤catStats.log')
    with codecs.open(log_path, 'w', 'utf-8') as flog:
        maker = MakeInfo()
        phoMull_list = []
        for filename_in in os.listdir(path):
            base_name = os.path.splitext(filename_in)[0]
            # direct membership test avoids building a key list per file
            if base_name not in nameToPho:
                continue
            phoMull_list.append(nameToPho[base_name]['phoMull'])
        maker.catTestBatch(phoMull_list, data_dir, connections_dir,
                           outputPath=path, log=flog)
def test_format_person_no_bracket(self):
    """Without years and KueFunktionS the name is followed by ', <place>'."""
    bracket_params = ['KudJahrVonL', 'KudJahrBisL', 'KueFunktionS']
    self.unset_kuenstler_params(bracket_params)
    formatted = MakeInfo.format_person(
        self.mock_info, '123', self.cat_meta)
    self.assertEqual(formatted, 'FirstName LastName, City, Country')
    # cat_meta is expected to hold no entries afterwards
    self.assertItemsEqual(self.cat_meta, [])
def test_format_person_matching_creator_not_creative(self):
    """A peopleCreatC entry is not used when creative is not passed."""
    self.mock_info.peopleCreatC = {'123': 'Creator:Some template'}
    formatted = MakeInfo.format_person(
        self.mock_info, '123', self.cat_meta)
    self.assertEqual(
        formatted,
        ('FirstName LastName (Profession, BirthYear-DeathYear) '
         'City, Country'))
    # cat_meta is expected to hold no entries afterwards
    self.assertItemsEqual(self.cat_meta, [])
def test_format_person_no_place(self):
    """Name and bracket remain once both place parameters are unset."""
    self.unset_kuenstler_params(['KudOrtS', 'KudLandS'])
    result = MakeInfo.format_person(self.mock_info, '123', self.cat_meta)
    wanted = 'FirstName LastName (Profession, BirthYear-DeathYear)'
    self.assertEqual(result, wanted)
    # no meta categories may have been added
    self.assertItemsEqual(self.cat_meta, [])
def test_format_person_matching_creator(self):
    """With creative=True a peopleCreatC entry is output as a template."""
    self.mock_info.peopleCreatC = {'123': 'Creator:Some template'}
    formatted = MakeInfo.format_person(
        self.mock_info, '123', self.cat_meta, creative=True)
    self.assertEqual(formatted, '{{Creator:Some template}}')
    # cat_meta is expected to hold no entries afterwards
    self.assertItemsEqual(self.cat_meta, [])
def test_make_gallery_reprint(self):
    """Ensure already outputted images are not re-outputted."""
    # foo.jpg is registered as printed before the gallery is requested
    self.printed = ['foo.jpg']
    gallery = MakeInfo.make_gallery(self.title, ['foo.jpg'], self.printed)
    self.assertEqual(gallery, '')
    # the printed list must be unchanged
    self.assertItemsEqual(self.printed, ['foo.jpg'])
def test_make_gallery_single(self):
    """One file gives a one-entry gallery; printed then holds that file."""
    gallery = MakeInfo.make_gallery(self.title, ['foo.jpg'], self.printed)
    self.assertEqual(
        gallery,
        (u'\n<gallery caption="galleryTitle">\n'
         u'File:foo.jpg\n'
         u'</gallery>'))
    self.assertItemsEqual(self.printed, ['foo.jpg'])
def test_make_gallery_duplicate(self):
    """Ensure internal duplicates are not outputted."""
    repeated = ['foo.jpg', 'foo.jpg']
    gallery = MakeInfo.make_gallery(self.title, repeated, self.printed)
    # the file is expected exactly once in the gallery
    self.assertEqual(
        gallery,
        (u'\n<gallery caption="galleryTitle">\n'
         u'File:foo.jpg\n'
         u'</gallery>'))
    self.assertItemsEqual(self.printed, ['foo.jpg'])
def test_format_person_only_name(self):
    """Only the name is output when bracket and place parameters are unset."""
    bracket_params = ['KudJahrVonL', 'KudJahrBisL', 'KueFunktionS']
    place_params = ['KudOrtS', 'KudLandS']
    self.unset_kuenstler_params(bracket_params + place_params)
    formatted = MakeInfo.format_person(
        self.mock_info, '123', self.cat_meta)
    self.assertEqual(formatted, 'FirstName LastName')
    # cat_meta is expected to hold no entries afterwards
    self.assertItemsEqual(self.cat_meta, [])
def test_format_person_only_name(self):
    """With years, KueFunktionS and place unset, just the name remains."""
    self.unset_kuenstler_params(
        ['KudJahrVonL', 'KudJahrBisL', 'KueFunktionS', 'KudOrtS',
         'KudLandS'])
    result = MakeInfo.format_person(self.mock_info, '123', self.cat_meta)
    self.assertEqual(result, 'FirstName LastName')
    # no meta categories may have been added
    self.assertItemsEqual(self.cat_meta, [])
def test_format_person_trigger_cat_meta(self):
    """An empty peopleCreatC value with creative=True flags cat_meta."""
    self.mock_info.peopleCreatC = {'123': ''}
    formatted = MakeInfo.format_person(
        self.mock_info, '123', self.cat_meta, creative=True)
    # the plain description is still expected as output
    self.assertEqual(
        formatted,
        ('FirstName LastName (Profession, BirthYear-DeathYear) '
         'City, Country'))
    self.assertItemsEqual(self.cat_meta, ['unmatched creator'])
def test_make_gallery_multiple(self):
    """Three distinct files are all listed and all end up in printed."""
    files = ['foo1.jpg', 'foo2.jpg', 'foo3.jpg']
    gallery = MakeInfo.make_gallery(self.title, files, self.printed)
    self.assert_same_gallery_content(
        gallery,
        (u'\n<gallery caption="galleryTitle">\n'
         u'File:foo1.jpg\n'
         u'File:foo2.jpg\n'
         u'File:foo3.jpg\n'
         u'</gallery>'))
    self.assertItemsEqual(
        self.printed, ['foo1.jpg', 'foo2.jpg', 'foo3.jpg'])
def test_make_gallery_reprint(self):
    """Ensure already outputted images are not re-outputted."""
    already_printed = 'foo.jpg'
    self.printed = [already_printed]
    result = MakeInfo.make_gallery(
        self.title, [already_printed], self.printed)
    # nothing new to show, so no gallery is expected
    self.assertEqual(result, '')
    self.assertItemsEqual(self.printed, [already_printed])
def test_format_person_creator_over_link(self):
    """Test that creator template is prioritised over link."""
    # both a creator template and a link exist for the same id
    self.mock_info.peopleCreatC = {'123': 'Creator:Some template'}
    self.mock_info.peopleLinkC = {'123': ':sv:Some link'}
    formatted = MakeInfo.format_person(
        self.mock_info, '123', self.cat_meta, creative=True)
    self.assertEqual(formatted, '{{Creator:Some template}}')
    # cat_meta is expected to hold no entries afterwards
    self.assertItemsEqual(self.cat_meta, [])
def test_make_gallery_captions(self):
    """Captions passed per filename are appended after a '|' separator."""
    files = ['foo1.jpg', 'foo2.jpg']
    captions = {
        u'foo1.jpg': u'The foo',
        u'foo2.jpg': u'The bar',
    }
    gallery = MakeInfo.make_gallery(
        self.title, files, self.printed, captions=captions)
    self.assert_same_gallery_content(
        gallery,
        (u'\n<gallery caption="galleryTitle">\n'
         u'File:foo1.jpg|The foo\n'
         u'File:foo2.jpg|The bar\n'
         u'</gallery>'))
    self.assertItemsEqual(self.printed, ['foo1.jpg', 'foo2.jpg'])
def test_format_person_various_brackets(self):
    """Test various variations of bracket contents."""
    # Each step: (parameters to unset, parameters to reset, expected output).
    # Steps build on each other, so their order matters.
    steps = [
        # all
        (['KueVorNameS', 'KueNameS', 'KudOrtS', 'KudLandS'], [],
         '(Profession, BirthYear-DeathYear)'),
        # Profession only
        (['KudJahrVonL', 'KudJahrBisL'], [],
         '(Profession)'),
        # Year only
        (['KueFunktionS'], ['KudJahrVonL', 'KudJahrBisL'],
         '(BirthYear-DeathYear)'),
        # Birth year range only
        (['KudJahrBisL'], [],
         '(BirthYear-)'),
        # Death year range only
        (['KudJahrVonL'], ['KudJahrBisL'],
         '(-DeathYear)'),
        # Year range and profession only
        ([], ['KueFunktionS'],
         '(Profession, -DeathYear)'),
    ]
    for to_unset, to_reset, expected_name in steps:
        # an empty list means the helper must not be called at all
        if to_unset:
            self.unset_kuenstler_params(to_unset)
        if to_reset:
            self.reset_kuenstler_params(to_reset)
        self.assertEqual(
            MakeInfo.format_person(self.mock_info, '123', self.cat_meta),
            expected_name)

    # Assert none of these added to cat_meta
    self.assertItemsEqual(self.cat_meta, [])
def load_and_dump_LSH_info(batch, data_dir=None, connections_dir=None):
    """
    Build the info blob for the LSH data and tag each entry with the batch.

    The blob comes from MakeInfo.dump_info; afterwards the batch category
    is appended to the 'meta_cats' list of every entry.

    :param batch: The category added to all files of the format
        "Category:Media contributed by LSH: <batch>".
    :param data_dir: override the default directory for data files
    :param connections_dir: override the default directory for connection
        files
    :return: the (modified) structure returned by MakeInfo.dump_info
    """
    # fall back to the module defaults where nothing was given
    if not data_dir:
        data_dir = DATA_DIR
    if not connections_dir:
        connections_dir = CONNECTIONS_DIR

    base_meta_cat = 'Media contributed by LSH'
    maker = MakeInfo()
    maker.readInLibraries(folder=data_dir)
    maker.readConnections(folder=connections_dir)

    batch_cat = '{:s}: {:s}'.format(base_meta_cat, batch)
    info_blob = maker.dump_info(base_meta_cat, use_commons_name=True)

    # Add batch category which was added at upload
    for image in info_blob.values():
        image['meta_cats'].append(batch_cat)
    return info_blob
def run(in_path=None, out_path=None, data_path=None):
    """
    Create new mapping tables and merge them with the existing mappings.

    Loads the data libraries and the existing connections, loads CSV_CONFIG
    into the module-level CSV_FILES, builds fresh mappings from the data,
    combines them with the previously loaded mappings and writes the
    results as text files into out_path.

    :param in_path: directory holding the existing connection files.
        Defaults to IN_PATH.
    :param out_path: directory receiving the output files; created if it
        does not exist. Defaults to OUT_PATH.
    :param data_path: directory holding the data files. Defaults to
        DATA_PATH.
    :return: None
    """
    global CSV_FILES

    # set defaults unless overridden
    in_path = in_path or IN_PATH
    out_path = out_path or OUT_PATH
    data_path = data_path or DATA_PATH

    # Load all relevant files
    A = MakeInfo()
    A.readInLibraries(folder=data_path)
    A.readConnections(keepskip=True, folder=in_path)

    # read csv files from config; the with-block closes the file even if
    # the json is malformed
    with codecs.open(CSV_CONFIG, 'r', 'utf-8') as f:
        CSV_FILES = json.load(f)

    # Create a dict of depicted ObjId with frequency as value
    # This gets around the issue that objDaten (A.objD) also contains
    # objects used in photoAll
    oDict = {}
    for _key, v in A.photoD.iteritems():
        objIds = v[u'PhoObjId']
        if objIds:
            for o in objIds.split(';'):
                # dict.get avoids scanning a key list for every object id
                oDict[o] = oDict.get(o, 0) + 1

    # create new mapping
    landDict, ortDict, techDict = makePlaceAndMaterial(A, oDict)  # ObjMultiple
    ord1Dict, ord2Dict, gruppDict = makeObjKeywords(A, oDict)
    keywords = makeKeywords(A)  # Stichworth
    photographers = makePhotographers(A)  # photo
    exhibitPlaces = makeExhibitPlaces(A, oDict)  # Ausstelung
    events = makeEvents(A, oDict)  # Ereignis
    people = makePeople(A, oDict)  # Kuenstler

    # combine with data from old mapping
    # need to handle unused mappings separately
    techDict = simpleCombine(A.materialC, techDict, addEmpty=True)
    landDict = simpleCombine(A.placesC, landDict)
    ortDict = simpleCombine(A.placesC, ortDict)
    exhibitPlaces = simpleCombine(A.placesC, exhibitPlaces)
    ord1Dict = simpleCombine(A.objCatC, ord1Dict)
    ord2Dict = simpleCombine(A.objCatC, ord2Dict)
    gruppDict = simpleCombine(A.objCatC, gruppDict)
    keywords = combineKeywords(A.stichC, keywords)
    photographers = combinePhotographers(
        A.photographerCreatC, A.photographerCatC, photographers)
    events = combineEvents(A.ereignisC, A.ereignisLinkC, events)
    people = combinePeople(A.peopleLinkC, A.peopleCreatC,
                           A.peopleCatC, people)

    # the following are needed to preserve old but unused mappings
    emptyPlaces = simpleEmpty(A.placesC, [landDict, ortDict, exhibitPlaces])
    emptyObjCats = simpleEmpty(A.objCatC, [ord1Dict, ord2Dict, gruppDict])

    # output
    # create target if it doesn't exist
    if not os.path.isdir(out_path):
        os.mkdir(out_path)

    # several dicts per file
    writePlaces(os.path.join(out_path, u'Places.txt'),
                exhibitPlaces, landDict, ortDict, emptyPlaces)  # Places
    writeObjKeywords(os.path.join(out_path, u'ObjKeywords.txt'),
                     ord1Dict, ord2Dict, gruppDict,
                     emptyObjCats)  # ObjKeyword

    # one dict per file
    writeMaterials(os.path.join(out_path, u'Materials.txt'),
                   techDict)  # Materials
    writeKeywords(os.path.join(out_path, u'Keywords.txt'),
                  keywords)  # Keywords
    writeEvents(os.path.join(out_path, u'Events.txt'),
                events)  # Events
    writePeople(os.path.join(out_path, u'People.txt'),
                people)  # People
    writePhotographers(os.path.join(out_path, u'Photographers.txt'),
                       photographers)  # Photographers
def run(in_path=None, out_path=None, data_path=None):
    """
    Generate updated mapping files from the data and the old mappings.

    Steps: load libraries and existing connections, load CSV_CONFIG into
    the module-level CSV_FILES, count how often each object id occurs in
    A.photoD, build the new mappings, combine them with the old ones and
    write one text file per mapping group into out_path.

    :param in_path: directory of the existing connection files.
        Defaults to IN_PATH.
    :param out_path: output directory, created when missing.
        Defaults to OUT_PATH.
    :param data_path: directory of the data files. Defaults to DATA_PATH.
    :return: None
    """
    global CSV_FILES

    # set defaults unless overridden
    in_path = in_path or IN_PATH
    out_path = out_path or OUT_PATH
    data_path = data_path or DATA_PATH

    # Load all relevant files
    A = MakeInfo()
    A.readInLibraries(folder=data_path)
    A.readConnections(keepskip=True, folder=in_path)

    # read csv files from config (handle is released even on a parse error)
    with codecs.open(CSV_CONFIG, 'r', 'utf-8') as config_file:
        CSV_FILES = json.load(config_file)

    # Create a dict of depicted ObjId with frequency as value
    # This gets around the issue that objDaten (A.objD) also contains
    # objects used in photoAll
    oDict = {}
    for _key, photo in A.photoD.iteritems():
        objIds = photo[u'PhoObjId']
        if not objIds:
            continue
        for objId in objIds.split(';'):
            # constant-time update instead of a key-list scan per id
            oDict[objId] = oDict.get(objId, 0) + 1

    # create new mapping
    landDict, ortDict, techDict = makePlaceAndMaterial(A, oDict)  # ObjMultiple
    ord1Dict, ord2Dict, gruppDict = makeObjKeywords(A, oDict)
    keywords = makeKeywords(A)  # Stichworth
    photographers = makePhotographers(A)  # photo
    exhibitPlaces = makeExhibitPlaces(A, oDict)  # Ausstelung
    events = makeEvents(A, oDict)  # Ereignis
    people = makePeople(A, oDict)  # Kuenstler

    # combine with data from old mapping
    # need to handle unused mappings separately
    techDict = simpleCombine(A.materialC, techDict, addEmpty=True)
    landDict = simpleCombine(A.placesC, landDict)
    ortDict = simpleCombine(A.placesC, ortDict)
    exhibitPlaces = simpleCombine(A.placesC, exhibitPlaces)
    ord1Dict = simpleCombine(A.objCatC, ord1Dict)
    ord2Dict = simpleCombine(A.objCatC, ord2Dict)
    gruppDict = simpleCombine(A.objCatC, gruppDict)
    keywords = combineKeywords(A.stichC, keywords)
    photographers = combinePhotographers(A.photographerCreatC,
                                         A.photographerCatC,
                                         photographers)
    events = combineEvents(A.ereignisC, A.ereignisLinkC, events)
    people = combinePeople(A.peopleLinkC, A.peopleCreatC,
                           A.peopleCatC, people)

    # the following are needed to preserve old but unused mappings
    emptyPlaces = simpleEmpty(A.placesC, [landDict, ortDict, exhibitPlaces])
    emptyObjCats = simpleEmpty(A.objCatC, [ord1Dict, ord2Dict, gruppDict])

    # output
    # create target if it doesn't exist
    if not os.path.isdir(out_path):
        os.mkdir(out_path)

    # several dicts per file
    writePlaces(os.path.join(out_path, u'Places.txt'),
                exhibitPlaces, landDict, ortDict, emptyPlaces)  # Places
    writeObjKeywords(os.path.join(out_path, u'ObjKeywords.txt'),
                     ord1Dict, ord2Dict, gruppDict,
                     emptyObjCats)  # ObjKeyword

    # one dict per file
    writeMaterials(os.path.join(out_path, u'Materials.txt'),
                   techDict)  # Materials
    writeKeywords(os.path.join(out_path, u'Keywords.txt'),
                  keywords)  # Keywords
    writeEvents(os.path.join(out_path, u'Events.txt'),
                events)  # Events
    writePeople(os.path.join(out_path, u'People.txt'),
                people)  # People
    writePhotographers(os.path.join(out_path, u'Photographers.txt'),
                       photographers)  # Photographers
def makeAndRename(path, batch_cat=None, data_dir=None, connections_dir=None,
                  filename_file=None):
    """
    Create info file and rename image file.

    For every file in path with an entry in the hitlist an info page is
    generated, written to '<new name>.info' and the file is renamed to its
    new name. Progress and failures are appended to '¤generator.log' in
    path; files starting with '¤' are treated as logs and skipped.

    :param path: relative path to the directory in which to process files
    :param batch_cat: If given a category of the format
        Category:Media contributed by LSH: batchCat will be added to all
        files.
    :param data_dir: override the default directory for data files
    :param connections_dir: override the default directory for connection
        files
    :param filename_file: override the default filename file
    :return: None
    """
    # set defaults unless overridden
    data_dir = data_dir or DATA_DIR
    connections_dir = connections_dir or CONNECTIONS_DIR
    filename_file = filename_file or FILENAMES

    # logfile; the with-block ensures it is flushed and closed on any exit
    logfile = os.path.join(path, u'¤generator.log')
    with codecs.open(logfile, 'a', 'utf-8') as flog:
        # require batchCat to be of some length
        if batch_cat is not None:
            batch_cat = batch_cat.strip()
            if not batch_cat:
                batch_cat = None
            else:
                batch_cat = u'[[Category:Media contributed by LSH: %s]]' \
                            % batch_cat

        _, name_to_pho = makeHitlist(filename_file)

        # get category statistics, reusing the hitlist loaded above
        # (catTest loads it itself if the mapping is empty)
        catTest(path, data_dir, connections_dir, filename_file,
                nameToPho=name_to_pho)

        # initialise maker
        maker = MakeInfo()
        maker.readInLibraries(folder=data_dir)
        maker.readConnections(folder=connections_dir)

        for filename_in in os.listdir(path):
            base_name = os.path.splitext(filename_in)[0]
            if filename_in.startswith(u'¤'):  # log files
                continue
            elif base_name not in name_to_pho:
                flog.write(u'%s did not have a photoId\n' % filename_in)
                continue

            entry = name_to_pho[base_name]
            pho_mull = entry['phoMull']
            filename_out = u'%s.%s' % (
                entry['filename'].replace(u' ', u'_'), entry['ext'])
            # the first returned value is not needed here
            _, out = maker.infoFromPhoto(pho_mull, preview=False,
                                         testing=False)

            # output
            if out:
                if batch_cat:
                    out += batch_cat

                # Make info file
                info_file = u'%s.info' % os.path.splitext(filename_out)[0]
                helpers.open_and_write_file(
                    os.path.join(path, info_file), out)

                # Move image file
                os.rename(os.path.join(path, filename_in),
                          os.path.join(path, filename_out))
                flog.write(u'%s outputed to %s\n'
                           % (filename_in, filename_out))
            else:
                flog.write(u'%s failed to make infopage. See log\n'
                           % filename_in)
def test_make_gallery_empty(self):
    """An empty file list gives '' and an empty printed list."""
    gallery = MakeInfo.make_gallery(self.title, [], self.printed)
    self.assertEqual(gallery, '')
    self.assertItemsEqual(self.printed, [])