def test():
    """Fetch one sample record from the SPIRE OAI-PMH endpoint and dump it.

    Builds a ``Target`` describing the Sciences Po repository, requests the
    'bookPart' sample record through ``oaipmh_harvester.get_record`` and
    hands the result to ``dump_result``.
    """
    base_dir = os.path.join(os.getcwd(), "data")
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement.
    print("base_dir: " + base_dir)

    # Describe the repository; fields are assigned one by one, in this order.
    target_spire = Target()
    for field, value in (
        ('identifier', 'spire'),
        ('title', 'Sciences Po Institutional Repository'),
        ('type', 'oaipmh'),
        ('url', 'http://spire.sciences-po.fr/dissemination/oaipmh2-publications.xml'),
        ('metadata_prefix', 'mods'),
    ):
        target_spire[field] = value

    # Sample values that the active request below does not use.
    # NOTE(review): calling datetime(...) directly needs the class in scope
    # (from datetime import datetime), whereas harvest_by_ids in this source
    # calls datetime.datetime.now(), which needs the module -- confirm which
    # import the file actually has.
    test_date_from = datetime(2012, 10, 1, 12, 30, 59, tzinfo=None)
    test_date_until = datetime(2013, 4, 30, 17, 50, 1, tzinfo=None)
    test_set = 'SHS:ART'

    # Sample record identifiers, keyed by the kind of document they name.
    sample_ids = {
        'book': 'oai:spire.sciences-po.fr:2441/dambferfb7dfprc9m26c8c8o3',
        'bookPart': 'oai:spire.sciences-po.fr:2441/eo6779thqgm5r489makgoai85',
        'masterThesis': 'oai:spire.sciences-po.fr:2441/5l6uh8ogmqildh09h6m8hj429',
        'doctoralThesis': 'oai:spire.sciences-po.fr:2441/3fm4jv3k2s99lms9jb5i5asil',
        'professoralThesis': 'oai:spire.sciences-po.fr:2441/f4rshpf3v1umfa09lb0joe5g5',
    }

    # Other requests, currently disabled:
    #   result = oaipmh_harvester.identifiy(target_spire)
    #   dump_result(result)
    #   result = oaipmh_harvester.list_metadata_formats(target_spire, test_identifier_01)
    #   dump_result(result)
    #   result = oaipmh_harvester.list_sets(target_spire)
    #   dump_result(result)
    # (test_identifier_01 is not defined in this function.)

    # Active request: a single record, selected by identifier.
    result = oaipmh_harvester.get_record(target_spire, identifier=sample_ids['bookPart'])
    dump_result(result)
def test():
    """Fetch one sample record from the SPIRE OAI-PMH endpoint and dump it.

    Builds a ``Target`` describing the Sciences Po repository, requests the
    'bookPart' sample record through ``oaipmh_harvester.get_record`` and
    hands the result to ``dump_result``.
    """
    base_dir = os.path.join(os.getcwd(), "data")
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement.
    print("base_dir: " + base_dir)

    # Describe the repository; fields are assigned one by one, in this order.
    target_spire = Target()
    for field, value in (
        ('identifier', 'spire'),
        ('title', 'Sciences Po Institutional Repository'),
        ('type', 'oaipmh'),
        ('url', 'http://spire.sciences-po.fr/dissemination/oaipmh2-publications.xml'),
        ('metadata_prefix', 'mods'),
    ):
        target_spire[field] = value

    # Sample values that the active request below does not use.
    # NOTE(review): calling datetime(...) directly needs the class in scope
    # (from datetime import datetime), whereas harvest_by_ids in this source
    # calls datetime.datetime.now(), which needs the module -- confirm which
    # import the file actually has.
    test_date_from = datetime(2012, 10, 1, 12, 30, 59, tzinfo=None)
    test_date_until = datetime(2013, 4, 30, 17, 50, 1, tzinfo=None)
    test_set = 'SHS:ART'

    # Sample record identifiers, keyed by the kind of document they name.
    sample_ids = {
        'book': 'oai:spire.sciences-po.fr:2441/dambferfb7dfprc9m26c8c8o3',
        'bookPart': 'oai:spire.sciences-po.fr:2441/eo6779thqgm5r489makgoai85',
        'masterThesis': 'oai:spire.sciences-po.fr:2441/5l6uh8ogmqildh09h6m8hj429',
        'doctoralThesis': 'oai:spire.sciences-po.fr:2441/3fm4jv3k2s99lms9jb5i5asil',
        'professoralThesis': 'oai:spire.sciences-po.fr:2441/f4rshpf3v1umfa09lb0joe5g5',
    }

    # Other requests, currently disabled:
    #   result = oaipmh_harvester.identifiy(target_spire)
    #   dump_result(result)
    #   result = oaipmh_harvester.list_metadata_formats(target_spire, test_identifier_01)
    #   dump_result(result)
    #   result = oaipmh_harvester.list_sets(target_spire)
    #   dump_result(result)
    # (test_identifier_01 is not defined in this function.)

    # Active request: a single record, selected by identifier.
    result = oaipmh_harvester.get_record(target_spire, identifier=sample_ids['bookPart'])
    dump_result(result)
def harvest_by_ids(corpus, target, ids):
    """Harvest the records named by *ids* from *target* and import them into *corpus*.

    Every identifier is fetched with ``oaipmh_harvester.get_record``; the
    collected results are then passed to
    ``corpus_service.import_metajson_list``. Each of the two phases is timed
    and reported through ``chrono.chrono_trace``.

    Args:
        corpus: forwarded unchanged to ``corpus_service.import_metajson_list``.
        target: forwarded unchanged to ``oaipmh_harvester.get_record``.
        ids: record identifiers; iterated once and measured with ``len()``,
            so it must be a sized iterable.

    Returns:
        None.
    """
    logging.info("harvest_by_ids: {}".format(ids))
    started_at = datetime.datetime.now()

    # harvest: one get_record call per identifier, in the given order
    records = [oaipmh_harvester.get_record(target, record_id) for record_id in ids]
    harvested_at = datetime.datetime.now()
    chrono.chrono_trace(
        "harvest spire and convert to metajson",
        started_at,
        harvested_at,
        len(ids),
    )

    # import: the trace count is the length of whatever the import returns
    import_outcome = corpus_service.import_metajson_list(corpus, records, True, None)
    imported_at = datetime.datetime.now()
    chrono.chrono_trace("import", harvested_at, imported_at, len(import_outcome))
def harvest_by_ids(corpus, target, ids):
    """Harvest the records named by *ids* from *target* and import them into *corpus*.

    Every identifier is fetched with ``oaipmh_harvester.get_record``; the
    collected results are then passed to
    ``corpus_service.import_metajson_list``. Each of the two phases is timed
    and reported through ``chrono.chrono_trace``.

    Args:
        corpus: forwarded unchanged to ``corpus_service.import_metajson_list``.
        target: forwarded unchanged to ``oaipmh_harvester.get_record``.
        ids: record identifiers; iterated once and measured with ``len()``,
            so it must be a sized iterable.

    Returns:
        None.
    """
    logging.info("harvest_by_ids: {}".format(ids))
    started_at = datetime.datetime.now()

    # harvest: one get_record call per identifier, in the given order
    records = [oaipmh_harvester.get_record(target, record_id) for record_id in ids]
    harvested_at = datetime.datetime.now()
    chrono.chrono_trace(
        "harvest spire and convert to metajson",
        started_at,
        harvested_at,
        len(ids),
    )

    # import: the trace count is the length of whatever the import returns
    import_outcome = corpus_service.import_metajson_list(corpus, records, True, None)
    imported_at = datetime.datetime.now()
    chrono.chrono_trace("import", harvested_at, imported_at, len(import_outcome))