Пример #1
0
def test():
    """Run one sample Summon query and print the resulting collection.

    The active query string is passed to ``summon.summon_query``; the
    response is handed to ``crosswalks_service.convert_json`` (with
    ``constants.FORMAT_SUMMONJSON``, ``"summon"`` and ``False``), the
    converted records are wrapped with
    ``metajson_service.create_collection`` and the output of
    ``jsonbson.dumps_json`` is printed.

    Takes no arguments and returns nothing.
    """
    base_dir = os.path.join(os.getcwd(), "data")
    # A parenthesised single-argument print behaves the same as the Python 2
    # print statement and is also valid as the Python 3 print() function.
    print("base_dir: {}".format(base_dir))

    # Alternative sample queries; swap one in for the active query_string
    # below to exercise other facet/filter combinations.
    #query_string = "s.q=europe&s.cmd%5B%5D=addFacetValueFilters%28IsFullText%2Ctrue%29&s.fq%5B%5D=SourceType%3A%28%22Library+Catalog%22%29&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Ct&s.fvf%5B%5D=ContentType%2CBook+Review%2Ct&s.fvf%5B%5D=ContentType%2CDissertation%2Ct"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Book+Review)&s.fvf%5B%5D=ContentType,Journal+Article,f&s.fvf%5B%5D=ContentType,Dissertation,t&s.fvf%5B%5D=IsFullText,true,f&s.fvf%5B%5D=ContentType,Newspaper+Article,t&s.q=europe"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Journal+Article)&s.fvf=IsFullText,true,f&s.q=europe+federal"
    #query_string = "s.fvf%5B%5D=ContentType%2CJournal+Article%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Dissertation)%20removeFacetValueFilter(ContentType,Journal%20Article)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=ContentType%2CData+Set%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Cf&s.fvf%5B%5D=ContentType%2CTrade+Publication+Article%2Cf&s.ps=50&s.q=europe+federal&s.cmd=removeFacetValueFilter(ContentType,Newspaper%20Article)%20removeFacetValueFilter(ContentType,Trade%20Publication%20Article)%20removeFacetValueFilter(ContentType,Data%20Set)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.ps=50&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Data+Set)"
    query_string = "s.q=Organized+Crime+and+States"
    # Thesis:
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Dissertation)&s.q=social"
    # Video:
    #query_string = "s.q=social&s.fvf%5B%5D=ContentType%2CVideo+Recording%2Cf"
    # EditedBook
    #query_string = "s.q=ouvrage+collectif&s.cmd=addFacetValueFilters(ContentType,Book+%2F+eBook)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf"
    summon_response = summon.summon_query(query_string)
    metajson_list = crosswalks_service.convert_json(
        summon_response, constants.FORMAT_SUMMONJSON, "summon", False)
    collection = metajson_service.create_collection("summon_test",
                                                    "Summon Test",
                                                    metajson_list)
    # NOTE(review): the second argument of dumps_json is presumably a
    # pretty-print flag -- confirm against jsonbson.
    print(jsonbson.dumps_json(collection, True))
def convert_metajson_list_in_one_file(metajson_list, output_format):
    """Yield one object that bundles all of ``metajson_list`` in ``output_format``.

    This is a generator. It yields at most one item: the value returned by
    the crosswalk/service function matching ``output_format``. Nothing is
    yielded when ``metajson_list`` is falsy; for a format that is not
    handled here an error is logged and nothing is yielded.
    """
    if not metajson_list:
        return

    if output_format in (constants.FORMAT_METAJSON, constants.FORMAT_HTML):
        # Both formats share the same collection wrapper.
        bundle = metajson_service.create_collection(None, None, metajson_list)
    elif output_format == constants.FORMAT_MODS:
        bundle = mods_crosswalk.metajson_list_to_mods_xmletree(metajson_list)
    elif output_format == constants.FORMAT_METS:
        bundle = mets_crosswalk.metajson_list_to_mets_xmletree(metajson_list)
    elif output_format == constants.FORMAT_REPEC:
        bundle = repec_crosswalk.metajson_list_to_repec(metajson_list)
    elif output_format == constants.FORMAT_CSV_METAJSON:
        bundle = csv_crosswalk.metajson_list_to_csv_metajson(metajson_list)
    else:
        logging.error("convert_metajson_list ERROR Not managed format: {}".format(output_format))
        return

    yield bundle
Пример #3
0
def convert_metajson_list_in_one_file(metajson_list, output_format):
    """Yield one object that bundles all of ``metajson_list`` in ``output_format``.

    This is a generator. It yields at most one item: the value returned by
    the crosswalk/service function matching ``output_format``. Nothing is
    yielded when ``metajson_list`` is falsy; for a format that is not
    handled here an error is logged and nothing is yielded.
    """
    if not metajson_list:
        return

    if output_format in (constants.FORMAT_METAJSON, constants.FORMAT_HTML):
        # Both formats share the same collection wrapper.
        bundle = metajson_service.create_collection(None, None, metajson_list)
    elif output_format == constants.FORMAT_MODS:
        bundle = mods_crosswalk.metajson_list_to_mods_xmletree(metajson_list)
    elif output_format == constants.FORMAT_METS:
        bundle = mets_crosswalk.metajson_list_to_mets_xmletree(metajson_list)
    elif output_format == constants.FORMAT_REPEC:
        bundle = repec_crosswalk.metajson_list_to_repec(metajson_list)
    elif output_format == constants.FORMAT_CSV_METAJSON:
        bundle = csv_crosswalk.metajson_list_to_csv_metajson(metajson_list)
    else:
        logging.error("convert_metajson_list ERROR Not managed format: {}".format(output_format))
        return

    yield bundle
def parse_and_convert_file_list(input_file_path_list, input_format, output_format, source, rec_id_prefix, only_first_record, all_in_one_file):
    """Convert every file in ``input_file_path_list`` and gather the results.

    Each path is passed to ``parse_and_convert_file`` (always with ``False``
    as its last argument) and the truthy per-file results are concatenated.

    Returns ``None`` when nothing was gathered. Otherwise returns the flat
    list when ``all_in_one_file`` is falsy, or a single wrapped object for
    the METAJSON/HTML and MODS output formats. Any other output format logs
    an error and returns ``None``.
    """
    gathered = []
    for path in input_file_path_list:
        converted = parse_and_convert_file(
            path, input_format, output_format, source,
            rec_id_prefix, only_first_record, False)
        if converted:
            gathered.extend(converted)

    if not gathered:
        return None
    if not all_in_one_file:
        return gathered

    # From here on the caller asked for one combined object.
    if output_format in (constants.FORMAT_METAJSON, constants.FORMAT_HTML):
        return metajson_service.create_collection(None, None, gathered)
    if output_format == constants.FORMAT_MODS:
        return mods_crosswalk.create_mods_collection_xmletree(gathered)

    logging.error("parse_and_convert_file_list: ERROR Not managed format: {}".format(output_format))
    return None
Пример #5
0
def parse_and_convert_file_list(input_file_path_list, input_format, output_format, source, rec_id_prefix, only_first_record, all_in_one_file):
    """Convert every file in ``input_file_path_list`` and gather the results.

    Each path is passed to ``parse_and_convert_file`` (always with ``False``
    as its last argument) and the truthy per-file results are concatenated.

    Returns ``None`` when nothing was gathered. Otherwise returns the flat
    list when ``all_in_one_file`` is falsy, or a single wrapped object for
    the METAJSON/HTML and MODS output formats. Any other output format logs
    an error and returns ``None``.
    """
    gathered = []
    for path in input_file_path_list:
        converted = parse_and_convert_file(
            path, input_format, output_format, source,
            rec_id_prefix, only_first_record, False)
        if converted:
            gathered.extend(converted)

    if not gathered:
        return None
    if not all_in_one_file:
        return gathered

    # From here on the caller asked for one combined object.
    if output_format in (constants.FORMAT_METAJSON, constants.FORMAT_HTML):
        return metajson_service.create_collection(None, None, gathered)
    if output_format == constants.FORMAT_MODS:
        return mods_crosswalk.create_mods_collection_xmletree(gathered)

    logging.error("parse_and_convert_file_list: ERROR Not managed format: {}".format(output_format))
    return None
Пример #6
0
def test():
    """Run one sample Summon query and print the resulting collection.

    The active query string is passed to ``summon.summon_query``; the
    response is handed to ``crosswalks_service.convert_json`` (with
    ``constants.FORMAT_SUMMONJSON``, ``"summon"`` and ``False``), the
    converted records are wrapped with
    ``metajson_service.create_collection`` and the output of
    ``jsonbson.dumps_json`` is printed.

    Takes no arguments and returns nothing.
    """
    base_dir = os.path.join(os.getcwd(), "data")
    # A parenthesised single-argument print behaves the same as the Python 2
    # print statement and is also valid as the Python 3 print() function.
    print("base_dir: {}".format(base_dir))

    # Alternative sample queries; swap one in for the active query_string
    # below to exercise other facet/filter combinations.
    #query_string = "s.q=europe&s.cmd%5B%5D=addFacetValueFilters%28IsFullText%2Ctrue%29&s.fq%5B%5D=SourceType%3A%28%22Library+Catalog%22%29&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Ct&s.fvf%5B%5D=ContentType%2CBook+Review%2Ct&s.fvf%5B%5D=ContentType%2CDissertation%2Ct"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Book+Review)&s.fvf%5B%5D=ContentType,Journal+Article,f&s.fvf%5B%5D=ContentType,Dissertation,t&s.fvf%5B%5D=IsFullText,true,f&s.fvf%5B%5D=ContentType,Newspaper+Article,t&s.q=europe"
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Journal+Article)&s.fvf=IsFullText,true,f&s.q=europe+federal"
    #query_string = "s.fvf%5B%5D=ContentType%2CJournal+Article%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Dissertation)%20removeFacetValueFilter(ContentType,Journal%20Article)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=ContentType%2CData+Set%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.fvf%5B%5D=ContentType%2CNewspaper+Article%2Cf&s.fvf%5B%5D=ContentType%2CTrade+Publication+Article%2Cf&s.ps=50&s.q=europe+federal&s.cmd=removeFacetValueFilter(ContentType,Newspaper%20Article)%20removeFacetValueFilter(ContentType,Trade%20Publication%20Article)%20removeFacetValueFilter(ContentType,Data%20Set)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf&s.fvf%5B%5D=IsFullText%2Ctrue%2Cf&s.ps=50&s.q=europe+federal&s.cmd=addFacetValueFilters(ContentType,Data+Set)"
    query_string = "s.q=Organized+Crime+and+States"
    # Thesis:
    #query_string = "s.cmd=addFacetValueFilters(ContentType,Dissertation)&s.q=social"
    # Video:
    #query_string = "s.q=social&s.fvf%5B%5D=ContentType%2CVideo+Recording%2Cf"
    # EditedBook
    #query_string = "s.q=ouvrage+collectif&s.cmd=addFacetValueFilters(ContentType,Book+%2F+eBook)"
    #query_string = "s.fvf%5B%5D=ContentType%2CConference+Proceeding%2Cf"
    summon_response = summon.summon_query(query_string)
    metajson_list = crosswalks_service.convert_json(summon_response, constants.FORMAT_SUMMONJSON, "summon", False)
    collection = metajson_service.create_collection("summon_test", "Summon Test", metajson_list)
    # NOTE(review): the second argument of dumps_json is presumably a
    # pretty-print flag -- confirm against jsonbson.
    print(jsonbson.dumps_json(collection, True))
Пример #7
0
def write_metajson_collection(col_id, col_title, items, output_file_path):
    """Wrap ``items`` in a collection and hand it to ``write_json``.

    The collection is built by ``metajson_service.create_collection`` from
    ``col_id``, ``col_title`` and ``items`` and passed to ``write_json``
    together with ``output_file_path``. Nothing happens when ``items`` is
    falsy. Always returns ``None``.
    """
    if not items:
        return
    write_json(
        metajson_service.create_collection(col_id, col_title, items),
        output_file_path)