Exemplo n.º 1
0
def iso2tables_old(master, entry_filename, rec_format, id_traitement):
    """Iterate over an ISO 2709 file and hand each record to the extractor.

    The file is opened in binary mode and read through ``mc.MARCReader``
    with ``force_utf8`` switched on. Each record is passed to
    ``record2listemetas`` together with ``rec_format``.

    If iteration raises ``mc.exceptions.RecordLengthInvalid`` or
    ``UnicodeDecodeError``, the error is printed to stdout between two
    banner lines and ``main.popup_errors`` is called with ``master``, the
    message and a link to the online help. Iteration is not resumed.

    ``id_traitement`` is accepted but not used by this function.
    """
    help_label = "Aide en ligne : conversion iso2709 > XML"
    help_url = "https://github.com/Transition-bibliographique/bibliostratus/wiki/1-%5BBleu%5D-Pr%C3%A9parer-ses-donn%C3%A9es-pour-l'alignement-%C3%A0-partir-d'un-export-catalogue#un-probl%C3%A8me-dencodage--passez-en-xml-avec-marcedit"
    with open(entry_filename, 'rb') as marc_stream:
        reader = mc.MARCReader(marc_stream)
        reader.force_utf8 = True
        try:
            for marc_record in reader:
                record2listemetas(marc_record, rec_format)
        except (mc.exceptions.RecordLengthInvalid, UnicodeDecodeError) as err:
            # Both failures are reported in exactly the same way.
            print("\n\n/*---------------------------------------------*\n\n")
            print(main.errors["pb_input_utf8"])
            print(err)
            print("\n\n*------------------------------------------------*/")
            main.popup_errors(
                master,
                main.errors["pb_input_utf8_marcEdit"],
                help_label,
                help_url,
            )
Exemplo n.º 2
0
def align_from_bib(form, entry_filename, liste_reports, parametres):
    """Align local authority data with BnF authorities from a bibliographic
    extraction (BIB metadata plus the author's name, first name and dates).

    Parameters:
        form: parent widget handed to ``main.popup_errors`` and to
            ``align_from_bib_item``.
        entry_filename: path of a tab-separated, UTF-8 encoded input file.
        liste_reports: report handle(s) forwarded to ``row2file`` /
            ``row2files`` and to ``align_from_bib_item``.
        parametres: dict read for the keys ``meta_bnf``, ``file_nb`` and
            ``headers``.

    The header row of the report is written first (one file when
    ``file_nb`` is 1, several when it is 2), then every input row is passed
    to ``align_from_bib_item`` with its zero-based index.
    """
    header_columns = [
        "NumNot", "nbARK", "ark AUT trouvé", "ark BIB initial",
        "frbnf BIB initial", "Titre", "ISNI", "Nom", "Complément nom",
        "dates Auteur"
    ]
    if parametres['meta_bnf'] == 1:
        header_columns.extend(
            ["[BnF] Nom", "[BnF] Complément Nom", "[BnF] Dates"])
    if parametres['file_nb'] == 1:
        row2file(header_columns, liste_reports)
    # The key used to be spelled 'file_nb ' (trailing space), which raised
    # KeyError whenever file_nb was not 1.
    elif parametres['file_nb'] == 2:
        row2files(header_columns, liste_reports)
    with open(entry_filename, newline='\n', encoding="utf-8") as csvfile:
        entry_file = csv.reader(csvfile, delimiter='\t')
        if parametres['headers']:
            try:
                # The default avoids StopIteration on an empty input file.
                next(entry_file, None)
            except UnicodeDecodeError:
                main.popup_errors(
                    form, main.errors["pb_input_utf8"],
                    "Comment modifier l'encodage du fichier",
                    "https://github.com/Transition-bibliographique/bibliostratus/wiki/2-%5BBlanc%5D-:-alignement-des-donn%C3%A9es-bibliographiques-avec-la-BnF#erreur-dencodage-dans-le-fichier-en-entr%C3%A9e"
                )
        for n, row in enumerate(entry_file):
            align_from_bib_item(row, n, form, parametres, liste_reports)
Exemplo n.º 3
0
def iso2tables(master, entry_filename, rec_format, id_traitement):
    """Process an ISO 2709 file one record at a time.

    The input is read as UTF-8 text and split on the record terminator
    (U+001D). Each chunk is written, terminator restored, to
    ``temp_record.txt``, which is then parsed with ``mc.MARCReader`` so that
    a failure on one record does not stop the others. Parsed records are
    printed (field 001) and passed to ``record2listemetas``.

    When parsing a chunk raises ``RecordLengthInvalid`` or
    ``UnicodeDecodeError``, the identifier returned by ``record2meta`` is
    appended to the module-level ``liste_notices_pb_encodage``.

    After the loop the temporary file is removed; if it does not exist
    (no chunk was written), ``main.popup_errors`` is called on ``master``
    with the ``format_fichier_en_entree`` message.

    ``id_traitement`` is accepted but not used by this function.
    """
    temp_filename = "temp_record.txt"
    # Context managers guarantee the handles are closed; the input file
    # handle was previously never closed.
    with open(entry_filename, 'r', encoding="utf-8") as source:
        chunks = source.read().split(u'\u001D')[0:-1]
    for chunk in chunks:
        with open(temp_filename, "w", encoding="utf-8") as temp_out:
            temp_out.write(chunk + u'\u001D')
        with open(temp_filename, 'rb') as fh:
            collection = mc.MARCReader(fh)
            collection.force_utf8 = True
            try:
                for record in collection:
                    print(record2meta(record, ["001"]))
                    record2listemetas(record, rec_format)
            except (mc.exceptions.RecordLengthInvalid, UnicodeDecodeError):
                # NOTE(review): if the reader fails before yielding anything,
                # `record` is either unbound (first chunk) or still refers to
                # the record of a previous chunk - confirm which identifier
                # should be reported here.
                NumNot = record2meta(record, ["001"])
                liste_notices_pb_encodage.append(NumNot)
    try:
        os.remove(temp_filename)
    except FileNotFoundError:
        main.popup_errors(master, main.errors["format_fichier_en_entree"])
Exemplo n.º 4
0
def launch(entry_filename,
           file_format,
           rec_format,
           output_ID,
           master=None,
           form=None):
    """Run the MARC-to-tables conversion once the form has been validated.

    Parameters:
        entry_filename: sequence whose first item is the input file path.
        file_format: converted with ``int``; 1 and 2 are routed to
            ``iso2tables``, any other value to ``xml2tables``.
        rec_format: converted with ``int``; 1 fills ``doc_record_type`` from
            the BIB tables (``doctype`` x ``recordtype``), any other value
            from the AUT tables (``doctypeAUT`` / ``recordtypeAUT``).
        output_ID: converted with ``str`` and forwarded to the conversion
            function and to ``end_of_treatments``.
        master, form: forwarded to the popup / check / conversion helpers.

    Raises:
        RuntimeError: if ``entry_filename`` is empty (after the popup has
            been shown).
    """
    if not entry_filename:
        message = "Merci d'indiquer un nom de fichier en entrée"
        main.popup_errors(form, message)
        # A bare `raise` with no active exception already produced a
        # RuntimeError; raise it explicitly with a meaningful message.
        raise RuntimeError(message)
    entry_filename = entry_filename[0]
    try:
        # Single assignment so that nothing is rebound if a conversion fails.
        entry_filename, file_format, rec_format, output_ID = (
            str(entry_filename),
            int(file_format),
            int(rec_format),
            str(output_ID),
        )
    except ValueError as err:
        print("\n\nDonnées en entrée erronées\n")
        print(err)
    main.check_file_name(form, entry_filename)

    if rec_format == 1:
        # BIB records: one label per (document type, record type) pair.
        for doct in doctype:
            doct_libelle = doctype[doct]
            for recordt in recordtype:
                recordt_libelle = recordtype[recordt]
                doc_record_type[doct + recordt] = "-".join(
                    [doct_libelle, recordt_libelle])
    else:
        # AUT records: the document type is fixed to "c".
        doct = "c"
        doct_libelle = doctypeAUT[doct] if doct in doctypeAUT else doct
        for recordt in recordtypeAUT:
            recordt_libelle = recordtypeAUT[recordt]
            doc_record_type[doct + recordt] = "-".join(
                [doct_libelle, recordt_libelle])
    print("Fichier en entrée : ", entry_filename)
    if file_format in (1, 2):
        iso2tables(master, entry_filename, file_format, rec_format, output_ID)
    else:
        xml2tables(master, entry_filename, rec_format, output_ID)
    end_of_treatments(form, output_ID)
Exemplo n.º 5
0
def check_nb_colonnes(row, parametres, frame_master):
    """
    Compare the number of cells in ``row`` with the value stored under
    ``parametres["correct_record_option"]``; on a mismatch, report both
    numbers through ``main.popup_errors`` on ``frame_master``.
    """
    found = len(row)
    expected = parametres["correct_record_option"]

    # Nothing to report when the two counts agree.
    if expected == found:
        return

    alert = (
        "Erreur dans les paramètres en entrée :\n"
        f"Nombre de colonnes dans le fichier : {found}\n"
        f"Nombre de colonnes indiqué : {expected}"
    )
    main.popup_errors(frame_master, alert)
Exemplo n.º 6
0
def iso2tables(master, entry_filename, id_traitement):
    """Read an ISO 2709 file and pass every record to ``record2listemetas``.

    The file is opened in binary mode and iterated through
    ``mc.MARCReader`` with ``force_utf8`` enabled. If iteration raises
    ``mc.exceptions.RecordLengthInvalid``, the ``pb_input_utf8`` message is
    printed between two banner lines and shown via ``main.popup_errors`` on
    ``master``.

    ``id_traitement`` is accepted but not used by this function.
    """
    with open(entry_filename, 'rb') as marc_stream:
        reader = mc.MARCReader(marc_stream)
        reader.force_utf8 = True
        try:
            for marc_record in reader:
                record2listemetas(marc_record)
        except mc.exceptions.RecordLengthInvalid:
            print("\n\n/*---------------------------------------------*\n\n")
            message = main.errors["pb_input_utf8"]
            print(message)
            print("\n\n*------------------------------------------------*/")
            main.popup_errors(master, message)
Exemplo n.º 7
0
def test_encoding_file(master, entry_filename, encoding):
    """Read ``entry_filename`` with ``encoding`` and split it into records.

    Returns:
        A ``(test, input_file)`` tuple. ``test`` is always ``True`` in this
        implementation. ``input_file`` is the list of chunks found before
        each record terminator (U+001D); it stays ``""`` when reading fails.

    On ``UnicodeDecodeError`` the ``format_fichier_en_entree`` message is
    shown through ``main.popup_errors`` on ``master``. Any other
    ``ValueError`` whose text contains "base 10" prints a hint about the
    UTF-8 BOM.
    """
    test = True
    input_file = ""
    try:
        # `with` closes the handle, which the chained open().read() never did.
        with open(entry_filename, 'r', encoding=encoding) as fh:
            input_file = fh.read().split(u'\u001D')[0:-1]
    # UnicodeDecodeError is a subclass of ValueError, so it must be handled
    # first; in the reverse order the popup branch was unreachable.
    except UnicodeDecodeError:
        main.popup_errors(master, main.errors["format_fichier_en_entree"])
    except ValueError as err:
        if "base 10" in str(err):
            print(
                "Encodage UTF-8 BOM -> convertir le fichier en UTF8 sans BOM")
    return (test, input_file)
Exemplo n.º 8
0
def test_encoding_file(master, entry_filename, encoding, file_format):
    """Read ``entry_filename`` with ``encoding`` and split it into records,
    stripping a leading UTF-8 byte order mark when ``file_format`` is 1.

    When ``file_format`` is 1 the raw bytes are copied to
    ``temp_file_sans_bom.txt`` (without the BOM if one is present) and, if a
    BOM was found, that temporary copy is the file that gets decoded. The
    temporary file is removed before returning.

    Returns:
        A ``(test, input_file)`` tuple. ``test`` is always ``True`` in this
        implementation. ``input_file`` is the list of chunks found before
        each record terminator (U+001D); it stays ``""`` when decoding fails.

    On ``UnicodeDecodeError`` the ``format_fichier_en_entree`` message is
    shown through ``main.popup_errors`` on ``master``.
    """
    import codecs

    test = True
    input_file = ""
    temp_filename = "temp_file_sans_bom.txt"
    if file_format == 1:
        with open(entry_filename, "rb") as raw:
            content = raw.read()
        # Compare against the actual BOM bytes. Decoding the first three
        # bytes and checking for a length of 1 also matched any leading
        # 3-byte character and could raise on a truncated sequence.
        if content.startswith(codecs.BOM_UTF8):
            content = content[len(codecs.BOM_UTF8):]
            entry_filename = temp_filename
        with open(temp_filename, "wb") as temp_file:
            temp_file.write(content)
    try:
        with open(entry_filename, 'r', encoding=encoding) as fh:
            input_file = fh.read().split(u'\u001D')[0:-1]
    except UnicodeDecodeError:
        main.popup_errors(master, main.errors["format_fichier_en_entree"])
    try:
        os.remove(temp_filename)
    except FileNotFoundError:
        print("Fichier temporaire UTF8-sans BOM inutile")
    return (test, input_file)
Exemplo n.º 9
0
def launch(form, entry_filename, headers, input_data_type, isni_option,
           file_nb, id_traitement, meta_bnf):
    """Build the parameter dict, create the reports and run the alignment.

    Parameters:
        form: parent widget forwarded to the alignment functions, to
            ``main.popup_errors`` and to ``bib2ark.fin_traitements``.
        entry_filename: input file forwarded to the alignment function.
        headers, isni_option, meta_bnf: stored unchanged in ``parametres``.
        input_data_type: 1 runs ``align_from_aut``, 2 runs
            ``align_from_bib``; any other value triggers an error popup.
        file_nb: stored in ``parametres`` and passed to ``create_reports``.
        id_traitement: stored in ``parametres`` and passed to
            ``create_reports``.

    ``bib2ark.fin_traitements`` is called in every case, with the
    module-level ``nb_notices_nb_ARK``.
    """
    parametres = {
        "headers": headers,
        "input_data_type": input_data_type,
        "isni_option": isni_option,
        "file_nb": file_nb,
        "meta_bnf": meta_bnf,
        "id_traitement": id_traitement
    }
    liste_reports = create_reports(id_traitement, file_nb)

    if input_data_type == 1:
        align_from_aut(form, entry_filename, liste_reports, parametres)
    elif input_data_type == 2:
        align_from_bib(form, entry_filename, liste_reports, parametres)
    else:
        # The parent widget comes first, as in the other popup_errors calls;
        # previously the message was passed in its place.
        main.popup_errors(form, "Format en entrée non défini")
    bib2ark.fin_traitements(form, liste_reports, nb_notices_nb_ARK)
Exemplo n.º 10
0
def launch(filename,
           type_records_form,
           correct_record_option,
           headers,
           AUTlieesAUT,
           AUTlieesSUB,
           AUTlieesWORK,
           outputID,
           format_records=1,
           format_file=1,
           xml_encoding_option="utf-8",
           select_fields="",
           master=None,
           form=None):
    """Validate the form values, create the output file(s) and run the
    extraction through ``file2extract``.

    Parameters:
        filename: sequence whose first item is the input file path.
        type_records_form: 2 selects record type "aut", anything else "bib".
        correct_record_option, headers, format_records, format_file:
            converted with ``int`` and stored in ``parametres``.
        AUTlieesAUT, AUTlieesSUB, AUTlieesWORK: converted with ``int``;
            their sum decides whether a linked-authority file is produced.
        outputID: converted with ``str`` then mapped through
            ``funcs.id_traitement2path``.
        xml_encoding_option, select_fields: converted with ``str`` and
            stored in ``parametres``.
        master, form: forwarded to the control / extraction / end helpers.

    Raises:
        RuntimeError: if ``filename`` is empty (after the popup is shown).
    """
    if not filename:
        message = "Merci d'indiquer un nom de fichier en entrée"
        main.popup_errors(form, message)
        # A bare `raise` with no active exception already produced a
        # RuntimeError; raise it explicitly with a meaningful message.
        raise RuntimeError(message)
    filename = filename[0]
    try:
        # Single assignment so that nothing is rebound if a conversion fails.
        (
            filename, type_records_form, correct_record_option, headers,
            AUTlieesAUT, AUTlieesSUB, AUTlieesWORK, outputID, format_records,
            format_file, xml_encoding_option, select_fields
        ) = (
            str(filename),
            int(type_records_form),
            int(correct_record_option),
            int(headers),
            int(AUTlieesAUT),
            int(AUTlieesSUB),
            int(AUTlieesWORK),
            str(outputID),
            int(format_records),
            int(format_file),
            str(xml_encoding_option),
            str(select_fields),
        )
    except ValueError as err:
        print("\n\nDonnées en entrée erronées\n")
        print(err)

    AUTliees = AUTlieesAUT + AUTlieesSUB + AUTlieesWORK
    format_BIB = dict_format_records[format_records]
    outputID = funcs.id_traitement2path(outputID)
    type_records = "aut" if type_records_form == 2 else "bib"
    parametres = {
        "headers": headers,
        "type_records": type_records,
        "correct_record_option": correct_record_option,
        "type_records_form": type_records_form,
        "AUTliees": AUTliees,
        "AUTlieesAUT": AUTlieesAUT,
        "AUTlieesSUB": AUTlieesSUB,
        "AUTlieesWORK": AUTlieesWORK,
        "outputID": outputID,
        "format_records": format_records,
        "format_file": format_file,
        "xml_encoding_option": xml_encoding_option,
        "format_BIB": format_BIB,
        "select_fields": select_fields,
        "listeARK_BIB": [],
        "listeNNA_AUT": []
    }
    main.generic_input_controls(master, filename)
    files = {"bib_file": file_create(type_records, parametres)}
    if AUTliees > 0:
        files["aut_file"] = file_create("aut", parametres)
    file2extract(filename, parametres, files, master, form)
    file_fin(files["bib_file"], format_file)
    # Finalise the AUT file whenever it was created. The former `== 1` test
    # skipped it when two or three link options were selected.
    if AUTliees > 0:
        file_fin(files["aut_file"], format_file)
    fin_traitements(form, outputID)
Exemplo n.º 11
0
def launch(filename,
           type_records_form,
           correct_record_option,
           headers,
           AUTlieesAUT,
           AUTlieesSUB,
           AUTlieesWORK,
           outputID,
           format_records=1,
           format_file=1,
           xml_encoding_option="utf-8",
           select_fields="",
           master=None,
           form=None):
    """Validate the form values, create the output file(s) and extract one
    record per row of the tab-separated input file.

    Parameters:
        filename: sequence whose first item is the input file path.
        type_records_form: 2 selects record type "aut", anything else "bib".
        correct_record_option, format_records, format_file: converted with
            ``int`` and stored in ``parametres``.
        headers: converted with ``int``; when truthy the first row of the
            input file is skipped. Also forwarded to ``extract1record``.
        AUTlieesAUT, AUTlieesSUB, AUTlieesWORK: converted with ``int``;
            their sum decides whether a linked-authority file is produced.
        outputID: converted with ``str`` then mapped through
            ``funcs.id_traitement2path``.
        xml_encoding_option, select_fields: converted with ``str`` and
            stored in ``parametres``.
        master, form: forwarded to the control / check / extraction helpers.

    Raises:
        RuntimeError: if ``filename`` is empty (after the popup is shown).
    """
    if not filename:
        message = "Merci d'indiquer un nom de fichier en entrée"
        main.popup_errors(form, message)
        # A bare `raise` with no active exception already produced a
        # RuntimeError; raise it explicitly with a meaningful message.
        raise RuntimeError(message)
    filename = filename[0]
    try:
        # Single assignment so that nothing is rebound if a conversion fails.
        (
            filename, type_records_form, correct_record_option, headers,
            AUTlieesAUT, AUTlieesSUB, AUTlieesWORK, outputID, format_records,
            format_file, xml_encoding_option, select_fields
        ) = (
            str(filename),
            int(type_records_form),
            int(correct_record_option),
            int(headers),
            int(AUTlieesAUT),
            int(AUTlieesSUB),
            int(AUTlieesWORK),
            str(outputID),
            int(format_records),
            int(format_file),
            str(xml_encoding_option),
            str(select_fields),
        )
    except ValueError as err:
        print("\n\nDonnées en entrée erronées\n")
        print(err)

    AUTliees = AUTlieesAUT + AUTlieesSUB + AUTlieesWORK
    format_BIB = dict_format_records[format_records]
    outputID = funcs.id_traitement2path(outputID)
    type_records = "aut" if type_records_form == 2 else "bib"
    parametres = {
        "type_records": type_records,
        "correct_record_option": correct_record_option,
        "type_records_form": type_records_form,
        "AUTliees": AUTliees,
        "AUTlieesAUT": AUTlieesAUT,
        "AUTlieesSUB": AUTlieesSUB,
        "AUTlieesWORK": AUTlieesWORK,
        "outputID": outputID,
        "format_records": format_records,
        "format_file": format_file,
        "xml_encoding_option": xml_encoding_option,
        "format_BIB": format_BIB,
        "select_fields": select_fields,
        "listeARK_BIB": [],
        "listeNNA_AUT": []
    }
    main.generic_input_controls(master, filename)

    bib_file = file_create(type_records, parametres)
    parametres["bib_file"] = bib_file
    if AUTliees > 0:
        aut_file = file_create("aut", parametres)
        parametres["aut_file"] = aut_file
    with open(filename, newline='\n', encoding="utf-8") as csvfile:
        entry_file = csv.reader(csvfile, delimiter='\t')
        if headers:
            next(entry_file, None)
        for j, row in enumerate(entry_file):
            # The column count is only checked against the first data row.
            if j == 0:
                check_nb_colonnes(row, parametres, master)
            extract1record(row, j, form, headers, parametres)

        file_fin(bib_file, format_file)
        # Finalise the AUT file whenever it was created. The former `== 1`
        # test skipped it when two or three link options were selected.
        if AUTliees > 0:
            file_fin(aut_file, format_file)
    fin_traitements(form, outputID)