Пример #1
0
def mostrarSeccionCarga():
    """Render the BibTeX upload section and store the uploaded papers.

    Connects to the database, shows a file uploader and, once a file has
    been provided, parses it as BibTeX and saves one ``Paper`` per entry
    that carries a ``doi`` field. Entries without a DOI are not saved;
    their titles are listed in an error message at the end.

    Returns:
        None. All results are reported through Streamlit widgets.
    """
    conectarBd()
    uploaded_file = st.file_uploader("Archivo Bibtex con la información de los papers")
    if uploaded_file is None:
        # Nothing to do until the user provides a file; avoid hitting the
        # database on every rerun of the page.
        return

    # count() asks the database for the number of documents instead of
    # loading every Paper just to measure the result set.
    before = Paper.objects.count()

    # The uploader yields bytes; the BibTeX parser expects text.
    data = uploaded_file.read().decode("utf-8")
    bib_data = parse_string(data, 'bibtex')
    notdoi = []
    papers = []
    with st.spinner("Preprocesando el archivo para la carga..."):
        total = len(bib_data.entries)
    st.success("Se iniciará la carga de "+str(total)+" papers a la base de datos.")
    my_bar = st.progress(.0)

    for loaded, entry in enumerate(bib_data.entries.values(), start=1):
        fields = entry.fields
        # Strip the braces BibTeX uses to protect capitalisation.
        title = fields["title"].replace('{', '').replace('}', '')
        doi = fields.get("doi")
        # The bar advances for every entry, including the skipped ones.
        my_bar.progress(loaded / total)
        if doi is None:
            notdoi.append(title)
            continue
        paper = Paper(title=title,
                      doi=doi,
                      abstract=fields.get("abstract", ""),
                      isOnlyReference=False).save()
        papers.append(paper)

    after = Paper.objects.count()
    st.success("Se ingresaron "+ str(after-before) + " papers a la base de datos")
    st.write([x.title for x in papers])
    if notdoi:
        st.error ("No se pudo ingresar " + str(len(notdoi)) + " debido a que no se conocía su doi")
        st.write(notdoi)
def _collect_author_affiliations(test, f):
    """Build the ``Author_Affiliation`` list for a CrossRef record.

    Args:
        test: CrossRef record (dict) that contains an ``'author'`` key.
        f: Open text file used as the message log.

    Returns:
        A list of unsaved ``Author_Affiliation`` objects, one per
        (author, institution) pair, or one without institution for an
        author that lists no affiliation.
    """
    affiliation_list = []
    for autor in test['author']:
        try:
            name_author = autor['given'] + ' ' + autor['family']
        except (KeyError, TypeError):
            # Without both name parts the author cannot be matched by
            # full name; skip the entry.
            continue

        try:
            author_ = Author.objects.get(name=name_author)
        except Author.MultipleObjectsReturned:
            f.write("Hubo un problema con el Autor: " + name_author + "\n")
            continue
        except Author.DoesNotExist:
            extra = {}
            # ORCID data is stored only when both keys are present.
            if 'ORCID' in autor and 'authenticated-orcid' in autor:
                extra = {
                    'orcid': autor['ORCID'],
                    'authenticated_orcid': autor['authenticated-orcid'],
                }
            author_ = Author(name=name_author,
                             familyName=autor['family'],
                             firstName=autor['given'],
                             **extra).save()
            f.write("Se guardó el Autor: " + name_author + "\n")

        # A missing or empty 'affiliation' key both mean "no institution".
        instituciones = autor.get('affiliation') or []
        if not instituciones:
            affiliation_list.append(
                Author_Affiliation(author=author_,
                                   sequence=autor['sequence']))
            continue

        for institucion in instituciones:
            try:
                institution_ = Institution.objects.get(
                    name=institucion['name'])
            except Institution.MultipleObjectsReturned:
                f.write("Hubo un problema con la Institución : " +
                        institucion['name'] + "\n")
                continue
            except Institution.DoesNotExist:
                f.write("Se guardó la Institución : " +
                        institucion['name'] + "\n")
                institution_ = Institution(name=institucion['name']).save()
            affiliation_list.append(
                Author_Affiliation(institution=institution_,
                                   author=author_,
                                   sequence=autor['sequence']))
    return affiliation_list


def _collect_funders(test, f):
    """Return the ``Finantial_Institution`` documents for a CrossRef record.

    Funders are looked up by DOI when the record provides one and by name
    otherwise; unknown funders are created. ``test`` must contain a
    ``'funder'`` key and ``f`` is the open message log.
    """
    funder_list = []
    for funder_ in test['funder']:
        try:
            if 'DOI' in funder_:
                funder = Finantial_Institution.objects.get(
                    doi=funder_['DOI'])
            else:
                funder = Finantial_Institution.objects.get(
                    name=funder_['name'])
        except Finantial_Institution.MultipleObjectsReturned:
            f.write("Hubo un problema con la institución financiera: " +
                    funder_['name'] + "\n")
            continue
        except Finantial_Institution.DoesNotExist:
            campos = {'name': funder_['name']}
            if 'DOI' in funder_:
                campos['doi'] = funder_['DOI']
            funder = Finantial_Institution(**campos).save()
            f.write("Se guardó la Institución : " + funder_['name'] + "\n")
        funder_list.append(funder)
    return funder_list


def _collect_references(test, f):
    """Return the DOIs referenced by a CrossRef record.

    References without a DOI are ignored. A referenced DOI that is not in
    the database yet is stored as a reference-only ``Paper`` when the
    record also carries its ``'article-title'``. ``test`` must contain a
    ``'reference'`` key and ``f`` is the open message log.
    """
    reference_list = []
    for reference_ in test['reference']:
        if 'DOI' not in reference_:
            continue
        ref_doi = reference_['DOI']
        try:
            Paper.objects.get(doi=ref_doi)
        except Paper.MultipleObjectsReturned:
            f.write("Hubo un problema con el paper de referencia: " +
                    ref_doi + "\n")
            continue
        except Paper.DoesNotExist:
            if 'article-title' in reference_:
                Paper(doi=ref_doi,
                      title=reference_['article-title'],
                      abstract=' ',
                      isOnlyReference=True).save()
                f.write("Se guardó el paper de referencia: " + ref_doi +
                        "\n")
            else:
                f.write("Paper de referencia no tiene title: " + "\n")
        # The DOI is recorded even when no Paper could be created for it.
        reference_list.append(ref_doi)
    return reference_list


def _load_paper_bibliography(paper, f):
    """Fetch CrossRef metadata for ``paper`` and store it on the document.

    Args:
        paper: ``Paper`` document; its ``doi`` is used for the lookup.
        f: Open text file used as the message log.

    Returns:
        The title reported by CrossRef, or ``None`` when the lookup
        failed (in which case ``paper`` is left untouched).
    """
    try:
        test = get_entity(paper.doi, EntityType.PUBLICATION,
                          OutputType.JSON)
        titulo = test['title'][0]
    except Exception:
        # Best-effort lookup: any failure marks the paper as not found.
        # Returning here is essential, otherwise the metadata of a
        # previously processed paper would be written onto this one.
        f.write('Fallo la Busqueda del Paper \n')
        return None

    # 'date-parts' is [[year, month, day]] but trailing parts may be absent.
    date_parts = ((test.get('created') or {}).get('date-parts') or [[]])[0]
    if len(date_parts) > 1 and date_parts[1] is not None:
        paper.publication_month = date_parts[1]
    if date_parts and date_parts[0] is not None:
        paper.publication_year = date_parts[0]

    if 'author' in test:
        paper.author_affiliations = _collect_author_affiliations(test, f)

    if 'funder' in test:
        paper.finantial_institutions = _collect_funders(test, f)
    else:
        f.write("El paper " + paper.doi +
                " no fue financiado por ninguna institución. \n")

    if 'reference' in test:
        reference_list = _collect_references(test, f)
    else:
        # NOTE(review): this branch means the record lists no references
        # at all; the logged text is kept unchanged for log compatibility.
        reference_list = []
        f.write("Paper de referencia no tiene DOI: " + "\n")

    paper.references = reference_list
    paper.bibliographyIsLoaded = True
    paper.save()
    return titulo


def automatizarCarga(papers):
    """Enrich ``papers`` with bibliographic data fetched from CrossRef.

    For every paper the CrossRef record of its DOI is requested; on
    success the publication date, authors with affiliations, funders and
    references are stored on the paper, creating the related documents
    that do not exist yet. Progress and a final summary are rendered with
    Streamlit and detailed messages are appended to ``mensajes.txt``.

    Args:
        papers: Sized collection of ``Paper`` documents (``len`` is
            called on it); each must expose ``doi`` and ``title``.

    Returns:
        None.
    """
    # count() lets the database count instead of loading every document.
    papers_before = Paper.objects.count()
    authors_before = Author.objects.count()
    inst_before = Institution.objects.count()
    fin_inst_before = Finantial_Institution.objects.count()
    total = len(papers)
    inicio_msg = st.success(
        "Se iniciará la carga de información bibliográfica de **" +
        str(total) + "** papers.")
    my_bar = st.progress(.0)
    exitos = []
    fallos = []

    for loaded, paper in enumerate(papers, start=1):
        # The log is reopened per paper so its messages reach disk as
        # soon as that paper is finished.
        with open('mensajes.txt', 'a', encoding='utf-8') as f:
            titulo = _load_paper_bibliography(paper, f)
        if titulo is None:
            fallos.append(paper.title)
        else:
            exitos.append(titulo)
        my_bar.progress(loaded / total)

    my_bar.empty()
    inicio_msg.empty()
    papers_after = Paper.objects.count()
    authors_after = Author.objects.count()
    inst_after = Institution.objects.count()
    fin_inst_after = Finantial_Institution.objects.count()
    exitosDF = pd.DataFrame()
    exitosDF["Título"] = pd.Series(exitos)
    fallosDF = pd.DataFrame()
    fallosDF["Título"] = pd.Series(fallos)
    st.success("Se procesaron exitosamente **" + str(len(exitos)) +
               "** papers")
    st.success("Se ingresaron **" + str(papers_after - papers_before) +
               " nuevas referencias** a la base de datos")
    st.success("Se ingresaron **" + str(authors_after - authors_before) +
               " nuevos autores** a la base de datos")
    st.success("Se ingresaron **" + str(inst_after - inst_before) +
               " nuevas instituciones** a la base de datos")
    st.success("Se ingresaron **" + str(fin_inst_after - fin_inst_before) +
               " nuevas instituciones financiadoras** a la base de datos")
    st.dataframe(exitosDF)
    st.error("La API CrossRef no tenía registros de **" + str(len(fallos)) +
             "** papers")
    st.dataframe(fallosDF)
def crearPaper():
    """Create a ``Paper`` from the intermediate files in the working directory.

    Requires the files ``affiliations_def``, ``publication`` and
    ``financement`` to exist; each missing one is reported in the sidebar.
    The publication data is read as JSON, funders and affiliations as CSV.
    Authors and institutions that are not in the database are created.
    The saved paper is dumped to the file ``paper`` and shown in the
    sidebar.

    Returns:
        ``False`` when any required file is missing, ``True`` once the
        paper has been saved.
    """
    # NOTE(review): existence is checked for 'affiliations_def' but the
    # data is read from 'affiliations' below - confirm which is intended.
    required_files = (
        ("affiliations_def", "Faltan los autores!!"),
        ("publication", "Faltan los datos de publicación!!"),
        ("financement", "Faltan los datos de financiamiento!!"),
    )
    missing_any = False
    for path, message in required_files:
        if os.path.exists(path):
            continue
        st.sidebar.write(InputError(message))
        missing_any = True
    if missing_any:
        return False

    with open('publication', 'r') as pub_file:
        pub_data = json.load(pub_file)
    finan_data = pd.read_csv('financement')
    aff_data = pd.read_csv('affiliations')

    # Funding institutions: only the ones already stored are attached.
    funders = []
    for _, fin_row in finan_data.iterrows():
        funder_name = fin_row["Name"]
        try:
            known_funder = Finantial_Institution.objects.get(name=funder_name)
        except Finantial_Institution.MultipleObjectsReturned:
            st.write("Hubo un problema con la institución: " + funder_name)
        except Finantial_Institution.DoesNotExist:
            # NOTE(review): the message says the institution was saved,
            # but nothing is created or attached here - confirm intent.
            st.write("Se guardó la institución: " + funder_name)
        else:
            funders.append(known_funder)

    # Affiliations: resolve (or create) the author first, then the
    # institution; an ambiguous match on either one skips the row.
    author_affiliations = []
    for _, aff_row in aff_data.iterrows():
        author_name = aff_row["Autor"]
        try:
            author_doc = Author.objects.get(name=author_name)
        except Author.MultipleObjectsReturned:
            st.write("Hubo un problema con el autor: " + author_name)
            continue
        except Author.DoesNotExist:
            scopus_id = str(aff_row["Scopus_id"])
            st.write("Se guardó al autor " + author_name + " con id " +
                     scopus_id)
            author_doc = Author(name=author_name, scopusID=scopus_id).save()

        institution_name = aff_row["Institución"]
        try:
            institution_doc = Institution.objects.get(name=institution_name)
        except Institution.MultipleObjectsReturned:
            st.write("Hubo un problema con la institución: " +
                     institution_name)
            continue
        except Institution.DoesNotExist:
            st.write("Se guardó la institución " + institution_name)
            institution_doc = Institution(name=institution_name).save()

        author_affiliations.append(
            Author_Affiliation(institution=institution_doc,
                               author=author_doc))

    # Build and persist the paper itself.
    paper_doc = Paper(
        title=pub_data["Titulo"],
        abstract=pub_data["Abstract"],
        doi=pub_data["DOI"],
        keywords=pub_data["Keywords"],
        publication_month=pub_data["Mes_Publicacion"],
        publication_year=pub_data["Año_Publicacion"],
        finantial_institutions=funders,
        author_affiliations=author_affiliations,
    )
    new_paper = paper_doc.save()

    with open('paper', 'w') as out_file:
        json.dump(new_paper.to_json(), out_file)
    st.sidebar.success("¡Se han guardado los datos de este paper! ")
    st.sidebar.json(new_paper.to_json())
    return True