Example #1
def main_resps():
    in_file = Path("data/resps-perturbed-tides.txt")
    bibs = parse_citations(in_file)

    db = BibDatabase()

    db.entries = [
        bib.to_bibtex() for bib in bibs
    ]

    id_to_count = defaultdict(lambda: 0)
    for entry in db.entries:
        id_to_count[entry["ID"]] += 1

    for the_id, count in id_to_count.items():
        if count > 1:
            for entry in [e for e in db.entries if e["ID"] == the_id]:
                count -= 1
                entry["ID"] += ascii_lowercase[count]
        



    writer = BibTexWriter()
    writer.indent = "    "
    with Path("data/resps-tides-perturbed-refs.bib").open("wb") as ref_file:
        ref_file.write(writer.write(db).encode())
Example #2
    def test_store_organism_publication(self):
        """Tests - store organism publication."""
        test_organism = Organism.objects.create(genus="Mus", species="musculus")

        db2 = BibDatabase()
        db2.entries = [
            {
                "journal": "Nice Journal",
                "comments": "A comment",
                "pages": "12--23",
                "month": "jan",
                "abstract": "This is an abstract. This line should be "
                "long enough to test multilines...",
                "title": "An amazing title",
                "year": "2013",
                "doi": "10.1186/s12864-016-2535-300002",
                "volume": "12",
                "ID": "Teste2018",
                "author": "Foo, b. and Foo1, b. and Foo b.",
                "keyword": "keyword1, keyword2",
                "ENTRYTYPE": "article",
            }
        ]
        for entry in db2.entries:
            bibtest = PublicationLoader()
            bibtest.store_bibtex_entry(entry)

        OrganismLoader().store_organism_publication(
            organism="Mus musculus", doi="10.1186/s12864-016-2535-300002"
        )
        test_organismpub = OrganismPub.objects.get(organism=test_organism)
        self.assertEqual("An amazing title", test_organismpub.pub.title)
Example #3
def entries_to_str(entries):
    """
    Pass a list of bibtexparser entries and return a bibtex formatted string.
    """
    db = BibDatabase()
    db.entries = entries
    return bibtexparser.dumps(db)
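A minimal usage sketch for the helper above (the entry dict is illustrative; any bibtexparser-style entry needs at least the 'ID' and 'ENTRYTYPE' keys):

entry = {
    'ENTRYTYPE': 'article',
    'ID': 'smith2020',
    'author': 'Smith, J.',
    'title': 'An example title',
    'year': '2020',
}
# With the default writer this yields something along the lines of:
# @article{smith2020,
#  author = {Smith, J.},
#  title = {An example title},
#  year = {2020}
# }
print(entries_to_str([entry]))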
Example #4
def work_to_bibtex(work, name=None, acronym=False, rules=None):
    """Convert work to bibtex text

    Doctest:

    .. doctest::

        >>> reload()
        >>> murta2014a = work_by_varname("murta2014a")
        >>> print(work_to_bibtex(murta2014a))
        @inproceedings{murta2014a,
          address = {Cologne, Germany},
          author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
          booktitle = {International Provenance and Annotation Workshop},
          pages = {71--83},
          publisher = {Springer},
          title = {no{W}orkflow: capturing and analyzing provenance of scripts},
          year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>

        Custom name:

        >>> reload()
        >>> murta2014a = work_by_varname("murta2014a")
        >>> print(work_to_bibtex(murta2014a, name="other"))
        @inproceedings{other,
          address = {Cologne, Germany},
          author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
          booktitle = {International Provenance and Annotation Workshop},
          pages = {71--83},
          publisher = {Springer},
          title = {no{W}orkflow: capturing and analyzing provenance of scripts},
          year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>

        Use acronym for place name:

        >>> print(work_to_bibtex(murta2014a, acronym=True))
        @inproceedings{murta2014a,
          address = {Cologne, Germany},
          author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
          booktitle = {IPAW},
          pages = {71--83},
          publisher = {Springer},
          title = {no{W}orkflow: capturing and analyzing provenance of scripts},
          year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>
    """
    result = work_to_bibtex_entry(work, name=name, acronym=acronym, rules=rules)
    db = BibDatabase()
    db.entries = [result]
    writer = BibTexWriter()
    writer.indent = "  "
    return writer.write(db)
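Example #5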
    def test_multiple_string_write(self):
        bib_database = BibDatabase()
        bib_database.strings['name1'] = 'value1'
        bib_database.strings['name2'] = 'value2'  # Order is important!
        result = bibtexparser.dumps(bib_database)
        expected = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n'
        self.assertEqual(result, expected)
Example #6
    def pull(self):
        user_agents = [
            'Mozilla/5.0 (Windows NT 6.1; WOW64)',
            'AppleWebKit/537.36 (KHTML, like Gecko)',
            'Chrome/35.0.1916.114 Safari/537.36'
        ]

        headers = {'User-Agent': " ".join(user_agents)}
        within = 'owners%%2Eowner%%3DHOSTED'
        sort = '%%5Fscore'
        export_format = 'bibtex'
        url_template = 'https://dl.acm.org/exportformats_search.cfm?query=%s&within=%s&srt=%s&expformat=%s'

        result = BibDatabase()

        for query in self.queries:
            url = url_template % (query, within, sort, export_format)
            response = requests.get(url, cookies=self.cookies, headers=headers)
            self.cookies.update(response.cookies)
            bibtex_parser = BibTexParser(customization=convert_to_unicode)

            result.get_entry_list().extend(
                bibtex_parser.parse(response.text).get_entry_list())

        return result
Example #7
def merge_folder_tree(folder, use_backup):
    """
    Merge bib files from the current subtree into a master bib file at the root.
    This function updates the 'file' link of each entry with the relative path
    to each subfolder that has been processed.

    Args:
        folder (str): relative or absolute path of the folder to process.

    Returns:
        Nothing, but creates a file named `master.bib` in the given folder.
    """
    db = BibDatabase()
    for subdir, _dirs, _files in os.walk(os.path.abspath(folder)):
        if os.path.exists(os.path.join(subdir, '.nobib')):
            continue  # Skip blacklisted folders
        reldir = os.path.relpath(subdir, os.path.abspath(folder))
        bib_path = os.path.join(subdir, 'biblio.bib')
        subdb = utils.read_bib_file(bib_path)
        for entry in subdb.entries:
            filename = utils.decode_filename_field(entry['file'])
            filename = os.path.join(reldir, filename)
            entry['file'] = utils.encode_filename_field(filename)
        db.entries += subdb.entries
    # Remove duplicated entries
    entries_dict = db.entries_dict
    db.entries = [val for key, val in entries_dict.items()]
    # Write result
    bib_path = os.path.join(folder, 'master.bib')
    utils.write_with_backup(bib_path, utils.write_bib(db, order=True), use_backup)
Example #8
def convert_ieee_xplore_json_to_bibtex_db(json_string):

    result = BibDatabase()
    for ieee_article in json.loads(json_string)['articles']:
        new_entry = dict()

        new_entry['ID'] = ieee_article['article_number']
        new_entry['ENTRYTYPE'] = parse_ieee_json_type(ieee_article)

        new_entry['abstract'] = parse_ieee_abstract(ieee_article)
        new_entry['title'] = ieee_article['title']

        if 'authors' in ieee_article:
            new_entry['author'] = parse_ieee_json_authors(ieee_article)

        new_entry['keywords'] = parse_ieee_json_keywords(ieee_article)

        new_entry['url'] = ieee_article['pdf_url']

        if new_entry['ENTRYTYPE'] == 'article':
            parse_extra_bibtex_article_fields(ieee_article, new_entry)

        elif new_entry['ENTRYTYPE'] == 'inproceedings':
            new_entry['booktitle'] = ieee_article['publication_title']

        article_date = parse_ieee_json_date(ieee_article)
        new_entry['year'] = article_date.strftime("%Y")
        new_entry['month'] = article_date.strftime("%B")

        new_entry['pages'] = ("%s-%s") % (ieee_article['start_page'],
                                          ieee_article['end_page'])

        result.get_entry_list().append(new_entry)

    return result
Example #9
def metaDictToBib(jobid, metadict, omit_keys, path_prefix):
    """Export meta data to bibtex format

    Args:
        jobid (int): id of job.
        metadict (DocMeta): meta dict of a doc.
        omit_keys (list): keys to omit in the converted dict.
        path_prefix (str): folder path to prepend to attachment file paths.

    Returns:
        rec (int): 0 if successful, 1 otherwise.
        jobid (int): the input jobid as it is.
        dbtext (str): formated bibtex entry, '' if <rec>==1.
        docid (int): id of the processed document.
    """

    try:
        alt_dict = INV_ALT_KEYS
        ord_dict = toOrdinaryDict(metadict, alt_dict, omit_keys, path_prefix)

        db = BibDatabase()
        db.entries = [
            ord_dict,
        ]
        writer = BibTexWriter()
        writer.indent = '    '
        writer.comma_first = False
        dbtext = writer.write(db)

        return 0, jobid, dbtext, metadict['id']

    except Exception:
        LOGGER.exception('Failed to write to bibtex')
        return 1, jobid, '', metadict['id']
Example #10
def save_tacl_bib(txt_file, year, volume):
    def name(n):
        t = n.split()
        return t[-1] + ', ' + ' '.join(t[:-1]) if len(t) > 1 else n.strip()

    entries = []
    d = None
    for i, line in enumerate(open(txt_file)):
        line = line.strip()
        j = i % 3
        if j == 0:
            authors = ' and '.join([name(n)
                                    for n in line[9:].split(';')]).strip()
            d = {
                'ID': line[:8],
                'ENTRYTYPE': 'article',
                'author': authors,
                'journal':
                'Transactions of the Association for Computational Linguistics',
                'year': str(year),
                'volume': str(volume)
            }
        elif j == 1:
            d['title'] = line
            entries.append(d)

    db = BibDatabase()
    db.entries = entries
    writer = BibTexWriter()
    with open(txt_file + '.bib', 'w') as bout:
        bout.write(writer.write(db))
Example #11
def format_paper_citation_dict(citation, indent='  '):
    """
    Format a citation dict for a paper or a list of papers into a BibTeX
    record string.

    :param citation: A ``Paper`` citation dict or list of such dicts.
    :param indent: Indentation to be used in BibTeX output.
    """
    if isinstance(citation, dict):
        entries = [citation]
    else:
        entries = citation

    # Handle conflicting ids for entries
    entries_ids = collections.defaultdict(lambda: 0)
    for entry in entries:
        entry_id = entry['ID']
        entries_ids[entry_id] += 1
        if entries_ids[entry_id] > 1:
            entry['ID'] = '%s_%s' % (entry_id, entries_ids[entry_id])

    writer = BibTexWriter()
    writer.indent = indent
    with io.StringIO('') as bibfile:
        db = BibDatabase()
        db.entries = entries
        bibfile.write(writer.write(db))
        return bibfile.getvalue().strip()
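A short sketch of the ID de-duplication described in the docstring above; the citation dicts are made up for illustration. The first entry keeps its key, while the second clashing one is rewritten as doe2019_2 before the BibTeX string is produced.

# Hypothetical input: two papers sharing the same citation key.
papers = [
    {'ENTRYTYPE': 'article', 'ID': 'doe2019', 'title': 'First paper', 'year': '2019'},
    {'ENTRYTYPE': 'article', 'ID': 'doe2019', 'title': 'Second paper', 'year': '2019'},
]
# The conflict handling above renames the second entry to 'doe2019_2'.
print(format_paper_citation_dict(papers))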
Example #12
def generate_bib_from_arxiv(arxiv_item, value, field="id"):
    # arxiv_cat = arxiv_item.arxiv_primary_category["term"]
    if field == "ti":
        journal = "arxiv:" + arxiv_item["id"].split("http://arxiv.org/abs/")[1]
    else:
        journal = "arxiv:" + value

    url = arxiv_item["pdfUrl"]
    title = arxiv_item["title"]
    authors = arxiv_item["authors"]
    if len(authors) > 0:
        first_author = authors[0].split(" ")
        authors = " and ".join([author for author in authors])
    else:
        first_author = authors
        authors = authors

    published = arxiv_item["published"].split("-")
    year = ''
    if len(published) > 1:
        year = published[0]
    bib = BibDatabase()
    bib.entries = [{
        "journal": journal,
        "url": url,
        "ID": year + first_author[0] + journal,
        "title": title,
        "year": year,
        "author": authors,
        "ENTRYTYPE": "article"
    }]
    bib = BibTexWriter().write(bib)
    return bib
Example #13
def merge_folder_tree(folder, use_backup):
    """
    Merge bib files from the current subtree into a master bib file at the root.
    This function updates the 'file' link of each entry with the relative path
    to each subfolder that has been processed.

    Args:
        folder (str): relative or absolute path of the folder to process.

    Returns:
        Nothing, but creates a file named `master.bib` in the given folder.
    """
    db = BibDatabase()
    for subdir, _dirs, _files in os.walk(os.path.abspath(folder)):
        if os.path.exists(os.path.join(subdir, '.nobib')):
            continue  # Skip blacklisted folders
        reldir = os.path.relpath(subdir, os.path.abspath(folder))
        bib_path = os.path.join(subdir, 'biblio.bib')
        subdb = utils.read_bib_file(bib_path)
        for entry in subdb.entries:
            filename = utils.decode_filename_field(entry['file'])
            filename = os.path.join(reldir, filename)
            entry['file'] = utils.encode_filename_field(filename)
        db.entries += subdb.entries
    # Remove duplicated entries
    entries_dict = db.entries_dict
    db.entries = [val for key, val in entries_dict.items()]
    # Write result
    bib_path = os.path.join(folder, 'master.bib')
    utils.write_with_backup(bib_path, utils.write_bib(db, order=True),
                            use_backup)
Example #14
def export(data, file_bool):
    """
        Convert the data to json or bibtex and write to a temporary file.

    :param data: The data containing the bibliographical details
    :param file_bool: True to export the file as JSON, False to export it as BibTeX
    :return: the filename and the mimetype

    """
    data_ = [k for i, j in data.items() if j for k in j]

    for i in data_:
        try:
            if 'timestamp' in i:
                i['timestamp'] = i['timestamp'].strftime('%Y/%m/%d')
        except AttributeError:
            pass
        for k in i:
            i[k] = str(i[k])

    path = Path(os.path.abspath(os.path.dirname(__file__))).parent
    if file_bool:
        mimetype = 'application/json'
        with tempfile.NamedTemporaryFile(dir=path, delete=False, suffix='.json') as temp:
            temp.write(bytes(json.dumps(data_), encoding='utf-8'))
    else:
        mimetype = 'application/x-bibtex'
        bib_db = BibDatabase()
        bib_db.entries = data_
        bibtex_str = bibtexparser.dumps(bib_db)
        with tempfile.NamedTemporaryFile(dir=path, delete=False, suffix='.bib') as temp:
            temp.write(bytes(bibtex_str, encoding='utf-8'))

    return temp.name.split('\\')[-1], mimetype
Example #15
def save_citation(citation_record):
    cite_anchor = citation_record.find('a', {'class': 'gs_nph', 'href': '#', "role": "button"})
    if not cite_anchor or not cite_anchor['onclick']:
        logging.warn("No Cite anchor for citation: %s" % citation_record)
        return
    citation_id = cite_anchor['onclick'].split(',')[1][1:-1]
    logging.info("Getting formated cite from citation id: " + citation_id)
    params = {"q": "info:%s:scholar.google.com/" % citation_id, "output": "cite"}
    soup = create_soup_by_url("https://scholar.google.com/scholar", params)
    bib_anchor = soup.find('a', {"class": "gs_citi"})
    if not bib_anchor:
        logging.debug("BibTex page soup is: %s" % soup.getText())
        logging.warn("No BibTex citation provided for citation: %s" % citation_id)
        return
    soup = create_soup_by_url(bib_anchor['href'])
    global citation_num
    citation_num += 1
    # Adding a tag to the bib entry about google scholar citation ID
    citation_entry = bibtexparser.loads(soup.getText()).entries[0]
    citationID = citation_entry['ID'] # e.g., melville2004review
    citation_entry["gscholar_id"] = citation_id
    db_entry=[]
    db_entry.append(citation_entry)
    db = BibDatabase()
    db.entries = db_entry
    g_bib_entry = bibtexparser.dumps(db)
    bib_entry = "%% [%d]\n%s" % (citation_num, g_bib_entry)
    logging.info(bib_entry.strip())
    with open(opts.citation_name, "a+") as f:
        f.write(bib_entry.encode('utf-8'))
    if opts.should_download:
        pdf_div = citation_record.find('div', {"class": "gs_ggs gs_fl"})
        if pdf_div:
            download_pdf(pdf_div.a['href'], citationID)
Example #16
    def make_bibtex_entries(self, meta_data):
        """
        Converts the metadata from crossref.org into a formatted bibliography
        entry, which is returned.  It also saves the entry into two
        bibliographic repositories.  Temp.bib is overwritten with each run of
        this function.  Repository.bib accumulates over time.  The idea is to
        automatically build a bibliography reference as papers get imported.

        :param meta_data: Raw metadata from Crossref.org
        :return: formatted bibliography
        """
        #TODO:base location of temp.bib and repository.bib on destination path

        if meta_data is None:
            return None
        # print(meta_data)
        db = BibDatabase()
        db.entries = meta_data
        # print(db.entries)
        writer = BibTexWriter()
        # Write the formatted entries (not the raw metadata) to both files.
        with open("../Scratch/temp.bib", 'w', encoding='utf-8') as bibfile:
            bibfile.write(writer.write(db))
        with open("../Scratch/repository.bib", 'a',
                  encoding='utf-8') as bibfile:
            bibfile.write(writer.write(db))
        with open("../Scratch/temp.bib", encoding='utf-8') as bibtexfile:
            bib_database = bibtexparser.load(bibtexfile)

        return bib_database
Example #17
def entries_to_file(entries, fn):
    writer = BibTexWriter()

    db = BibDatabase()
    db.entries = entries
    with codecs.open(fn, 'w', "utf-8") as bibtex_file:
        bibtex_file.write(writer.write(db))
Example #18
def extract_citation(entry):
    entry = copy.deepcopy(entry)
    del entry['type']
    single_entry_db = BibDatabase()
    single_entry_db.entries = [entry]
    writer = BibTexWriter()
    return bibtexparser.dumps(single_entry_db, writer).strip()
Example #19
    def exif_pdf(self, filename):
        fields = ["Author", "Year", "Journal", "Title", "Publisher",
                  "Page", "Address", "Annote", "Booktitle", "Chapter",
                  "Crossref", "Edition", "Editor", "HowPublished",
                  "Institution", "Month", "Note", "Number",
                  "Organization", "Pages", "School",
                  "Series", "Type", "Url", "Volume", "Doi", "File"]
        op = pexif.get_json(filename)
        try:
            new_op = {
                field: str(value) for field in fields
                for key, value in op[0].items() if field.lower() in key.lower()
            }
            if 'Author' not in new_op:
                new_op['Author'] = 'Unknown'
            id_auth = new_op["Author"].split()[-1]
            id_tit = new_op["Title"].split()[:2]
            id_tit.append(id_auth)
            id_val = "_".join(id_tit)
            new_op["ID"] = str(id_val)
            new_op["ENTRYTYPE"] = "article"
            op[0] = new_op
            db = BibDatabase()
            db.entries = op
            writer = BibTexWriter()
            pdf_buff = writer.write(db)
            self.create_textview(pdf_buff)
        except Exception:
            self.Messages.on_error_clicked("Can't extract data from this pdf file", "Try other methods")
Example #20
    def load_and_replace(bibtex_file):
        with open(os.path.join('publications', bibtex_file),
                  'r',
                  encoding="utf-8") as f:
            fdata = f.read()
            pdict = BibTexParser(fdata).get_entry_dict()
            plist = BibTexParser(fdata, bc.author).get_entry_list()
        by_year = {}

        for pub in plist:
            pubd = pdict[pub['ID']]
            db = BibDatabase()
            db.entries = [pubd]
            writer = BibTexWriter()
            writer.indent = '\t'
            bibentry = writer.write(db)
            pub['BIB_ENTRY'] = bibentry
            for field in pub:
                if field == 'BIB_ENTRY':
                    continue
                pub[field] = context.make_replacements(pub[field])
            pub['author'] = _format_author_list(pub['author'])
            y = int(pub['year']) if 'year' in pub else 1970
            if y not in by_year:
                by_year[y] = []
            by_year[y].append(pub)

        ret = []
        for year, pubs in sorted(by_year.items(), reverse=True):
            for pub in pubs:
                ret.append(pub)

        return ret
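Example #21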
    def test_multiple_string_write(self):
        bib_database = BibDatabase()
        bib_database.strings['name1'] = 'value1'
        bib_database.strings['name2'] = 'value2'  # Order is important!
        result = bibtexparser.dumps(bib_database)
        expected = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n'
        self.assertEqual(result, expected)
Example #22
    def html_citation(self, label):
        bibdb = BibDatabase()
        bibdb.entries = [self.bib_dict[label]]
        bibtex_str = bibtexparser.dumps(bibdb)
        from subprocess import Popen, PIPE
        import shlex
        f = tempfile.NamedTemporaryFile(suffix=".bib", delete=False)
        f.write(bibtex_str)
        f.close()
        system_str = 'bibtex2html -nokeys -o - -s plain -nodoc -q {fname}'.format(
            fname=f.name)
        args = shlex.split(system_str)
        proc = Popen(args, stdout=PIPE, stderr=PIPE)
        out, err = proc.communicate()
        exitcode = proc.returncode
        f.close()
        os.remove(f.name)
        result = out.replace(
            """<!-- This document was automatically generated with bibtex2html 1.98
(see http://www.lri.fr/~filliatr/bibtex2html/),
with the following command:
bibtex2html -nokeys -o - -s plain -nodoc -q temp.bib  -->




""", '').replace(
                """<hr><p><em>This file was generated by
<a href="http://www.lri.fr/~filliatr/bibtex2html/">bibtex2html</a> 1.98.</em></p>""",
                '')
        #print out, err, exitcode
        htmlstr = '<li name="{name}">{citation}</li>\n'.format(name=label,
                                                               citation=result)
        return htmlstr
Example #23
def format_paper_citation_dict(citation, indent='  '):
    """
    Format a citation dict for a paper or a list of papers into a BibTeX
    record string.

    :param citation: A ``Paper`` citation dict or list of such dicts.
    :param indent: Indentation to be used in BibTeX output.
    """
    if isinstance(citation, dict):
        entries = [citation]
    else:
        entries = citation

    # Handle conflicting ids for entries
    entries_ids = collections.defaultdict(lambda: 0)
    for entry in entries:
        entry_id = entry['ID']
        entries_ids[entry_id] += 1
        if entries_ids[entry_id] > 1:
            entry['ID'] = '%s_%s' % (entry_id, entries_ids[entry_id])

    writer = BibTexWriter()
    writer.indent = indent
    with io.StringIO('') as bibfile:
        db = BibDatabase()
        db.entries = entries
        bibfile.write(writer.write(db))
        return bibfile.getvalue().strip()
Example #24
def parse_bibtex(input_path):
    feedly = pd.read_csv('dump.tsv', sep='\t', encoding='ISO-8859-1')
    with open(input_path, encoding="ISO-8859-1") as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)

    not_found = []
    feedly_titles = [
        f.upper().strip().replace('-', '') for f in feedly.title.tolist()
    ]
    new_bibtex = []
    db = BibDatabase()
    db.entries = []

    for e in bib_database.entries:
        match_phrase = e['title'].upper().strip().replace('-', '')
        if match_phrase in feedly_titles:
            db.entries.append(e)
            continue

        max_fuzz = 0
        for t in feedly_titles:
            fuzz_ratio = fuzz.ratio(match_phrase, t)
            if fuzz_ratio > max_fuzz:
                max_fuzz = fuzz_ratio
            if fuzz_ratio > 80:
                db.entries.append(e)
                break
        else:
            print(str(max_fuzz) + ': ' + e['title'] + '\n')
            not_found.append(e['title'])
    print('{} entries removed'.format(len(not_found)))

    return db
Example #25
    def export_to_bibtex_one_file(self, path: str = "all.bib"):
        """stores publications in bibtex format in one file

        Parameters
        ----------
        path : optional
            path where the resulting file should be stored, by default "all.bib"

        Raises
        ------
        KeyError
            if the type of publication and the handle are not specified
        """
        self._create_dir(path)
        for pub in self._dep_pubs:
            meta = pub.get_bibtex_representation()
            if not meta:
                print("This pub has no meta")
            else:
                if meta["type"] and meta["handle"]:
                    handle = meta.pop("handle")
                    pub_type = meta.pop("type")
                    db = BibDatabase()
                    db.entries = [meta.copy()]
                    db.entries[0].update({"ID": handle, "ENTRYTYPE": pub_type})
                    writer = BibTexWriter()
                    tot = []
                    for key in meta:
                        tot.append(key)
                    writer.display_order = tot
                    with open(path, "a") as bibfile:
                        bibfile.write(writer.write(db))
                else:
                    raise KeyError("the type of publication and metdata"
                                   + "are required")
Example #26
def merge(entry1, entry2):
    db = BibDatabase()
    entries = {}
    keys1 = entry1.keys()
    keys2 = entry2.keys()
    intersection = intersect(keys1, keys2)
    union = get_union(keys1, keys2)
    not_intersect = not_intersection(union, intersection)

    #The two entries have the same keys, so everything needs to be merged
    if not not_intersect:
        for key in keys1:
            if key == 'author':
                author = merge_author(entry1[key], entry1['author_norm'], entry2[key], entry2['author_norm'])
                author_norm = normalize_author(str(author))
                entries = add_field(entries, key, author)
                entries = add_field(entries, 'author_norm', author_norm)
            elif key == 'editor':
                editor = merge_author(entry1[key], entry1['editor_norm'], entry2[key], entry2['editor_norm'])
                editor_norm = normalize_author(str(editor))
                entries = add_field(entries, key, editor)
                entries = add_field(entries, 'editor_norm', editor_norm)
            elif key == 'keywords' or key == 'topics':
                entries = add_field(entries, key, merge_keywords(entry1[key], entry2[key]))
            elif key == 'month':
                entries = add_field(entries, key, entry1[key])
            elif len(entry1[key]) == len(entry2[key]) or len(entry1[key]) < len(entry2[key]):
                entries = add_field(entries, key, entry2[key])
            else:
                entries = add_field(entries, key, entry1[key])
    else:
        #All the keys in the two entries aren't the same, so some need to be merged
        #some can just be written
        #print "Entries are not the same!"
        #print keys1, keys2
        for key in intersection:
            if key == 'author':
                author = merge_author(entry1[key], entry1['author_norm'], entry2[key], entry2['author_norm'])
                entries = add_field(entries, key, author)
            elif key == 'editor':
                editor = merge_author(entry1[key], entry1['editor_norm'], entry2[key], entry2['editor_norm'])
                entries = add_field(entries, key, editor)
            elif key == 'keywords' or key == 'topics':
                entries = add_field(entries, key, merge_keywords(entry1[key], entry2[key]))
            elif key == 'month':
                entries = add_field(entries, key, entry1[key])
            elif key == 'doi':
                entries = add_field(entries, get_keycount(intersection, key), entry1[key])
            elif len(entry1[key]) == len(entry2[key]) or len(entry1[key]) < len(entry2[key]):
                entries = add_field(entries, key, entry2[key])
            else:
                entries = add_field(entries, key, entry1[key])
        for key in not_intersect:
            if key in keys1:
                entries = add_field(entries, key, entry1[key])
            elif key in keys2:
                entries = add_field(entries, key, entry2[key])
    
    db.entries = [entries]
    return db
Example #27
    def test_store_biopython_seq_record_DOI(self):
        """Tests - __init__ and store_biopython_seq_record with DOI."""
        # DOI TESTING
        db2 = BibDatabase()
        db2.entries = [{
            "journal": "Nice Journal",
            "comments": "A comment",
            "pages": "12--23",
            "month": "jan",
            "abstract": "This is an abstract. This line should be "
            "long enough to test multilines...",
            "title": "An amazing title",
            "year": "2013",
            "doi": "10.1186/s12864-016-2535-300002",
            "volume": "12",
            "ID": "Teste2018",
            "author": "Foo, b. and Foo1, b. and Foo b.",
            "keyword": "keyword1, keyword2",
            "ENTRYTYPE": "article",
        }]
        for entry in db2.entries:
            bibtest3 = PublicationLoader()
            bibtest3.store_bibtex_entry(entry)
        test_bibtex3 = Pub.objects.get(uniquename="Teste2018")
        test_bibtex3_pubdbxref = PubDbxref.objects.get(pub=test_bibtex3)
        test_bibtex3_dbxref = Dbxref.objects.get(
            dbxref_id=test_bibtex3_pubdbxref.dbxref_id)
        self.assertEqual("10.1186/s12864-016-2535-300002",
                         test_bibtex3_dbxref.accession)

        Organism.objects.create(genus="Mus", species="musculus")
        test_seq_file_pub = SequenceLoader(
            filename="sequence_doi.fasta",
            doi="10.1186/s12864-016-2535-300002")
        test_seq_obj_pub = SeqRecord(Seq("acgtgtgtgcatgctagatcgatgcatgca"),
                                     id="chr2",
                                     description="chromosome 2")
        test_seq_file_pub.store_biopython_seq_record(test_seq_obj_pub,
                                                     "assembly",
                                                     "Mus musculus")

        test_feature_doi = Feature.objects.get(name="chromosome 2")

        self.assertEqual("chr2", test_feature_doi.uniquename)
        test_feature_pub_doi = FeaturePub.objects.get(
            pub_id=test_bibtex3.pub_id)
        test_pub_dbxref_doi = PubDbxref.objects.get(
            pub_id=test_feature_pub_doi.pub_id)
        test_dbxref_doi = Dbxref.objects.get(
            dbxref_id=test_pub_dbxref_doi.dbxref_id)
        self.assertEqual("10.1186/s12864-016-2535-300002",
                         test_dbxref_doi.accession)
        # test remove_file
        self.assertTrue(
            Dbxrefprop.objects.filter(value="sequence_doi.fasta").exists())
        call_command("remove_file", "--name=sequence_doi.fasta",
                     "--verbosity=0")
        self.assertFalse(
            Dbxrefprop.objects.filter(value="sequence_doi.fasta").exists())
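Example #28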
    def test_write_common_strings(self):
        bib_database = BibDatabase()
        bib_database.load_common_strings()
        writer = BibTexWriter(write_common_strings=True)
        result = bibtexparser.dumps(bib_database, writer=writer)
        with io.open('bibtexparser/tests/data/common_strings.bib') as f:
            expected = f.read()
        self.assertEqual(result, expected)
Example #29
    def pull(self):
        result = BibDatabase()
        database = self.source_pipe.pull()

        for entry in database.get_entry_list():
            if self.accept(entry):
                result.get_entry_list().append(entry)
        return result
Example #30
def test_bibexport():
    a = bb.entries_dict['Yttri:Urban']
    db = BibDatabase()
    db.entries = [a]
    writer = BibTexWriter()
    with open('testoutbib.bib', 'w') as bibfile:
        bibfile.write(writer.write(db))
    sys.exit()
Example #31
def getBibtexStrFromAbstractDict(abstractDict):
    abstractDict.pop('url')
    abstractDict.pop('journal')
    db = BibDatabase()
    writer = BibTexWriter()
    writer.indent = '    '
    db.entries = [abstractDict]
    return writer.write(db)
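Example #32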
    def test_write_dependent_strings(self):
        bib_database = BibDatabase()
        bib_database.strings['title'] = 'Mr'
        expr = BibDataStringExpression([BibDataString(bib_database, 'title'), 'Smith'])
        bib_database.strings['name'] = expr
        result = bibtexparser.dumps(bib_database)
        expected = '@string{title = {Mr}}\n\n@string{name = title # {Smith}}\n\n'
        self.assertEqual(result, expected)
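Example #33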
    def test_write_common_strings(self):
        bib_database = BibDatabase()
        bib_database.load_common_strings()
        writer = BibTexWriter(write_common_strings=True)
        result = bibtexparser.dumps(bib_database, writer=writer)
        with io.open('bibtexparser/tests/data/common_strings.bib') as f:
            expected = f.read()
        self.assertEqual(result, expected)
Example #34
    def __init__(self,
                 data=None,
                 customization=None,
                 ignore_nonstandard_types=True,
                 homogenize_fields=False,
                 interpolate_strings=True,
                 common_strings=False,
                 string_provider=None):
        """
        Creates a parser for reading BibTeX files

        :return: parser
        :rtype: `BibTexParser`
        """
        self.bib_database = BibDatabase(string_provider)

        #: Load common strings such as months abbreviation
        #: Default: `False`.
        self.common_strings = common_strings
        if self.common_strings:
            self.bib_database.load_common_strings()

        #: Callback function to process BibTeX entries after parsing,
        #: for example to create a list from a string with multiple values.
        #: By default all BibTeX values are treated as simple strings.
        #: Default: `None`.
        self.customization = customization

        #: Ignore non-standard BibTeX types (`book`, `article`, etc).
        #: Default: `True`.
        self.ignore_nonstandard_types = ignore_nonstandard_types

        #: Sanitize BibTeX field names, for example change `url` to `link` etc.
        #: Field names are always converted to lowercase names.
        #: Default: `False`.
        self.homogenize_fields = homogenize_fields

        #: Interpolate Bibtex Strings or keep the structure
        self.interpolate_strings = interpolate_strings

        # On some sample data files, the character encoding detection simply
        # hangs. We are going to default to utf8, and mandate it.
        self.encoding = 'utf8'

        # pre-defined set of key changes
        self.alt_dict = {
            'keyw': u'keyword',
            'keywords': u'keyword',
            'authors': u'author',
            'editors': u'editor',
            'urls': u'url',
            'link': u'url',
            'links': u'url',
            'subjects': u'subject'
        }

        # Setup the parser expression
        self._init_expressions()
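Example #35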
    def test_write_dependent_strings(self):
        bib_database = BibDatabase()
        bib_database.strings['title'] = 'Mr'
        expr = BibDataStringExpression(
            [BibDataString(bib_database, 'title'), 'Smith'])
        bib_database.strings['name'] = expr
        result = bibtexparser.dumps(bib_database)
        expected = '@string{title = {Mr}}\n\n@string{name = title # {Smith}}\n\n'
        self.assertEqual(result, expected)
Example #36
def get_bibdatabase():
    """
    Create an empty BibDatabase
    """

    bib_database = BibDatabase()
    bib_database.entries = []

    return bib_database
Example #37
def write_bib_file(list_of_cited_entries, output_fname):
    db = BibDatabase()
    db.entries = list_of_cited_entries
    writer = BibTexWriter()

    with open(output_fname, 'w') as bibtex_file:
        bibtex_file.write(writer.write(db))

    print('Output written to ' + output_fname)
Example #38
    def bibtex(self) -> str:
        """Contains the publication as a bibtex entry

        Returns:
            str -- a bibtex entry
        """
        a = BibDatabase()
        a.entries = [self.bib]
        return bibtexparser.dumps(a)
Example #39
    def test_align(self):
        bib_database = BibDatabase()
        bib_database.entries = [{'ID': 'abc123',
                                 'ENTRYTYPE': 'book',
                                 'author': 'test',
                                 'thisisaverylongkey': 'longvalue'}]
        writer = BibTexWriter()
        writer.align_values = True
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{abc123,
 author             = {test},
 thisisaverylongkey = {longvalue}
}

"""
        self.assertEqual(result, expected)

        with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)
        writer = BibTexWriter()
        writer.contents = ['entries']
        writer.align_values = True
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{Toto3000,
 author    = {Toto, A and Titi, B},
 title     = {A title}
}

@article{Wigner1938,
 author    = {Wigner, E.},
 doi       = {10.1039/TF9383400029},
 issn      = {0014-7672},
 journal   = {Trans. Faraday Soc.},
 owner     = {fr},
 pages     = {29--41},
 publisher = {The Royal Society of Chemistry},
 title     = {The transition state method},
 volume    = {34},
 year      = {1938}
}

@book{Yablon2005,
 author    = {Yablon, A.D.},
 publisher = {Springer},
 title     = {Optical fiber fusion slicing},
 year      = {2005}
}

"""
        self.assertEqual(result, expected)
Example #40
File: gui.py Project: Juvawa/abi
def write_selected_to_file(selected):
    db = BibDatabase()
    result = []
    for item in selected:
        path = str(bib_dir) + str(files[item])
        with open(path, 'r') as f:
            db = bibtexparser.load(f)
            result.append(db.entries[0])
    db.entries = result
    print db.entries
    with open(website_dir, 'w') as f:
        bibtexparser.dump(db, f)

    subprocess.call(['bib2html', '-f', website_dir])
Example #41
    def test_entry_separator(self):
        bib_database = BibDatabase()
        bib_database.entries = [{'ID': 'abc123',
                                 'ENTRYTYPE': 'book',
                                 'author': 'test'}]
        writer = BibTexWriter()
        writer.entry_separator = ''
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{abc123,
 author = {test}
}
"""
        self.assertEqual(result, expected)
Example #42
    def test_sort_missing_field(self):
        bib_database = BibDatabase()
        bib_database.entries = [{'ID': 'b',
                                 'ENTRYTYPE': 'article',
                                 'year': '2000'},
                                {'ID': 'c',
                                 'ENTRYTYPE': 'book',
                                 'year': '2010'},
                                {'ID': 'a',
                                 'ENTRYTYPE': 'book'}]
        writer = BibTexWriter()
        writer.order_entries_by = ('year', )
        result = bibtexparser.dumps(bib_database, writer)
        expected = "@book{a\n}\n\n@article{b,\n year = {2000}\n}\n\n@book{c,\n year = {2010}\n}\n\n"
        self.assertEqual(result, expected)
Example #43
    def test_indent(self):
        bib_database = BibDatabase()
        bib_database.entries = [{'id': 'abc123',
                                 'type': 'book',
                                 'author': 'test'}]
        writer = BibTexWriter()
        writer.indent = '  '
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{abc123,
  author = {test}
}

"""
        self.assertEqual(result, expected)
Example #44
    def __str__(self):
        bib = BibDatabase()
        bib.entries = [{
            'ENTRYTYPE': 'article',
            'ID': self.entry_number,
            'author': self.author,
            'journal': self.journal,
            'title': self.title,
            'year': self.year,
            'volume': self.volume,
            'number': self.number,
            'pages': self.pages,
            'abstract': self.abstract,
            'keyword': self.keyword,
            'doi': self.doi,
            'issn': self.issn
        }]
        return bibtexparser.dumps(bib)
Example #45
    def __init__(self, data=None,
                 customization=None,
                 ignore_nonstandard_types=True,
                 homogenize_fields=False,
                 interpolate_strings=True,
                 common_strings=False):
        """
        Creates a parser for reading BibTeX files

        :return: parser
        :rtype: `BibTexParser`
        """
        self.bib_database = BibDatabase()

        #: Load common strings such as months abbreviation
        #: Default: `False`.
        self.common_strings = common_strings
        if self.common_strings:
            self.bib_database.load_common_strings()

        #: Callback function to process BibTeX entries after parsing,
        #: for example to create a list from a string with multiple values.
        #: By default all BibTeX values are treated as simple strings.
        #: Default: `None`.
        self.customization = customization

        #: Ignore non-standard BibTeX types (`book`, `article`, etc).
        #: Default: `True`.
        self.ignore_nonstandard_types = ignore_nonstandard_types

        #: Sanitize BibTeX field names, for example change `url` to `link` etc.
        #: Field names are always converted to lowercase names.
        #: Default: `False`.
        self.homogenize_fields = homogenize_fields

        #: Interpolate Bibtex Strings or keep the structure
        self.interpolate_strings = interpolate_strings

        # On some sample data files, the character encoding detection simply
        # hangs. We are going to default to utf8, and mandate it.
        self.encoding = 'utf8'

        # pre-defined set of key changes
        self.alt_dict = {
            'keyw': u'keyword',
            'keywords': u'keyword',
            'authors': u'author',
            'editors': u'editor',
            'urls': u'url',
            'link': u'url',
            'links': u'url',
            'subjects': u'subject'
        }

        # Setup the parser expression
        self._init_expressions()
Example #46
    def _entries_to_bibtex(self, bib_database):
        bibtex = ''
        if self.order_entries_by:
            # TODO: allow sort field does not exist for entry
            entries = sorted(bib_database.entries, key=lambda x: BibDatabase.entry_sort_key(x, self.order_entries_by))
        else:
            entries = bib_database.entries

        for entry in entries:
            bibtex += self._entry_to_bibtex(entry)
        return bibtex
Example #47
    def write_selected_to_file(self, selected, website):
        db = BibDatabase()
        result = []
        for item in selected:
            path = str(self.bib_dir) + str(item)
            with open(path, 'r') as f:
                db = bibtexparser.load(f)
                result.append(db.entries[0])
        db.entries = result
        if website == 'personal':
            with open(self.personal_website_bib, 'w') as f:
                bibtexparser.dump(db, f)
        elif website == 'group':
            with open(self.group_website_bib, 'w') as f:
                bibtexparser.dump(db, f)

        #Make sure the file is uploaded to Dropbox before it is send to BibBase
        time.sleep(1)
        
        #Query to BibBase with the right URL
        if website == 'personal':
            html = urllib2.urlopen("http://bibbase.org/show?bib=" + str(self.personal_link)).read()
        elif website == 'group':
            html = urllib2.urlopen("http://bibbase.org/show?bib=" + str(self.group_link)).read()
        #The html does not contain styling and jquery or javascript
        html = '<head>' + \
            '<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>' + \
            '<script src="http://bibbase.org/js/bibbase.min.js" type="text/javascript"></script>' + \
            '<link rel="stylesheet" href="http://maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css">' + \
            '<link rel="stylesheet" href="http://bibbase.org/css/bootstrap.min.css" type="text/css" media="screen">' + \
            '<link rel="stylesheet" href="http://bibbase.org/css/styles/default.css" type="text/css" media="screen">' + \
            '<link rel="stylesheet" href="http://bibbase.org/css/styles/common.css" type="text/css" media="screen">' + \
            '<link rel="stylesheet" href="http://bibbase.org/css/styles/hide_text.css" type="text/css" media="screen">' + str(html)

        if website == 'personal':
            with open(self.personal_website_html, 'w') as website:
                website.write(html)    
        elif website == 'group':
            with open(self.group_website_html, 'w') as website:
                website.write(html)    
Example #48
def main():
    if len(sys.argv) < 3:
        print("Wrong number of arguments. Usage: \n")
        print("python3 dump_db.py name.db dump.bib")
        sys.exit(1)

    print("Dump database")
    print("Database: ", sys.argv[1])

    engine = create_engine('sqlite:///app.db')
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()

    db = BibDatabase()
    db.entries = []

    dbentries = session.query(BiblioEntry)
    for e in dbentries:
        db.entries.append(
            {'journal': e.journal,
             'title': e.title,
             'year': str(e.year),
             'publisher': e.publisher,
             'school': e.school,
             'ID': e.ID,
             'url': e.url,
             'author': e.authors,
             'keyword': e.keywords,
             'ENTRYTYPE': e.ENTRYTYPE}
                        )

    print("Write file on", sys.argv[2])
    writer = BibTexWriter()
    with open(sys.argv[2], 'w') as bibfile:
        bibfile.write(writer.write(db))

    session.close()
    print("Connection closed.")
Example #49
    def _entries_to_bibtex(self, bib_database):
        bibtex = ''
        if self.order_entries_by:
            # TODO: allow sort field does not exist for entry
            entries = sorted(bib_database.entries, key=lambda x: BibDatabase.entry_sort_key(x, self.order_entries_by))
        else:
            entries = bib_database.entries

        if self.align_values:
            # determine maximum field width to be used
            widths = [max(map(len, entry.keys())) for entry in entries]
            self._max_field_width = max(widths)

        for entry in entries:
            bibtex += self._entry_to_bibtex(entry)
        return bibtex
Example #50
File: apis.py Project: pubs/pubs
def arxiv2bibtex(arxiv_id, try_doi=True, ui=None):
    """Return a bibtex string from an arXiv ID

    :param arxiv_id: arXiv id, with or without the `arXiv:` prefix and version
                     suffix (e.g. `v1`). Old and new styles are accepted. Here are
                     examples of accepted identifiers: `1510.00322`,
                     `arXiv:1510.00322`, `0901.0512`, `arXiv:0901.0512`,
                     `hep-ph/9409201` or `arXiv:hep-ph/9409201`.
                     Note that the `arXiv:` prefix will be automatically
                     removed, and the version suffix automatically added if
                     missing.
    :param try_doi:  if a DOI is referenced in the arXiv metadata,
                     try to download it instead. If that fails for any reason,
                     falls back to the arXiv, with a warning message, if the
                     UI is provided.
    :param ui:       if not None, will display a warning if the doi request
                     fails.
    """
    ## handle errors
    url = 'https://export.arxiv.org/api/query?id_list={}'.format(arxiv_id)
    try:
        r = requests.get(url)
        if r.status_code == 400:  # bad request
            msg = ("the arXiv server returned a bad request error. The "
                   "arXiv id {} is possibly invalid or malformed.".format(arxiv_id))
            raise ReferenceNotFoundError(msg)
        r.raise_for_status()  # raise an exception for HTTP errors:
                              # 401, 404, 400 if `ui` is None, etc.
    except requests.exceptions.RequestException as e:
        msg = ("connection error while retrieving arXiv data for "
               "'{}': {}".format(arxiv_id, e))
        raise ReferenceNotFoundError(msg)

    feed = feedparser.parse(r.text)
    if len(feed.entries) == 0:  # no results.
        msg = "no results for arXiv id {}".format(arxiv_id)
        raise ReferenceNotFoundError(msg)
    if len(feed.entries) > 1:  # I don't know how that could happen, but let's
                               # be ready for it.
        results = '\n'.join('{}. {}'.format(i, entry['title'])
                            for entry in feed.entries)
        msg = ("multiple results for arXiv id {}:\n{}\nThis is unexpected. "
               "Please submit an issue at "
               "https://github.com/pubs/pubs/issues").format(arxiv_id, choices)
        raise ReferenceNotFoundError(msg)

    entry = feed.entries[0]

    ## try to return a doi instead of the arXiv reference
    if try_doi and 'arxiv_doi' in entry:
        try:
            return doi2bibtex(entry['arxiv_doi'])
        except ReferenceNotFoundError as e:
            if ui is not None:
                ui.warning(str(e))

    ## create a bibentry from the arXiv response.
    db = BibDatabase()
    entry_id = _extract_arxiv_id(entry)
    author_str = ' and '.join(
        [author['name'] for author in entry['authors']])
    db.entries = [{
        'ENTRYTYPE': 'article',
        'ID': entry_id,
        'author': author_str,
        'title': entry['title'],
        'year': str(entry['published_parsed'].tm_year),
        'month': _months[entry['published_parsed'].tm_mon-1],
        'eprint': entry_id,
        'eprinttype': 'arxiv',
        'date': entry['published'], # not really standard, but a resolution more
                                    # granular than months is increasingly relevant.
        'url': entry['link'],
        'urldate': datetime.datetime.utcnow().isoformat() + 'Z' # can't hurt.
    }]
    # we don't add eprintclass for old-style ids, as it is in the id already.
    if not _is_arxiv_oldstyle(entry_id):
        db.entries[0]['eprintclass'] = entry['arxiv_primary_category']['term']
    if 'arxiv_doi' in entry:
        db.entries[0]['arxiv_doi'] = entry['arxiv_doi']

    bibtex = bibtexparser.dumps(db)
    return bibtex
Example #51
class BibTexParser(object):
    """
    A parser for reading BibTeX bibliographic data files.

    Example::

        from bibtexparser.bparser import BibTexParser

        bibtex_str = ...

        parser = BibTexParser()
        parser.ignore_nonstandard_types = False
        parser.homogenize_fields = False
        parser.common_strings = False
        bib_database = bibtexparser.loads(bibtex_str, parser)

    """

    def __new__(cls, data=None, **args):
        """
        To catch the old API structure in which creating the parser would
        immediately parse and return data.
        """

        if data is None:
            return super(BibTexParser, cls).__new__(cls)
        else:
            # For backwards compatibility: if data is given, parse
            # and return the `BibDatabase` object instead of the parser.
            return parse(data, **args)

    def __init__(self, data=None,
                 customization=None,
                 ignore_nonstandard_types=True,
                 homogenize_fields=False,
                 common_strings=False):
        """
        Creates a parser for reading BibTeX files

        :return: parser
        :rtype: `BibTexParser`
        """
        self.bib_database = BibDatabase()

        #: Load common strings such as months abbreviation
        #: Default: `False`.
        self.common_strings = common_strings
        if self.common_strings:
            self.bib_database.load_common_strings()

        #: Callback function to process BibTeX entries after parsing,
        #: for example to create a list from a string with multiple values.
        #: By default all BibTeX values are treated as simple strings.
        #: Default: `None`.
        self.customization = customization

        #: Ignore non-standard BibTeX types (`book`, `article`, etc).
        #: Default: `True`.
        self.ignore_nonstandard_types = ignore_nonstandard_types

        #: Sanitize BibTeX field names, for example change `url` to `link` etc.
        #: Field names are always converted to lowercase names.
        #: Default: `False`.
        self.homogenize_fields = homogenize_fields

        # On some sample data files, the character encoding detection simply
        # hangs. We are going to default to utf8, and mandate it.
        self.encoding = 'utf8'

        # pre-defined set of key changes
        self.alt_dict = {
            'keyw': u'keyword',
            'keywords': u'keyword',
            'authors': u'author',
            'editors': u'editor',
            'url': u'link',
            'urls': u'link',
            'links': u'link',
            'subjects': u'subject'
        }

        # Setup the parser expression
        self._init_expressions()

    def parse(self, bibtex_str, partial=False):
        """Parse a BibTeX string into an object

        :param bibtex_str: BibTeX string
        :type: str or unicode
        :param partial: If True, print errors only on parsing failures.
        If False, an exception is raised.
        :type: boolean
        :return: bibliographic database
        :rtype: BibDatabase
        """
        bibtex_file_obj = self._bibtex_file_obj(bibtex_str)
        try:
            self._expr.parseFile(bibtex_file_obj)
        except self._expr.ParseException as exc:
            logger.error("Could not parse properly, starting at %s", exc.line)
            if not partial:
                raise exc
        return self.bib_database

    def parse_file(self, file, partial=False):
        """Parse a BibTeX file into an object

        :param file: BibTeX file or file-like object
        :type: file
        :param partial: If True, print errors only on parsing failures.
        If False, an exception is raised.
        :type: boolean
        :return: bibliographic database
        :rtype: BibDatabase
        """
        return self.parse(file.read(), partial=partial)

    def _init_expressions(self):
        """
        Defines all parser expressions used internally.
        """
        self._expr = BibtexExpression()

        # Handle string as BibDataString object
        self._expr.set_string_name_parse_action(
            lambda s, l, t:
                BibDataString(self.bib_database, t[0]))
        self._expr.set_string_expression_parse_action(
            lambda s, l, t:
                self._interpolate_string_expression(t))

        # Add notice to logger
        self._expr.add_log_function(logger.debug)

        # Set actions
        self._expr.entry.addParseAction(
            lambda s, l, t: self._add_entry(
                t.get('EntryType'), t.get('Key'), t.get('Fields'))
            )
        self._expr.implicit_comment.addParseAction(
            lambda s, l, t: self._add_comment(t[0])
            )
        self._expr.explicit_comment.addParseAction(
            lambda s, l, t: self._add_comment(t[0])
            )
        self._expr.preamble_decl.addParseAction(
            lambda s, l, t: self._add_preamble(t[0])
            )
        self._expr.string_def.addParseAction(
            lambda s, l, t: self._add_string(t['StringName'].name,
                                             t['StringValue'])
            )

    def _bibtex_file_obj(self, bibtex_str):
        # Some files have Byte-order marks inserted at the start
        byte = '\xef\xbb\xbf'
        if not isinstance(byte, ustr):
            byte = ustr(byte, self.encoding, 'ignore')
        if bibtex_str[:3] == byte:
            bibtex_str = bibtex_str[3:]
        if not isinstance(bibtex_str, ustr):
            bibtex_str = bibtex_str.decode(encoding=self.encoding)
        return io.StringIO(bibtex_str)

    def _clean_val(self, val):
        """ Clean instring before adding to dictionary

        :param val: a value
        :type val: string
        :returns: string -- value
        """
        if not val or val == "{}":
            return ''
        return val

    def _clean_key(self, key):
        """ Lowercase a key and return as unicode.

        :param key: a key
        :type key: string
        :returns: (unicode) string -- value
        """
        key = key.lower()
        if not isinstance(key, ustr):
            return ustr(key, 'utf-8')
        else:
            return key

    def _clean_field_key(self, key):
        """ Clean a bibtex field key and homogenize alternative forms.

        :param key: a key
        :type key: string
        :returns: string -- value
        """
        key = self._clean_key(key)
        if self.homogenize_fields:
            if key in self.alt_dict:
                key = self.alt_dict[key]
        return key

    def _add_entry(self, entry_type, entry_id, fields):
        """ Adds a parsed entry.
        Includes checking type and fields, cleaning, applying customizations.

        :param entry_type: the entry type
        :type entry_type: string
        :param entry_id: the entry bibid
        :type entry_id: string
        :param fields: the fields and values
        :type fields: dictionary
        """
        d = {}
        entry_type = self._clean_key(entry_type)
        if self.ignore_nonstandard_types and entry_type not in STANDARD_TYPES:
            logger.warning('Entry type %s not standard. Not considered.',
                           entry_type)
            return
        for key in fields:
            d[self._clean_field_key(key)] = self._clean_val(fields[key])
        d['ENTRYTYPE'] = entry_type
        d['ID'] = entry_id
        if self.customization is not None:
            # apply any customizations to the record object then return it
            logger.debug('Apply customizations and return dict')
            d = self.customization(d)
        self.bib_database.entries.append(d)
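
    # Usage sketch for the ``customization`` hook (an assumption, not part of
    # the library source): a callable that receives and returns the entry dict
    # is applied to every parsed record, e.g.
    #
    #     def lowercase_id(record):
    #         record['ID'] = record['ID'].lower()
    #         return record
    #
    #     parser = BibTexParser()
    #     parser.customization = lowercase_id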

    def _add_comment(self, comment):
        """
        Stores a comment in the list of comments.

        :param comment: the parsed comment
        :type comment: string
        """
        logger.debug('Store comment in list of comments: ' +
                     comment.__repr__())
        self.bib_database.comments.append(comment)

    def _add_string(self, string_key, string):
        """
        Stores a new string in the string dictionary.

        :param string_key: the string key
        :type string_key: string
        :param string: the string value
        :type string: string
        """
        if string_key in self.bib_database.strings:
            logger.warning('Overwriting existing string for key: %s.',
                           string_key)
        logger.debug('Store string: {} -> {}'.format(string_key, string))
        self.bib_database.strings[string_key] = self._clean_val(string)

    def _interpolate_string_expression(self, string_expr):
        """
        Replaces bibdatastrings by their values in an expression.

        :param string_expr: the parsed string as a list
        :type string_expr: list
        """
        return ''.join([self._expand_string(s) for s in string_expr])

    def _expand_string(self, string_or_bibdatastring):
        """
        Replaces a BibDataString by its value when applicable; otherwise
        returns the argument unchanged.

        :param string_or_bibdatastring: the parsed token
        :type string_or_bibdatastring: string or BibDataString
        :returns: string
        """
        if isinstance(string_or_bibdatastring, BibDataString):
            return string_or_bibdatastring.get_value()
        else:
            return string_or_bibdatastring

    def _add_preamble(self, preamble):
        """
        Stores a preamble.

        :param preamble: the parsed preamble
        :type preamble: string
        """
        logger.debug('Store preamble in list of preambles')
        self.bib_database.preambles.append(preamble)
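
A small end-to-end sketch of how the parser above might be driven; the entry contents and constructor arguments are illustrative assumptions, not taken from the library source:

from bibtexparser.bparser import BibTexParser

bibtex_source = """
@article{doe2021,
    author = {Doe, Jane},
    title = {An Example Entry},
    year = {2021},
    url = {http://example.org}
}
"""

# homogenize_fields=True maps alternative field names via alt_dict,
# so the ``url`` field above is stored under ``link``.
parser = BibTexParser(ignore_nonstandard_types=True, homogenize_fields=True)
bib_db = parser.parse(bibtex_source)

print(bib_db.entries[0]['ID'])        # doe2021
print('link' in bib_db.entries[0])    # True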
Example #52
0
    def formatText(self):
        if self.BibtexfilePath != '':
            self.openfile()
        else:
            self.readcontent()

        m = self.getMap()
        m['IEEE Global Communications Conference'] = m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
        del m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
        print(m)

        length = 0
        nb = {}
        for bibtex in self.allbibtex:
            for key in bibtex.keys():
                if len(key) > length and key != 'ENTRYTYPE':
                    length = len(key)
            for k, v in bibtex.items():
                if k == 'ENTRYTYPE' or k == 'ID':
                    nb[k] = v
                    continue
                elif k == 'doi' or k == 'ISSN' or k == 'keywords':
                    continue
                elif v == '':
                    continue
                elif 'url' in k:
                    continue

                nk = k + (length - len(k)) * ' '

                if 'booktitle' in nk:
                    if '(' in v:
                        v1 = v.split('(')[1].split(')')[0]
                        nb[nk] = 'Proc. of ' + v1
                        continue
                    flag = 0  # booktitle not rewritten yet

                    to_remove = "~`!@#$%^&*(){}[];':<>|-=_+"
                    table = {ord(char): None for char in to_remove}
                    clean_v = v.translate(table)

                    #clean_v = v.translate(string.punctuation)
                    #print clean_v
                    for kk, vv in m.items():
                        if kk in clean_v:
                            nb[nk] = 'Proc. of ' + vv[0]
                            publish = 'publish' + (length - 7) * ' '
                            nb[publish] = vv[1]
                            flag = 1
                            break
                    if flag == 0:
                        nb[nk] = v
                        print(v)
                    continue

                elif nk.strip() == 'title' and 'booktitle' not in nk:
                    self.title = v
                    nv = v.split(' ')
                    for i in range(len(nv)):
                        # Capitalize the title, except for prepositions and articles
                        if nv[i] in self.prep or nv[i] in self.artie:
                            continue
                        # Uppercase the first letter
                        else:
                            if 97 <= ord(nv[i][0]) <= 122:
                                nv[i] = chr(ord(nv[i][0])-32)+nv[i][1:]

                    v = ' '.join(nv)
                    nb[nk] = '{' + v + '}'
                    continue

                elif 'pages' in nk:
                    if '--' in v:
                        nb[nk] = v
                        continue
                    nb[nk] = v.replace('-', '--')
                    continue
                elif 'author' in nk:
                    if '\n' in v:
                        nb[nk] = v.replace('\n', ' ')
                        continue

                # Leave all other fields unchanged
                nb[nk] = v

            db = BibDatabase()
            db.entries = [nb]
            writer = BibTexWriter()
            writer.indent = '\t'  # indent entry fields with a tab
            writer.comma_first = False  # keep commas at the end of each line
            with open(self.title + '.bib', 'w') as bibfile:
                bibfile.write(writer.write(db))
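
The title-capitalization and page-range rules applied in formatText above can be sketched in isolation; the helper names and word list below are hypothetical simplifications, not part of the original script:

# Hypothetical standalone helpers mirroring the transformations above.
LOWERCASE_WORDS = {'a', 'an', 'the', 'of', 'in', 'on', 'for', 'and', 'with'}

def capitalize_title(title):
    # Capitalize every word except prepositions and articles, then brace-protect it.
    words = title.split(' ')
    out = []
    for i, word in enumerate(words):
        if i > 0 and word.lower() in LOWERCASE_WORDS:
            out.append(word)
        else:
            out.append(word[:1].upper() + word[1:])
    return '{' + ' '.join(out) + '}'

def normalize_pages(pages):
    # BibTeX page ranges use a double dash.
    return pages if '--' in pages else pages.replace('-', '--')

print(capitalize_title('a survey of network coding'))  # {A Survey of Network Coding}
print(normalize_pages('12-23'))                        # 12--23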
Example #53
0
import sqlite3
import re
import datetime

from bibtexparser.bwriter import BibTexWriter
from bibtexparser.bibdatabase import BibDatabase

conn = sqlite3.connect("profile.db")
c = conn.cursor()

db = BibDatabase()
db.entries = []

id_dict = {}

c.execute(
    'SELECT title, authors, journal, journal_abbr, volume, pages, month, year FROM journal_paper WHERE locale = "international" ORDER BY year, month'
)
for row in c:
    title, authors, journal, journal_abbr, volume, pages, month, year = row

    title_kwd = None
    authors_kwd = None
    journal_kwd = None

    bib_obj = {"ENTRYTYPE": "article"}
    if title is not None:
        bib_obj["title"] = title
        for word in re.split("[^A-Za-z0-9]", title):
            word = word.lower()
            if word != "a" and word != "the" and word != "an":
Example #54
0
 def test_entries_list_method(self):
     bib_db = BibDatabase()
     bib_db.entries = self.entries
     self.assertEqual(bib_db.entries, bib_db.get_entry_list())
Example #55
0
 def test_entries_dict_prop(self):
     bib_db = BibDatabase()
     bib_db.entries = self.entries
     self.assertEqual(bib_db.entries_dict, bib_db.get_entry_dict())
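
A brief standalone illustration (the entry values are made up) of what the entries_dict property used above exposes:

from bibtexparser.bibdatabase import BibDatabase

bib_db = BibDatabase()
bib_db.entries = [
    {"ENTRYTYPE": "article", "ID": "doe2020", "title": "A Title", "year": "2020"}
]

# entries_dict / get_entry_dict() re-index the entry list by citation key.
print(bib_db.entries_dict["doe2020"]["title"])  # A Title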
Example #56
0
 def test_multiple_string_write(self):
     bib_database = BibDatabase()
     bib_database.preambles = [' a ', 'b']
     result = bibtexparser.dumps(bib_database)
     expected = '@preamble{" a "}\n\n@preamble{"b"}\n\n'
     self.assertEqual(result, expected)
Example #57
0
								c = 0
								for k in keys:
									if k.startswith(key):
										print('\tFound Match: {} -> {}'.format(k, key))
										c += 1
								if (c > 0):
									key += chr(ord('a') + c - 1)  # append a letter suffix ('a', 'b', ...) to disambiguate duplicate keys

								keys.add(key)

								# Build new bib entry
								cleaned_entry = copy(e)
								cleaned_entry['ID'] = key

								bib_entries.append(cleaned_entry)
						else:
							print('[WARNING] - No year found in {}!'.format(e))
					else:
						print('[WARNING] - No author found in {}!'.format(e))
				else:
					print('[WARNING] - No title found in {}!'.format(e))

	print('Merged Bib Entries: {}'.format(len(bib_entries)))
	db = BibDatabase()
	db.entries = bib_entries

	writer = BibTexWriter()
	with open(os.path.join(args.output_path, args.output_file), 'w') as bibfile:
		bibfile.write(writer.write(db))
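
The key-collision handling above (appending a letter when a citation key is already taken) can be sketched as a small standalone helper; the function below is a hypothetical simplification, not code from the example:

def unique_key(base_key, existing_keys):
    # If base_key collides with keys already seen, append a lowercase
    # letter ('a', 'b', ...) to disambiguate it, then record it.
    collisions = sum(1 for k in existing_keys if k.startswith(base_key))
    if collisions:
        base_key += chr(ord('a') + collisions - 1)
    existing_keys.add(base_key)
    return base_key

keys = set()
print(unique_key('smith2020', keys))  # smith2020
print(unique_key('smith2020', keys))  # smith2020a
print(unique_key('smith2020', keys))  # smith2020b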

Example #58
0
 def test_multiple_string_write(self):
     bib_database = BibDatabase()
     bib_database.preambles = [" a ", "b"]
     result = bibtexparser.dumps(bib_database)
     expected = "@preamble{ a }\n\n@preamble{b}\n\n"
     self.assertEqual(result, expected)
Example #59
0
 def test_single_preamble_write(self):
     bib_database = BibDatabase()
     bib_database.preambles = [" a "]
     result = bibtexparser.dumps(bib_database)
     expected = "@preamble{ a }\n\n"
     self.assertEqual(result, expected)
 def test_single_string_write(self):
     bib_database = BibDatabase()
     bib_database.strings['name1'] = 'value1'
     result = bibtexparser.dumps(bib_database)
     expected = '@string{name1 = "value1"}\n\n'
     self.assertEqual(result, expected)