def test_content_entries_only(self):
    """Dumping with contents=['entries'] must emit the entries and nothing else."""
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        database = bibtexparser.load(bibtex_file)
    entry_writer = BibTexWriter()
    entry_writer.contents = ['entries']
    expected = \
"""@book{Toto3000,
 author = {Toto, A and Titi, B},
 title = {A title}
}

@article{Wigner1938,
 author = {Wigner, E.},
 doi = {10.1039/TF9383400029},
 issn = {0014-7672},
 journal = {Trans. Faraday Soc.},
 owner = {fr},
 pages = {29--41},
 publisher = {The Royal Society of Chemistry},
 title = {The transition state method},
 volume = {34},
 year = {1938}
}

@book{Yablon2005,
 author = {Yablon, A.D.},
 publisher = {Springer},
 title = {Optical fiber fusion slicing},
 year = {2005}
}

"""
    self.assertEqual(bibtexparser.dumps(database, entry_writer), expected)
Example #2
0
def main():
    """Entry point: sanitize a .bib file according to a JSON config file."""
    argp = argparse.ArgumentParser()
    argp.add_argument("input_file",
                      help='The original .bib file you want to sanitize.')
    argp.add_argument("config", help='The config file in JSON format.')
    argp.add_argument("output_file", help='Name of the new sanitized file.')
    args = argp.parse_args()

    # Load the bibliography and the JSON configuration.
    with open(args.input_file) as bib_handle:
        bib_db = bibtexparser.load(bib_handle)
    with open(args.config) as conf_handle:
        conf_db = json.load(conf_handle)

    # Run the validation passes.
    checkDuplicates(args.input_file)
    bib_db = checkMandatoryFieldsAndKeywords(bib_db, conf_db['read_config'])
    checkTags(bib_db, conf_db['read_config']['tag_regex'])

    # Emit only the entries, ordered as configured by "sort_order".
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.order_entries_by = conf_db["write_config"]["sort_order"]

    with open(args.output_file, 'w') as out_handle:
        out_handle.write(bibtexparser.dumps(bib_db, writer))
    def test_align(self):
        bib_database = BibDatabase()
        bib_database.entries = [{
            'ID': 'abc123',
            'ENTRYTYPE': 'book',
            'author': 'test',
            'thisisaverylongkey': 'longvalue'
        }]
        writer = BibTexWriter()
        writer.align_values = True
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{abc123,
 author             = {test},
 thisisaverylongkey = {longvalue}
}

"""
        self.assertEqual(result, expected)

        with open('bibtexparser/tests/data/multiple_entries_and_comments.bib'
                  ) as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)
        writer = BibTexWriter()
        writer.contents = ['entries']
        writer.align_values = True
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{Toto3000,
 author    = {Toto, A and Titi, B},
 title     = {A title}
}

@article{Wigner1938,
 author    = {Wigner, E.},
 doi       = {10.1039/TF9383400029},
 issn      = {0014-7672},
 journal   = {Trans. Faraday Soc.},
 owner     = {fr},
 pages     = {29--41},
 publisher = {The Royal Society of Chemistry},
 title     = {The transition state method},
 volume    = {34},
 year      = {1938}
}

@book{Yablon2005,
 author    = {Yablon, A.D.},
 publisher = {Springer},
 title     = {Optical fiber fusion slicing},
 year      = {2005}
}

"""
        self.assertEqual(result, expected)
Example #4
0
 def dumps(bibman):
     """Serialize the cleaned entries held by *bibman* back into a BibTeX string."""
     database = bibtexparser.bparser.BibDatabase()
     database._entries_dict = bibman.cleaned
     database.entries = list(bibman.cleaned.values())
     writer = BibTexWriter()
     # Do not impose an ordering (order_entries_by=None).
     writer.order_entries_by = None
     writer.contents = ['comments', 'entries']
     writer.indent = '    '
     return bibtexparser.dumps(database, writer)
    def test_align(self):
        bib_database = BibDatabase()
        bib_database.entries = [{'ID': 'abc123',
                                 'ENTRYTYPE': 'book',
                                 'author': 'test',
                                 'thisisaverylongkey': 'longvalue'}]
        writer = BibTexWriter()
        writer.align_values = True
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{abc123,
 author             = {test},
 thisisaverylongkey = {longvalue}
}

"""
        self.assertEqual(result, expected)

        with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)
        writer = BibTexWriter()
        writer.contents = ['entries']
        writer.align_values = True
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{Toto3000,
 author    = {Toto, A and Titi, B},
 title     = {A title}
}

@article{Wigner1938,
 author    = {Wigner, E.},
 doi       = {10.1039/TF9383400029},
 issn      = {0014-7672},
 journal   = {Trans. Faraday Soc.},
 owner     = {fr},
 pages     = {29--41},
 publisher = {The Royal Society of Chemistry},
 title     = {The transition state method},
 volume    = {34},
 year      = {1938}
}

@book{Yablon2005,
 author    = {Yablon, A.D.},
 publisher = {Springer},
 title     = {Optical fiber fusion slicing},
 year      = {2005}
}

"""
        self.assertEqual(result, expected)
    def test_content_comment_only(self):
        with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)
        writer = BibTexWriter()
        writer.contents = ['comments']
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@comment{}

@comment{A comment}

"""
        self.assertEqual(result, expected)
Example #7
0
File: fetch.py  Project: siudej/Cite
    def _cleanupBibTex(self, count):
        """Clean up the fetched BibTeX in ``self.refs`` and ensure a uniform look.

        Re-parses the raw references with LaTeX-encoding homogenization,
        then re-serializes them (entries only, 4-space indent, sorted by
        citation key).  Also updates ``self.number`` with the entry count.
        """
        import bibtexparser
        from bibtexparser.bparser import BibTexParser
        parser = BibTexParser()
        parser.customization = homogeneize_latex_encoding
        bib = bibtexparser.loads(self.refs, parser=parser)

        # save results
        from bibtexparser.bwriter import BibTexWriter
        writer = BibTexWriter()
        writer.contents = ['entries']
        writer.indent = '    '
        # BUG FIX: ('id') is just the string 'id' (no tuple), which the
        # writer would iterate character by character; and the citation-key
        # field that bibtexparser stores on entries is 'ID', not 'id'.
        writer.order_entries_by = ('ID',)
        self.number = len(bib.entries)
        self.refs = bibtexparser.dumps(bib, writer)
Example #8
0
def main():
    """Convert per-paper markdown front-matter files into one BibTeX file.

    Usage: script OUTPUT INPUT.md [INPUT.md ...]

    Each markdown file starts with a YAML front-matter block (terminated by
    a '---' line) describing one bibliography entry.
    """
    output = sys.argv[1]
    mds = sys.argv[2:]
    es = []
    for fn in mds:
        with open(fn, "r", encoding='UTF-8') as f:
            ls = f.readlines()[1:]
            # Keep only the YAML front matter (up to the closing '---').
            ls = itertools.takewhile(lambda x: x != "---\n", ls)
            e = yaml.load("".join(ls), Loader=yaml.FullLoader)
            # Citation key = file name minus directory and '.md' suffix.
            # (Assumes paths of the form 'dir/<key>.md' — TODO confirm.)
            e['ID'] = fn.split("/")[1][0:-3]
            for i in ['title', 'booktitle']:
                if i in e:
                    s = e[i]
                    # Escape TeX-special characters.  Use an explicit
                    # double backslash: the previous "\#"/"\&" were invalid
                    # escape sequences that only worked because Python
                    # keeps unrecognized escapes verbatim (deprecated).
                    s = s.replace("#", "\\#")
                    s = s.replace("&", "\\&")
                    e[i] = s
            # Brace the title to protect capitalization; guard against
            # entries without a title instead of raising KeyError.
            if 'title' in e:
                e['title'] = "{" + e['title'] + "}"
            if 'authors' in e:
                e['author'] = " and ".join(e['authors'])
                del e['authors']
            # BibTeX field values must be strings.
            for i in ['isbn', 'pages', 'volume', 'year']:
                if i in e: e[i] = str(i) if False else str(e[i])
            # Drop site-only metadata that does not belong in BibTeX.
            for i in [
                    'added', 'layout', 'notes', 'papers', 'read', 'readings',
                    'topics'
            ]:
                if i in e: del e[i]
            es.append(e)

    db = BibDatabase()
    db.entries = es

    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.indent = '  '
    bibtex_str = bibtexparser.dumps(db, writer)
    # Write UTF-8 explicitly to match the input files' encoding.
    with open(output, "w", encoding='UTF-8') as f:
        print(("#############################################\n"
               "# This file is machine generated, do not edit\n"
               "#############################################\n"),
              file=f)
        print(bibtex_str, file=f)
Example #9
0
def write_bibtex(bibtex_entries):
    """Render *bibtex_entries* (a list of entry dicts) as a BibTeX string.

    Note: mutates the entries in place — drops bookkeeping fields, joins
    list values with ' and ', and converts values to LaTeX escapes.
    """
    database = bibtexparser.bibdatabase.BibDatabase()

    for entry in bibtex_entries:
        # Strip fields that are useless in the exported bibliography.
        for useless in ('created_time', 'file', 'abstract'):
            entry.pop(useless, None)
        for field in entry:
            if isinstance(entry[field], list):
                entry[field] = ' and '.join(entry[field])
            entry[field] = unicode_to_latex(entry[field])
    database.entries = bibtex_entries

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = '  '
    writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')
    return bibtexparser.dumps(database, writer)
Example #10
0
def main(bib_fpath=None):
    r"""
    Entry point for the fixbib script: clean a BibTeX library.

    Reads ``bib_fpath`` (default 'My Library.bib'), normalizes citation
    keys, runs a per-entry cleaner over every key cited in the project's
    .tex files, drops uncited entries, and writes the result next to the
    input with a ``_clean`` suffix (unless --dryrun).

    CommandLine:
        fixbib
        python -m fixtex bib
        python -m fixtex bib --dryrun
        python -m fixtex bib --dryrun --debug
    """

    if bib_fpath is None:
        bib_fpath = 'My Library.bib'

    # DEBUG = ub.argflag('--debug')
    # Read in text and ensure ascii format
    dirty_text = ut.readfrom(bib_fpath)

    from fixtex.fix_tex import find_used_citations, testdata_fpaths

    # Optional second bibfile; its entries take precedence further down.
    if exists('custom_extra.bib'):
        extra_parser = bparser.BibTexParser(ignore_nonstandard_types=False)
        parser = bparser.BibTexParser()
        ut.delete_keys(parser.alt_dict, ['url', 'urls'])
        print('Parsing extra bibtex file')
        extra_text = ut.readfrom('custom_extra.bib')
        extra_database = extra_parser.parse(extra_text, partial=False)
        print('Finished parsing extra')
        extra_dict = extra_database.get_entry_dict()
    else:
        extra_dict = None

    #udata = dirty_text.decode("utf-8")
    #dirty_text = udata.encode("ascii", "ignore")
    #dirty_text = udata

    # parser = bparser.BibTexParser()
    # bib_database = parser.parse(dirty_text)
    # d = bib_database.get_entry_dict()

    print('BIBTEXPARSER LOAD')
    parser = bparser.BibTexParser(ignore_nonstandard_types=False,
                                  common_strings=True)
    ut.delete_keys(parser.alt_dict, ['url', 'urls'])
    print('Parsing bibtex file')
    bib_database = parser.parse(dirty_text, partial=False)
    print('Finished parsing')

    # Normalize citation keys: drop ':', map '-' to '_', squash repeated
    # underscores.
    bibtex_dict = bib_database.get_entry_dict()
    old_keys = list(bibtex_dict.keys())
    new_keys = []
    for key in ub.ProgIter(old_keys, label='fixing keys'):
        new_key = key
        new_key = new_key.replace(':', '')
        new_key = new_key.replace('-', '_')
        new_key = re.sub('__*', '_', new_key)
        new_keys.append(new_key)

    # assert len(ut.find_duplicate_items(new_keys)) == 0, 'new keys created conflict'
    assert len(ub.find_duplicates(new_keys)) == 0, 'new keys created conflict'

    # Re-home entries under their normalized keys (entry 'ID' updated too).
    for key, new_key in zip(old_keys, new_keys):
        if key != new_key:
            entry = bibtex_dict[key]
            entry['ID'] = new_key
            bibtex_dict[new_key] = entry
            del bibtex_dict[key]

    # The bibtext is now clean. Print it to stdout
    #print(clean_text)
    verbose = None
    if verbose is None:
        verbose = 1

    # Find citations from the tex documents
    key_list = None
    if key_list is None:
        # Cache is disabled (enabled=0): tryload always recomputes.
        cacher = ub.Cacher('texcite1', enabled=0)
        data = cacher.tryload()
        if data is None:
            fpaths = testdata_fpaths()
            key_list, inverse = find_used_citations(fpaths,
                                                    return_inverse=True)
            # ignore = ['JP', '?', 'hendrick']
            # for item in ignore:
            #     try:
            #         key_list.remove(item)
            #     except ValueError:
            #         pass
            if verbose:
                print('Found %d citations used in the document' %
                      (len(key_list), ))
            data = key_list, inverse
            cacher.save(data)
        key_list, inverse = data

    # else:
    #     key_list = None

    unknown_pubkeys = []
    debug_author = ub.argval('--debug-author', default=None)
    # ./fix_bib.py --debug_author=Kappes

    if verbose:
        print('Fixing %d/%d bibtex entries' %
              (len(key_list), len(bibtex_dict)))

    # debug = True
    debug = False
    if debug_author is not None:
        debug = False

    # Report citations used in the tex files but absent from the library
    # (after accounting for keys provided by the extra bibfile).
    known_keys = list(bibtex_dict.keys())
    missing_keys = set(key_list) - set(known_keys)
    if extra_dict is not None:
        missing_keys.difference_update(set(extra_dict.keys()))

    if missing_keys:
        print('The library is missing keys found in tex files %s' %
              (ub.repr2(missing_keys), ))

    # Search for possible typos:
    candidate_typos = {}
    sedlines = []
    for key in missing_keys:
        candidates = ut.closet_words(key, known_keys, num=3, subset=True)
        if len(candidates) > 1:
            top = candidates[0]
            # An edit distance of 1 is almost certainly a typo; emit a sed
            # one-liner that fixes it across the tex files that cite it.
            if ut.edit_distance(key, top) == 1:
                # "sed -i -e 's/{}/{}/g' *.tex".format(key, top)
                import os
                replpaths = ' '.join(
                    [relpath(p, os.getcwd()) for p in inverse[key]])
                sedlines.append("sed -i -e 's/{}/{}/g' {}".format(
                    key, top, replpaths))
        candidate_typos[key] = candidates
        print('Cannot find key = %r' % (key, ))
        print('Did you mean? %r' % (candidates, ))

    print('Quick fixes')
    print('\n'.join(sedlines))

    # group by file
    just = max([0] + list(map(len, missing_keys)))
    missing_fpaths = [inverse[key] for key in missing_keys]
    for fpath in sorted(set(ub.flatten(missing_fpaths))):
        # ut.fix_embed_globals()
        subkeys = [k for k in missing_keys if fpath in inverse[k]]
        print('')
        ut.cprint('--- Missing Keys ---', 'blue')
        ut.cprint('fpath = %r' % (fpath, ), 'blue')
        ut.cprint('{} | {}'.format('Missing'.ljust(just), 'Did you mean?'),
                  'blue')
        for key in subkeys:
            print('{} | {}'.format(ut.highlight_text(key.ljust(just), 'red'),
                                   ' '.join(candidate_typos[key])))

    # for key in list(bibtex_dict.keys()):

    if extra_dict is not None:
        # Extra database takes precidence over regular
        key_list = list(ut.unique(key_list + list(extra_dict.keys())))
        for k, v in extra_dict.items():
            bibtex_dict[k] = v

    full = ub.argflag('--full')

    # Run the per-entry cleaner over every cited key that exists.
    for key in key_list:
        try:
            entry = bibtex_dict[key]
        except KeyError:
            continue
        self = BibTexCleaner(key, entry, full=full)

        if debug_author is not None:
            debug = debug_author in entry.get('author', '')

        if debug:
            ut.cprint(' --- ENTRY ---', 'yellow')
            print(ub.repr2(entry, nl=1))

        entry = self.fix()
        # self.clip_abstract()
        # self.shorten_keys()
        # self.fix_authors()
        # self.fix_year()
        # old_pubval = self.fix_pubkey()
        # if old_pubval:
        #     unknown_pubkeys.append(old_pubval)
        # self.fix_arxiv()
        # self.fix_general()
        # self.fix_paper_types()

        if debug:
            print(ub.repr2(entry, nl=1))
            ut.cprint(' --- END ENTRY ---', 'yellow')
        bibtex_dict[key] = entry

    # Drop every entry that is not cited anywhere.
    unwanted_keys = set(bibtex_dict.keys()) - set(key_list)
    if verbose:
        print('Removing unwanted %d entries' % (len(unwanted_keys)))
    ut.delete_dict_keys(bibtex_dict, unwanted_keys)

    # Dead diagnostic branch, kept for manual debugging (flip 0 -> 1).
    if 0:
        d1 = bibtex_dict.copy()
        full = True
        for key, entry in d1.items():
            self = BibTexCleaner(key, entry, full=full)
            pub = self.publication()
            if pub is None:
                print(self.entry['ENTRYTYPE'])

            old = self.fix_pubkey()
            x1 = self._pubval()
            x2 = self.standard_pubval(full=full)
            # if x2 is not None and len(x2) > 5:
            #     print(ub.repr2(self.entry))

            if x1 != x2:
                print('x2 = %r' % (x2, ))
                print('x1 = %r' % (x1, ))
                print(ub.repr2(self.entry))

            # if 'CVPR' in self.entry.get('booktitle', ''):
            #     if 'CVPR' != self.entry.get('booktitle', ''):
            #         break
            if old:
                print('old = %r' % (old, ))
            d1[key] = self.entry

    # --full: tabulate the cleaned entries with pandas and report missing
    # required fields per publication type.
    if full:
        d1 = bibtex_dict.copy()

        import numpy as np
        import pandas as pd
        df = pd.DataFrame.from_dict(d1, orient='index')

        paged_items = df[~pd.isnull(df['pub_accro'])]
        has_pages = ~pd.isnull(paged_items['pages'])
        print('have pages {} / {}'.format(has_pages.sum(), len(has_pages)))
        print(ub.repr2(paged_items[~has_pages]['title'].values.tolist()))

        entrytypes = dict(list(df.groupby('pub_type')))
        if False:
            # entrytypes['misc']
            g = entrytypes['online']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            entrytypes['book']
            entrytypes['thesis']
            g = entrytypes['article']
            g = entrytypes['incollection']
            g = entrytypes['conference']

        def lookup_pub(e):
            # Map an entry type to its (pub kind, venue field name) pair.
            if e == 'article':
                return 'journal', 'journal'
            elif e == 'incollection':
                return 'booksection', 'booktitle'
            elif e == 'conference':
                return 'conference', 'booktitle'
            return None, None

        for e, g in entrytypes.items():
            print('e = %r' % (e, ))
            # Drop columns that are entirely null for this group.
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            if 'pub_full' in g.columns:
                place_title = g['pub_full'].tolist()
                print(ub.repr2(ub.dict_hist(place_title)))
            else:
                print('Unknown publications')

        if 'report' in entrytypes:
            g = entrytypes['report']
            missing = g[pd.isnull(g['title'])]
            if len(missing):
                print('Missing Title')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'journal' in entrytypes:
            g = entrytypes['journal']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['journal'])]
            if len(missing):
                print('Missing Journal')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'conference' in entrytypes:
            g = entrytypes['conference']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'incollection' in entrytypes:
            g = entrytypes['incollection']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'thesis' in entrytypes:
            g = entrytypes['thesis']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['institution'])]
            if len(missing):
                print('Missing Institution')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        # import utool
        # utool.embed()

    # Overwrite BibDatabase structure
    bib_database._entries_dict = bibtex_dict
    bib_database.entries = list(bibtex_dict.values())

    #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()}
    #print(ub.repr2(conftitle_to_types_set_hist))

    print('Unknown conference keys:')
    print(ub.repr2(sorted(unknown_pubkeys)))
    print('len(unknown_pubkeys) = %r' % (len(unknown_pubkeys), ))

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = '  '
    writer.order_entries_by = ('type', 'author', 'year')

    new_bibtex_str = bibtexparser.dumps(bib_database, writer)

    # Need to check
    #jegou_aggregating_2012

    # Fix the Journal Abreviations
    # References:
    # https://www.ieee.org/documents/trans_journal_names.pdf

    # Write out clean bibfile in ascii format
    clean_bib_fpath = ub.augpath(bib_fpath.replace(' ', '_'), suffix='_clean')

    if not ub.argflag('--dryrun'):
        ut.writeto(clean_bib_fpath, new_bibtex_str)
Example #11
0
File: main.py  Project: Juvawa/bib2web
            'formatter': 'standard',
            'class':'logging.StreamHandler',
        },
    },
    'loggers': {
        '': {
            'handlers': ['default'],
            'level': 'ERROR',
            'formatter': 'standard',
            'propagate': True
        }
    }
})

# Module-level writer shared by the helpers below: dump comments and
# entries, two-space indent, entries grouped by type then author/year.
writer = BibTexWriter()
writer.contents = ['comments', 'entries']
writer.indent = '  '
writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')

def create_id(t, year, title):
	"""Build a citation key of the form '<type>_<year>_<title_with_underscores>'."""
	return "_".join([str(t), str(year), str(space_to_underscore(title))])

def pdf(pdf_files, shared_pdf, bibtex_folder, bibtex_files, gscholar):
	for pdf in pdf_files:
		txt = re.sub("\W", " ", gs.convert_pdf_to_txt(pdf)).lower()
		#Research determined that the cutting of 35 words gives the 
		#highest accuracy
		words = txt.strip().split()[:35]
		words = " ".join(words)		
		print words
		if gscholar == True:
Example #12
0
def fix_conference_title_names(clean_text, key_list=None):
    """
    Mass bibtex fixes: normalize conference/journal titles, entry types,
    and author names across a whole bibtex string.

    Args:
        clean_text (str): full bibtex source to fix.
        key_list (list, optional): citation keys actually used in the tex
            documents; discovered automatically when None.

    Returns:
        str: the rewritten bibtex string.

    CommandLine:
        ./fix_bib.py
    """

    # Find citations from the tex documents
    if key_list is None:
        key_list = find_used_citations(testdata_fpaths())
        key_list = list(set(key_list))
        ignore = ['JP', '?']
        for item in ignore:
            try:
                key_list.remove(item)
            except ValueError:
                pass

    unknown_confkeys = []

    # Fields that may hold a venue name, depending on entry type.
    conference_keys = [
        'journal',
        'booktitle',
    ]

    ignore_confkey = []

    bib_database = bibtexparser.loads(clean_text)

    bibtex_dict = bib_database.get_entry_dict()

    # Sanity check: the ignore list must not overlap the known title maps.
    isect = set(ignore_confkey).intersection(
        set(constants_tex_fixes.CONFERENCE_TITLE_MAPS.keys()))
    assert len(isect) == 0, repr(isect)

    #ut.embed()
    #conftitle_to_types_hist = ut.ddict(list)

    type_key = 'ENTRYTYPE'

    debug_author = ut.get_argval('--debug-author', type_=str, default=None)
    # ./fix_bib.py --debug_author=Kappes

    for key in bibtex_dict.keys():
        entry = bibtex_dict[key]

        if debug_author is not None:
            debug = debug_author in entry.get('author', '')
        else:
            debug = False

        if debug:
            print(' --- ENTRY ---')
            print(ut.repr3(entry))

        #if type_key not in entry:
        #    #entry[type_key] = entry['ENTRYTYPE']
        #    ut.embed()

        # Clip abstrat
        if 'abstract' in entry:
            entry['abstract'] = ' '.join(entry['abstract'].split(' ')[0:7])

        # Remove Keys
        remove_keys = [
            'note',
            'urldate',
            'series',
            'publisher',
            'isbn',
            'editor',
            'shorttitle',
            'copyright',
            'language',
            'month',
            # These will be put back in
            #'number',
            #'pages',
            #'volume',
        ]
        entry = ut.delete_dict_keys(entry, remove_keys)

        # Fix conference names
        confkeys = list(set(entry.keys()).intersection(set(conference_keys)))
        #entry = ut.delete_dict_keys(entry, ['abstract'])
        # TODO: FIX THESE IF NEEDBE
        #if len(confkeys) == 0:
        #    print(ut.dict_str(entry))
        #    print(entry.keys())
        if len(confkeys) == 1:
            confkey = confkeys[0]
            old_confval = entry[confkey]
            # Remove curly braces
            old_confval = old_confval.replace('{', '').replace('}', '')
            if old_confval in ignore_confkey:
                print(ut.dict_str(entry))
                continue

            new_confval_candiates = []
            # arXiv "venues" are left untouched.
            if old_confval.startswith('arXiv'):
                continue

            # for conf_title, patterns in constants_tex_fixes.CONFERENCE_TITLE_MAPS.items():
            for conf in constants_tex_fixes.CONFERENCES:
                if conf.matches(old_confval):
                    conf_title = conf.accro()
                    if debug:
                        print('old_confval = %r' % (old_confval, ))
                        print('conf_title = %r' % (conf_title, ))
                    new_confval = conf_title
                    new_confval_candiates.append(new_confval)

            # Exactly one match replaces the venue; more than one is a
            # configuration error in the conference tables.
            if len(new_confval_candiates) == 0:
                new_confval = None
            elif len(new_confval_candiates) == 1:
                new_confval = new_confval_candiates[0]
            else:
                assert False, 'double match'

            if new_confval is None:
                # Only report unmatched venues for entries actually cited.
                if key in key_list:
                    unknown_confkeys.append(old_confval)
                #print(old_confval)
            else:
                # Overwrite old confval
                entry[confkey] = new_confval

            # Record info about types of conferneces
            true_confval = entry[confkey].replace('{', '').replace('}', '')

            # FIX ENTRIES THAT SHOULD BE CONFERENCES
            if true_confval in constants_tex_fixes.CONFERENCE_LIST:
                if entry[type_key] == 'inproceedings':
                    pass
                    #print(confkey)
                    #print(ut.dict_str(entry))
                elif entry[type_key] == 'article':
                    # Conference papers use 'booktitle', not 'journal'.
                    entry['booktitle'] = entry['journal']
                    del entry['journal']
                    #print(ut.dict_str(entry))
                elif entry[type_key] == 'incollection':
                    pass
                else:
                    raise AssertionError('UNKNOWN TYPE: %r' %
                                         (entry[type_key], ))

                if 'booktitle' not in entry:
                    print('DOES NOT HAVE CORRECT CONFERENCE KEY')
                    print(ut.dict_str(entry))

                assert 'journal' not in entry, 'should not have journal'

                #print(entry['type'])
                entry[type_key] = 'inproceedings'

            # FIX ENTRIES THAT SHOULD BE JOURNALS
            if true_confval in constants_tex_fixes.JOURNAL_LIST:

                if entry[type_key] == 'article':
                    pass
                elif entry[type_key] == 'inproceedings':
                    pass
                    #print(ut.dict_str(entry))
                elif entry[type_key] == 'incollection':
                    pass
                else:
                    raise AssertionError('UNKNOWN TYPE: %r' %
                                         (entry['type'], ))

                if 'journal' not in entry:
                    print('DOES NOT HAVE CORRECT CONFERENCE KEY')
                    print(ut.dict_str(entry))

                assert 'booktitle' not in entry, 'should not have booktitle'
                #print(entry['type'])
                #entry['type'] = 'article'

            #conftitle_to_types_hist[true_confval].append(entry['type'])

        elif len(confkeys) > 1:
            raise AssertionError('more than one confkey=%r' % (confkeys, ))

        # Fix Authors
        # Replace every known author alias with the canonical name.
        if 'author' in entry:
            authors = six.text_type(entry['author'])
            for truename, alias_list in constants_tex_fixes.AUTHOR_NAME_MAPS.items(
            ):
                pattern = six.text_type(
                    ut.regex_or([
                        ut.util_regex.whole_word(alias) for alias in alias_list
                    ]))
                authors = re.sub(pattern,
                                 six.text_type(truename),
                                 authors,
                                 flags=re.UNICODE)
            entry['author'] = authors
    """
    article = journal
    inprocedings = converence paper

    """

    #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()}
    #print(ut.dict_str(conftitle_to_types_set_hist))

    print(ut.list_str(sorted(unknown_confkeys)))
    print('len(unknown_confkeys) = %r' % (len(unknown_confkeys), ))

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = '  '
    writer.order_entries_by = ('type', 'author', 'year')

    new_bibtex_str = bibtexparser.dumps(bib_database, writer)
    return new_bibtex_str
Example #13
0
# Output directory is '<folder>-clean'; empty it if present, else create it.
folder = sys.argv[1] if len(sys.argv) > 1 else "bib"

if os.path.exists(folder + '-clean'):
    # FIX: parenthesized print works identically under Python 2 and 3;
    # the old 'print x' statement form was Python-2 only.
    print('cleaning ' + folder + '-clean/')
    for file in os.listdir(folder + '-clean'):
        try:
            if os.path.isfile(folder + '-clean/' + file):
                os.unlink(folder + '-clean/' + file)
        except Exception as e:
            print(e)
else:
    os.makedirs(folder + '-clean')

#Writer customization
writer = BibTexWriter()
writer.contents = ['entries']
writer.indent = '  '
writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')

#parser customization, need a new parser for each file
#parser = BibTexParser()
#parser.common_strings = True

#Bib dictionary for months
Months = """@STRING{ jan = "jan"}
@STRING{ feb = "feb"}
@STRING{ mar = "mar"}
@STRING{ apr = "apr"}
@STRING{ may = "may"}
@STRING{ jun = "jun"}
@STRING{ jul = "jul"}
Example #14
0
# Output directory is '<folder>-clean'; empty it if present, else create it.
folder = sys.argv[1] if len(sys.argv) > 1 else "bib"

if os.path.exists(folder + '-clean'):
    # FIX: parenthesized print works identically under Python 2 and 3;
    # the old 'print x' statement form was Python-2 only.
    print('cleaning ' + folder + '-clean/')
    for file in os.listdir(folder + '-clean'):
        try:
            if os.path.isfile(folder + '-clean/' + file):
                os.unlink(folder + '-clean/' + file)
        except Exception as e:
            print(e)
else:
    os.makedirs(folder + '-clean')

#Writer customization
writer = BibTexWriter()
writer.contents = ['entries']
writer.indent = '  '
writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')

#parser customization, need a new parser for each file
#parser = BibTexParser()
#parser.common_strings = True

#Bib dictionary for months
Months = """@STRING{ jan = "jan"}
@STRING{ feb = "feb"}
@STRING{ mar = "mar"}
@STRING{ apr = "apr"}
@STRING{ may = "may"}
@STRING{ jun = "jun"}
@STRING{ jul = "jul"}