def test_content_entries_only(self):
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['entries']
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{Toto3000,
 author = {Toto, A and Titi, B},
 title = {A title}
}

@article{Wigner1938,
 author = {Wigner, E.},
 doi = {10.1039/TF9383400029},
 issn = {0014-7672},
 journal = {Trans. Faraday Soc.},
 owner = {fr},
 pages = {29--41},
 publisher = {The Royal Society of Chemistry},
 title = {The transition state method},
 volume = {34},
 year = {1938}
}

@book{Yablon2005,
 author = {Yablon, A.D.},
 publisher = {Springer},
 title = {Optical fiber fusion slicing},
 year = {2005}
}

"""
    self.assertEqual(result, expected)
def main():
    p = argparse.ArgumentParser()
    p.add_argument("input_file", help='The original .bib file you want to sanitize.')
    p.add_argument("config", help='The config file in JSON format.')
    p.add_argument("output_file", help='Name of the new sanitized file.')
    args = p.parse_args()

    # Parse the original bib file.
    with open(args.input_file) as bibFile:
        bibDB = bibtexparser.load(bibFile)

    # Parse the config file.
    with open(args.config) as configFile:
        confDB = json.load(configFile)

    checkDuplicates(args.input_file)
    bibDB = checkMandatoryFieldsAndKeywords(bibDB, confDB['read_config'])
    checkTags(bibDB, confDB['read_config']['tag_regex'])

    # Write the results, using the ordering defined by "sort_order" in config.json.
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.order_entries_by = confDB["write_config"]["sort_order"]
    with open(args.output_file, 'w') as resFile:
        resFile.write(bibtexparser.dumps(bibDB, writer))
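For reference, the JSON config this script reads appears to have the following shape, inferred only from the keys the code accesses; the concrete values below are illustrative, not taken from the original project:

# Illustrative config.json contents (keys inferred from the accesses above;
# the regex and sort order are made-up examples):
example_config = {
    "read_config": {
        "tag_regex": "^[a-z][a-z0-9_]*$",   # pattern checkTags validates against
        # ... whatever else checkMandatoryFieldsAndKeywords expects ...
    },
    "write_config": {
        "sort_order": ["ENTRYTYPE", "author", "year"]
    }
}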
def test_align(self):
    bib_database = BibDatabase()
    bib_database.entries = [{'ID': 'abc123',
                             'ENTRYTYPE': 'book',
                             'author': 'test',
                             'thisisaverylongkey': 'longvalue'}]
    writer = BibTexWriter()
    writer.align_values = True
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{abc123,
 author             = {test},
 thisisaverylongkey = {longvalue}
}

"""
    self.assertEqual(result, expected)

    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.align_values = True
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{Toto3000,
 author = {Toto, A and Titi, B},
 title  = {A title}
}

@article{Wigner1938,
 author    = {Wigner, E.},
 doi       = {10.1039/TF9383400029},
 issn      = {0014-7672},
 journal   = {Trans. Faraday Soc.},
 owner     = {fr},
 pages     = {29--41},
 publisher = {The Royal Society of Chemistry},
 title     = {The transition state method},
 volume    = {34},
 year      = {1938}
}

@book{Yablon2005,
 author    = {Yablon, A.D.},
 publisher = {Springer},
 title     = {Optical fiber fusion slicing},
 year      = {2005}
}

"""
    self.assertEqual(result, expected)
def dumps(bibman):
    db = bibtexparser.bparser.BibDatabase()
    db._entries_dict = bibman.cleaned
    db.entries = list(bibman.cleaned.values())
    writer = BibTexWriter()
    # writer.order_entries_by = ('type', 'author', 'year')
    writer.order_entries_by = None
    writer.contents = ['comments', 'entries']
    writer.indent = ' '
    new_text = bibtexparser.dumps(db, writer)
    return new_text
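The helper above depends on an external `bibman` object; a minimal, self-contained sketch of the same dump pattern, with a plain dict standing in for `bibman.cleaned` and made-up entry data:

import bibtexparser
from bibtexparser.bibdatabase import BibDatabase
from bibtexparser.bwriter import BibTexWriter

cleaned = {'Knuth1984': {'ID': 'Knuth1984', 'ENTRYTYPE': 'article',
                         'author': 'Knuth, Donald E.',
                         'title': 'Literate Programming',
                         'year': '1984'}}
db = BibDatabase()
db.entries = list(cleaned.values())

writer = BibTexWriter()
writer.order_entries_by = None            # keep insertion order
writer.contents = ['comments', 'entries']
writer.indent = '  '
print(bibtexparser.dumps(db, writer))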
def test_content_comment_only(self):
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['comments']
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@comment{}

@comment{A comment}

"""
    self.assertEqual(result, expected)
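Tests like test_content_entries_only and test_content_comment_only exercise `writer.contents`, which selects which parts of the database are serialized. A minimal sketch of the same switch outside the test harness (the file name is illustrative):

import bibtexparser
from bibtexparser.bwriter import BibTexWriter

with open('refs.bib') as f:              # any .bib file containing @comment blocks
    db = bibtexparser.load(f)

writer = BibTexWriter()
writer.contents = ['comments']           # or ['entries'], or both
print(bibtexparser.dumps(db, writer))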
def _cleanupBibTex(self, count):
    """Clean up BibTeX entries and ensure a uniform look."""
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import homogeneize_latex_encoding

    parser = BibTexParser()
    parser.customization = homogeneize_latex_encoding
    bib = bibtexparser.loads(self.refs, parser=parser)

    # Save the results.
    from bibtexparser.bwriter import BibTexWriter
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.indent = ' '
    # ('id') is just a parenthesized string, not a tuple, and entry keys are
    # stored under 'ID'; sort on a one-element tuple instead.
    writer.order_entries_by = ('ID',)
    self.number = len(bib.entries)
    self.refs = bibtexparser.dumps(bib, writer)
def main():
    output = sys.argv[1]
    mds = sys.argv[2:]
    es = []
    for fn in mds:
        # print(f"loading {fn}")
        with open(fn, "r", encoding='UTF-8') as f:
            # Drop the opening "---" line, then keep the YAML front matter
            # up to the closing "---".
            ls = f.readlines()[1:]
            ls = itertools.takewhile(lambda x: x != "---\n", ls)
            e = yaml.load("".join(ls), Loader=yaml.FullLoader)
        # Derive the citation key from the file name (strip the directory
        # and the ".md" extension).
        e['ID'] = fn.split("/")[1][0:-3]
        # Escape characters that are special in TeX.
        for i in ['title', 'booktitle']:
            if i in e:
                s = e[i]
                s = s.replace("#", r"\#")
                s = s.replace("&", r"\&")
                e[i] = s
        # Brace the title to protect its capitalization.
        e['title'] = "{" + e['title'] + "}"
        if 'authors' in e:
            e['author'] = " and ".join(e['authors'])
            del e['authors']
        # BibTeX field values must be strings.
        for i in ['isbn', 'pages', 'volume', 'year']:
            if i in e:
                e[i] = str(e[i])
        # Drop front-matter fields that have no BibTeX counterpart.
        for i in ['added', 'layout', 'notes', 'papers', 'read', 'readings',
                  'topics']:
            if i in e:
                del e[i]
        es.append(e)

    db = BibDatabase()
    db.entries = es

    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.indent = ' '
    # writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')
    bibtex_str = bibtexparser.dumps(db, writer)

    with open(output, "w") as f:
        print(("#############################################\n"
               "# This file is machine generated, do not edit\n"
               "#############################################\n"), file=f)
        print(bibtex_str, file=f)
def write_bibtex(bibtex_entries):
    bib_database = bibtexparser.bibdatabase.BibDatabase()
    for e in bibtex_entries:
        # Pop the contents that are useless in the output.
        e.pop('created_time', None)
        e.pop('file', None)
        e.pop('abstract', None)
        for k in e:
            if isinstance(e[k], list):
                e[k] = ' and '.join(e[k])
            e[k] = unicode_to_latex(e[k])
    bib_database.entries = bibtex_entries

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = ' '
    writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')
    bibtex_str = bibtexparser.dumps(bib_database, writer)
    return bibtex_str
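A hedged usage sketch for `write_bibtex`: the entry shape is inferred from the fields the function pops and joins, `unicode_to_latex` is assumed to come from pylatexenc, and all sample data is made up:

from pylatexenc.latexencode import unicode_to_latex  # assumed source of the helper

sample_entries = [{
    'ID': 'Mueller2020',
    'ENTRYTYPE': 'article',
    'author': ['Müller, A.', 'Doe, J.'],   # lists are joined with ' and '
    'title': 'Ein Beispiel',
    'year': '2020',
    'file': '/tmp/paper.pdf',              # popped before writing
}]
print(write_bibtex(sample_entries))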
def main(bib_fpath=None):
    r"""
    intro point to fixbib script

    CommandLine:
        fixbib
        python -m fixtex bib
        python -m fixtex bib --dryrun
        python -m fixtex bib --dryrun --debug
    """
    if bib_fpath is None:
        bib_fpath = 'My Library.bib'

    # DEBUG = ub.argflag('--debug')

    # Read in text and ensure ascii format
    dirty_text = ut.readfrom(bib_fpath)

    from fixtex.fix_tex import find_used_citations, testdata_fpaths

    if exists('custom_extra.bib'):
        extra_parser = bparser.BibTexParser(ignore_nonstandard_types=False)
        parser = bparser.BibTexParser()
        ut.delete_keys(parser.alt_dict, ['url', 'urls'])
        print('Parsing extra bibtex file')
        extra_text = ut.readfrom('custom_extra.bib')
        extra_database = extra_parser.parse(extra_text, partial=False)
        print('Finished parsing extra')
        extra_dict = extra_database.get_entry_dict()
    else:
        extra_dict = None

    #udata = dirty_text.decode("utf-8")
    #dirty_text = udata.encode("ascii", "ignore")
    #dirty_text = udata

    # parser = bparser.BibTexParser()
    # bib_database = parser.parse(dirty_text)
    # d = bib_database.get_entry_dict()

    print('BIBTEXPARSER LOAD')
    parser = bparser.BibTexParser(ignore_nonstandard_types=False,
                                  common_strings=True)
    ut.delete_keys(parser.alt_dict, ['url', 'urls'])
    print('Parsing bibtex file')
    bib_database = parser.parse(dirty_text, partial=False)
    print('Finished parsing')

    bibtex_dict = bib_database.get_entry_dict()
    old_keys = list(bibtex_dict.keys())
    new_keys = []
    for key in ub.ProgIter(old_keys, label='fixing keys'):
        new_key = key
        new_key = new_key.replace(':', '')
        new_key = new_key.replace('-', '_')
        new_key = re.sub('__*', '_', new_key)
        new_keys.append(new_key)

    # assert len(ut.find_duplicate_items(new_keys)) == 0, 'new keys created conflict'
    assert len(ub.find_duplicates(new_keys)) == 0, 'new keys created conflict'

    for key, new_key in zip(old_keys, new_keys):
        if key != new_key:
            entry = bibtex_dict[key]
            entry['ID'] = new_key
            bibtex_dict[new_key] = entry
            del bibtex_dict[key]

    # The bibtex is now clean. Print it to stdout
    #print(clean_text)

    verbose = None
    if verbose is None:
        verbose = 1

    # Find citations from the tex documents
    key_list = None
    if key_list is None:
        cacher = ub.Cacher('texcite1', enabled=0)
        data = cacher.tryload()
        if data is None:
            fpaths = testdata_fpaths()
            key_list, inverse = find_used_citations(fpaths, return_inverse=True)
            # ignore = ['JP', '?', 'hendrick']
            # for item in ignore:
            #     try:
            #         key_list.remove(item)
            #     except ValueError:
            #         pass
            if verbose:
                print('Found %d citations used in the document' % (len(key_list),))
            data = key_list, inverse
            cacher.save(data)
        key_list, inverse = data
    # else:
    #     key_list = None

    unknown_pubkeys = []
    debug_author = ub.argval('--debug-author', default=None)
    # ./fix_bib.py --debug_author=Kappes

    if verbose:
        print('Fixing %d/%d bibtex entries' % (len(key_list), len(bibtex_dict)))

    # debug = True
    debug = False
    if debug_author is not None:
        debug = False

    known_keys = list(bibtex_dict.keys())
    missing_keys = set(key_list) - set(known_keys)
    if extra_dict is not None:
        missing_keys.difference_update(set(extra_dict.keys()))

    if missing_keys:
        print('The library is missing keys found in tex files %s' % (
            ub.repr2(missing_keys),))

        # Search for possible typos:
        candidate_typos = {}
        sedlines = []
        for key in missing_keys:
            candidates = ut.closet_words(key, known_keys, num=3, subset=True)
            if len(candidates) > 1:
                top = candidates[0]
                if ut.edit_distance(key, top) == 1:
                    # "sed -i -e 's/{}/{}/g' *.tex".format(key, top)
                    import os
                    replpaths = ' '.join(
                        [relpath(p, os.getcwd()) for p in inverse[key]])
                    sedlines.append("sed -i -e 's/{}/{}/g' {}".format(
                        key, top, replpaths))
            candidate_typos[key] = candidates
            print('Cannot find key = %r' % (key,))
            print('Did you mean? %r' % (candidates,))

        print('Quick fixes')
        print('\n'.join(sedlines))

        # group by file
        just = max([0] + list(map(len, missing_keys)))
        missing_fpaths = [inverse[key] for key in missing_keys]
        for fpath in sorted(set(ub.flatten(missing_fpaths))):
            # ut.fix_embed_globals()
            subkeys = [k for k in missing_keys if fpath in inverse[k]]
            print('')
            ut.cprint('--- Missing Keys ---', 'blue')
            ut.cprint('fpath = %r' % (fpath,), 'blue')
            ut.cprint('{} | {}'.format('Missing'.ljust(just), 'Did you mean?'),
                      'blue')
            for key in subkeys:
                print('{} | {}'.format(ut.highlight_text(key.ljust(just), 'red'),
                                       ' '.join(candidate_typos[key])))

    # for key in list(bibtex_dict.keys()):

    if extra_dict is not None:
        # Extra database takes precedence over regular
        key_list = list(ut.unique(key_list + list(extra_dict.keys())))
        for k, v in extra_dict.items():
            bibtex_dict[k] = v

    full = ub.argflag('--full')

    for key in key_list:
        try:
            entry = bibtex_dict[key]
        except KeyError:
            continue
        self = BibTexCleaner(key, entry, full=full)

        if debug_author is not None:
            debug = debug_author in entry.get('author', '')

        if debug:
            ut.cprint(' --- ENTRY ---', 'yellow')
            print(ub.repr2(entry, nl=1))

        entry = self.fix()
        # self.clip_abstract()
        # self.shorten_keys()
        # self.fix_authors()
        # self.fix_year()
        # old_pubval = self.fix_pubkey()
        # if old_pubval:
        #     unknown_pubkeys.append(old_pubval)
        # self.fix_arxiv()
        # self.fix_general()
        # self.fix_paper_types()

        if debug:
            print(ub.repr2(entry, nl=1))
            ut.cprint(' --- END ENTRY ---', 'yellow')
        bibtex_dict[key] = entry

    unwanted_keys = set(bibtex_dict.keys()) - set(key_list)
    if verbose:
        print('Removing unwanted %d entries' % (len(unwanted_keys)))
    ut.delete_dict_keys(bibtex_dict, unwanted_keys)

    if 0:
        d1 = bibtex_dict.copy()
        full = True
        for key, entry in d1.items():
            self = BibTexCleaner(key, entry, full=full)
            pub = self.publication()
            if pub is None:
                print(self.entry['ENTRYTYPE'])
            old = self.fix_pubkey()
            x1 = self._pubval()
            x2 = self.standard_pubval(full=full)
            # if x2 is not None and len(x2) > 5:
            #     print(ub.repr2(self.entry))
            if x1 != x2:
                print('x2 = %r' % (x2,))
                print('x1 = %r' % (x1,))
                print(ub.repr2(self.entry))
            # if 'CVPR' in self.entry.get('booktitle', ''):
            #     if 'CVPR' != self.entry.get('booktitle', ''):
            #         break
            if old:
                print('old = %r' % (old,))
            d1[key] = self.entry

    if full:
        d1 = bibtex_dict.copy()

        import numpy as np
        import pandas as pd
        df = pd.DataFrame.from_dict(d1, orient='index')

        paged_items = df[~pd.isnull(df['pub_accro'])]
        has_pages = ~pd.isnull(paged_items['pages'])
        print('have pages {} / {}'.format(has_pages.sum(), len(has_pages)))
        print(ub.repr2(paged_items[~has_pages]['title'].values.tolist()))

        entrytypes = dict(list(df.groupby('pub_type')))
        if False:
            # entrytypes['misc']
            g = entrytypes['online']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            entrytypes['book']
            entrytypes['thesis']
            g = entrytypes['article']
            g = entrytypes['incollection']
            g = entrytypes['conference']

        def lookup_pub(e):
            if e == 'article':
                return 'journal', 'journal'
            elif e == 'incollection':
                return 'booksection', 'booktitle'
            elif e == 'conference':
                return 'conference', 'booktitle'
            return None, None

        for e, g in entrytypes.items():
            print('e = %r' % (e,))
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            if 'pub_full' in g.columns:
                place_title = g['pub_full'].tolist()
                print(ub.repr2(ub.dict_hist(place_title)))
            else:
                print('Unknown publications')

        if 'report' in entrytypes:
            g = entrytypes['report']
            missing = g[pd.isnull(g['title'])]
            if len(missing):
                print('Missing Title')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'journal' in entrytypes:
            g = entrytypes['journal']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['journal'])]
            if len(missing):
                print('Missing Journal')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'conference' in entrytypes:
            g = entrytypes['conference']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'incollection' in entrytypes:
            g = entrytypes['incollection']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'thesis' in entrytypes:
            g = entrytypes['thesis']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['institution'])]
            if len(missing):
                print('Missing Institution')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        # import utool
        # utool.embed()

    # Overwrite BibDatabase structure
    bib_database._entries_dict = bibtex_dict
    bib_database.entries = list(bibtex_dict.values())

    #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()}
    #print(ub.repr2(conftitle_to_types_set_hist))

    print('Unknown conference keys:')
    print(ub.repr2(sorted(unknown_pubkeys)))
    print('len(unknown_pubkeys) = %r' % (len(unknown_pubkeys),))

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = ' '
    writer.order_entries_by = ('type', 'author', 'year')

    new_bibtex_str = bibtexparser.dumps(bib_database, writer)

    # Need to check
    #jegou_aggregating_2012

    # Fix the Journal Abbreviations
    # References:
    # https://www.ieee.org/documents/trans_journal_names.pdf

    # Write out clean bibfile in ascii format
    clean_bib_fpath = ub.augpath(bib_fpath.replace(' ', '_'), suffix='_clean')

    if not ub.argflag('--dryrun'):
        ut.writeto(clean_bib_fpath, new_bibtex_str)
        'formatter': 'standard',
        'class': 'logging.StreamHandler',
        },
    },
    'loggers': {
        '': {
            'handlers': ['default'],
            'level': 'ERROR',
            'formatter': 'standard',
            'propagate': True
        }
    }
})

writer = BibTexWriter()
writer.contents = ['comments', 'entries']
writer.indent = ' '
writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')


def create_id(t, year, title):
    return str(t) + "_" + str(year) + "_" + str(space_to_underscore(title))


def pdf(pdf_files, shared_pdf, bibtex_folder, bibtex_files, gscholar):
    for pdf in pdf_files:
        txt = re.sub(r"\W", " ", gs.convert_pdf_to_txt(pdf)).lower()
        # Research determined that cutting at 35 words gives the
        # highest accuracy.
        words = txt.strip().split()[:35]
        words = " ".join(words)
        print(words)
        if gscholar == True:
def fix_conference_title_names(clean_text, key_list=None):
    """
    mass bibtex fixes

    CommandLine:
        ./fix_bib.py
    """

    # Find citations from the tex documents
    if key_list is None:
        key_list = find_used_citations(testdata_fpaths())
        key_list = list(set(key_list))
        ignore = ['JP', '?']
        for item in ignore:
            try:
                key_list.remove(item)
            except ValueError:
                pass

    unknown_confkeys = []
    conference_keys = [
        'journal',
        'booktitle',
    ]
    ignore_confkey = []

    bib_database = bibtexparser.loads(clean_text)
    bibtex_dict = bib_database.get_entry_dict()

    isect = set(ignore_confkey).intersection(
        set(constants_tex_fixes.CONFERENCE_TITLE_MAPS.keys()))
    assert len(isect) == 0, repr(isect)

    #ut.embed()
    #conftitle_to_types_hist = ut.ddict(list)

    type_key = 'ENTRYTYPE'

    debug_author = ut.get_argval('--debug-author', type_=str, default=None)
    # ./fix_bib.py --debug_author=Kappes

    for key in bibtex_dict.keys():
        entry = bibtex_dict[key]

        if debug_author is not None:
            debug = debug_author in entry.get('author', '')
        else:
            debug = False

        if debug:
            print(' --- ENTRY ---')
            print(ut.repr3(entry))

        #if type_key not in entry:
        #    #entry[type_key] = entry['ENTRYTYPE']
        #    ut.embed()

        # Clip abstract
        if 'abstract' in entry:
            entry['abstract'] = ' '.join(entry['abstract'].split(' ')[0:7])

        # Remove Keys
        remove_keys = [
            'note',
            'urldate',
            'series',
            'publisher',
            'isbn',
            'editor',
            'shorttitle',
            'copyright',
            'language',
            'month',
            # These will be put back in
            #'number',
            #'pages',
            #'volume',
        ]
        entry = ut.delete_dict_keys(entry, remove_keys)

        # Fix conference names
        confkeys = list(set(entry.keys()).intersection(set(conference_keys)))
        #entry = ut.delete_dict_keys(entry, ['abstract'])
        # TODO: FIX THESE IF NEEDBE
        #if len(confkeys) == 0:
        #    print(ut.dict_str(entry))
        #    print(entry.keys())
        if len(confkeys) == 1:
            confkey = confkeys[0]
            old_confval = entry[confkey]
            # Remove curly braces
            old_confval = old_confval.replace('{', '').replace('}', '')
            if old_confval in ignore_confkey:
                print(ut.dict_str(entry))
                continue

            new_confval_candiates = []
            if old_confval.startswith('arXiv'):
                continue

            # for conf_title, patterns in constants_tex_fixes.CONFERENCE_TITLE_MAPS.items():
            for conf in constants_tex_fixes.CONFERENCES:
                if conf.matches(old_confval):
                    conf_title = conf.accro()
                    if debug:
                        print('old_confval = %r' % (old_confval,))
                        print('conf_title = %r' % (conf_title,))
                    new_confval = conf_title
                    new_confval_candiates.append(new_confval)

            if len(new_confval_candiates) == 0:
                new_confval = None
            elif len(new_confval_candiates) == 1:
                new_confval = new_confval_candiates[0]
            else:
                assert False, 'double match'

            if new_confval is None:
                if key in key_list:
                    unknown_confkeys.append(old_confval)
                #print(old_confval)
            else:
                # Overwrite old confval
                entry[confkey] = new_confval

            # Record info about types of conferences
            true_confval = entry[confkey].replace('{', '').replace('}', '')

            # FIX ENTRIES THAT SHOULD BE CONFERENCES
            if true_confval in constants_tex_fixes.CONFERENCE_LIST:
                if entry[type_key] == 'inproceedings':
                    pass
                    #print(confkey)
                    #print(ut.dict_str(entry))
                elif entry[type_key] == 'article':
                    entry['booktitle'] = entry['journal']
                    del entry['journal']
                    #print(ut.dict_str(entry))
                elif entry[type_key] == 'incollection':
                    pass
                else:
                    raise AssertionError('UNKNOWN TYPE: %r' % (entry[type_key],))

                if 'booktitle' not in entry:
                    print('DOES NOT HAVE CORRECT CONFERENCE KEY')
                    print(ut.dict_str(entry))

                assert 'journal' not in entry, 'should not have journal'

                #print(entry['type'])
                entry[type_key] = 'inproceedings'

            # FIX ENTRIES THAT SHOULD BE JOURNALS
            if true_confval in constants_tex_fixes.JOURNAL_LIST:
                if entry[type_key] == 'article':
                    pass
                elif entry[type_key] == 'inproceedings':
                    pass
                    #print(ut.dict_str(entry))
                elif entry[type_key] == 'incollection':
                    pass
                else:
                    raise AssertionError('UNKNOWN TYPE: %r' % (entry['type'],))

                if 'journal' not in entry:
                    print('DOES NOT HAVE CORRECT CONFERENCE KEY')
                    print(ut.dict_str(entry))

                assert 'booktitle' not in entry, 'should not have booktitle'
                #print(entry['type'])
                #entry['type'] = 'article'

            #conftitle_to_types_hist[true_confval].append(entry['type'])

        elif len(confkeys) > 1:
            raise AssertionError('more than one confkey=%r' % (confkeys,))

        # Fix Authors
        if 'author' in entry:
            authors = six.text_type(entry['author'])
            for truename, alias_list in constants_tex_fixes.AUTHOR_NAME_MAPS.items():
                pattern = six.text_type(ut.regex_or(
                    [ut.util_regex.whole_word(alias) for alias in alias_list]))
                authors = re.sub(pattern, six.text_type(truename), authors,
                                 flags=re.UNICODE)
            entry['author'] = authors

    """
    article = journal
    inproceedings = conference paper
    """

    #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()}
    #print(ut.dict_str(conftitle_to_types_set_hist))

    print(ut.list_str(sorted(unknown_confkeys)))
    print('len(unknown_confkeys) = %r' % (len(unknown_confkeys),))

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = ' '
    writer.order_entries_by = ('type', 'author', 'year')

    new_bibtex_str = bibtexparser.dumps(bib_database, writer)
    return new_bibtex_str
import os
import sys

from bibtexparser.bwriter import BibTexWriter

folder = sys.argv[1] if len(sys.argv) > 1 else "bib"

if os.path.exists(folder + '-clean'):
    print('cleaning ' + folder + '-clean/')
    for file in os.listdir(folder + '-clean'):
        try:
            if os.path.isfile(folder + '-clean/' + file):
                os.unlink(folder + '-clean/' + file)
        except Exception as e:
            print(e)
else:
    os.makedirs(folder + '-clean')

# Writer customization
writer = BibTexWriter()
writer.contents = ['entries']
writer.indent = ' '
writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')

# Parser customization; a new parser is needed for each file
#parser = BibTexParser()
#parser.common_strings = True

# Bib string dictionary for months
Months = """@STRING{ jan = "jan"}
@STRING{ feb = "feb"}
@STRING{ mar = "mar"}
@STRING{ apr = "apr"}
@STRING{ may = "may"}
@STRING{ jun = "jun"}
@STRING{ jul = "jul"}
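The `Months` string (truncated above) defines BibTeX @STRING macros. One plausible use, though the script's continuation is not shown here, is to prepend it to each file's text before parsing so month abbreviations resolve; the file name below is hypothetical:

from bibtexparser.bparser import BibTexParser

parser = BibTexParser()                   # a fresh parser per file, as noted above
with open(folder + '/example.bib') as f:  # hypothetical input file
    bib = bibtexparser.loads(Months + f.read(), parser=parser)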