def metaDictToBib(jobid, metadict, omit_keys, path_prefix):
    """Render the meta data of one document as a bibtex entry.

    The module-level INV_ALT_KEYS mapping is handed to toOrdinaryDict as
    the dict for key changes; it is not a parameter of this function.

    Args:
        jobid (int): id of job, returned unchanged.
        metadict (DocMeta): meta dict of a doc; its 'id' item is returned.
        omit_keys (list): keys to omit in the converted dict.
        path_prefix (str): folder path to prepend to attachment file paths.

    Returns:
        rec (int): 0 if successful, 1 otherwise.
        jobid (int): the input jobid as it is.
        dbtext (str): formatted bibtex entry, '' if <rec>==1.
        docid (int): id of the processed document.
    """
    try:
        entry = toOrdinaryDict(metadict, INV_ALT_KEYS, omit_keys, path_prefix)

        database = BibDatabase()
        database.entries = [entry]

        bib_writer = BibTexWriter()
        bib_writer.indent = ' '
        bib_writer.comma_first = False

        rendered = bib_writer.write(database)
        return 0, jobid, rendered, metadict['id']
    except Exception:
        # Any failure is logged with its traceback and reported through
        # the return code instead of being raised to the caller.
        LOGGER.exception('Failed to write to bibtex')
        return 1, jobid, '', metadict['id']
def save(self, bibfile=-1):
    """Write every entry of this biblist to a bibtex file.

    The file is created inside the directory given by expanding
    ``$PYBLIO_BIB``:

    - without any argument, the original filename (``self.name``) is used
    - otherwise the given file name is used

    The full destination path is printed, surrounded by blank lines,
    before the file is written.
    """
    file_name = self.name if bibfile == -1 else bibfile

    database = BibDatabase()
    database.entries.extend(self)

    # The writer object carries the output formatting options.
    bib_writer = BibTexWriter()
    bib_writer.indent = ' '
    bib_writer.comma_first = False  # comma-first output is switched off
    bib_writer.align_values = True  # line up the field values

    print('')
    destination = os.path.join(os.path.expandvars('$PYBLIO_BIB'), file_name)
    print(destination)
    print('')

    with open(destination, 'w') as handle:
        # The rendered database is framed by one newline on each side.
        handle.write('\n')
        handle.write(bib_writer.write(database))
        handle.write('\n')
def convert_to_bib(content, save_fpath):
    """Parse an API response and store the resulting papers as bibtex.

    Args:
        content: API response, passed as-is to ``parse_api_response``.
        save_fpath: path of the bibtex file to write (opened with "w+").
    """
    parsed_papers = parse_api_response(content)

    database = BibDatabase()
    database.entries = parsed_papers

    bib_writer = BibTexWriter()
    bib_writer.indent = " "
    bib_writer.comma_first = True

    with open(save_fpath, "w+") as out_file:
        out_file.write(bib_writer.write(database))
def test_comma_first(self):
    """Writing book.bib with comma_first enabled reproduces the fixture."""
    with io.open(_data_path('book.bib'), 'r') as source:
        parsed = BibTexParser(source.read())
    with io.open(_data_path('book_comma_first.bib'), 'r') as fixture:
        wanted = fixture.read()

    bib_writer = BibTexWriter()
    bib_writer.indent = ' '
    bib_writer.comma_first = True
    produced = bib_writer.write(parsed)

    # Show the complete diff when the comparison fails.
    self.maxDiff = None
    self.assertEqual(wanted, produced)
def test_comma_first_and_trailing_comma(self):
    """article.bib written with both comma options matches the fixture."""
    with io.open(_data_path('article.bib'), 'r') as source:
        parsed = BibTexParser(source.read())

    fixture_path = _data_path(
        'article_comma_first_and_trailing_comma_output.bib')
    with io.open(fixture_path, 'r') as fixture:
        wanted = fixture.read()

    bib_writer = BibTexWriter()
    bib_writer.add_trailing_comma = True
    bib_writer.comma_first = True
    produced = bib_writer.write(parsed)

    # Show the complete diff when the comparison fails.
    self.maxDiff = None
    self.assertEqual(wanted, produced)
def test_comma_first_and_trailing_comma(self):
    """Check writer output with add_trailing_comma and comma_first set.

    The parsed content of article.bib is written back and compared with
    the stored expected output file.
    """
    with io.open(_data_path('article.bib'), 'r') as source:
        parsed = BibTexParser(source.read())

    fixture_path = _data_path(
        'article_comma_first_and_trailing_comma_output.bib')
    with io.open(fixture_path, 'r') as fixture:
        wanted = fixture.read()

    bib_writer = BibTexWriter()
    bib_writer.add_trailing_comma = True
    bib_writer.comma_first = True
    produced = bib_writer.write(parsed)

    # Show the complete diff when the comparison fails.
    self.maxDiff = None
    self.assertEqual(wanted, produced)
def write_res(self, passed_entries, passed_name, failed_entries, failed_name):
    """Store the results under the "results/" directory and report where.

    Args:
        passed_entries: entries written as a bibtex database.
        passed_name: file name (inside results/) for the bibtex output.
        failed_entries: data dumped as JSON with an indent of 4.
        failed_name: file name (inside results/) for the JSON output.
    """
    database = BibDatabase()
    database.entries = passed_entries

    bib_writer = BibTexWriter()
    bib_writer.indent = ' '
    bib_writer.comma_first = False

    passed_path = "results/" + passed_name
    with open(passed_path, 'w') as bib_out:
        bib_out.write(bib_writer.write(database))

    failed_path = "results/" + failed_name
    with open(failed_path, 'w') as json_out:
        json.dump(failed_entries, json_out, indent=4)

    print("Writing data to filesystem!")
    print(" -successful results can be found in: " + passed_path)
    print(" -failed results can be found in: " + failed_path)
# NOTE(review): stray expression; presumably the tail of a statement that
# begins before this chunk -- confirm against the full file.
None
# `dup`, `entries`, `keys` and `db` are defined outside the visible chunk.
dup(entries, 'ID')
dup(entries, 'title')
# NOTE(review): the entries are subscripted like mappings below (x['year'],
# x['ID']); if they are plain dicts, hasattr(x, 'ID') is always False and
# this filter keeps every entry -- confirm whether "'ID' in x" was intended.
entries = list(filter(lambda x: not hasattr(x, 'ID'), entries))
# list.sort is stable, so after both sorts the order is by 'ID' descending,
# with 'year' descending only separating entries whose 'ID' compares equal.
entries.sort(key=lambda x: x['year'], reverse=True)
entries.sort(key=lambda x: x['ID'], reverse=True)
db.entries = entries
# Report every key that occurs more than once in `keys`
# ('\u001b[31m' ... '\u001b[0m' wraps the heading in ANSI red).
if len(keys) != len(set(keys)):
    print('\u001b[31m' + 'Duplicate Keys' + '\u001b[0m')
    for i in set(keys):
        if keys.count(i) > 1:
            print('-- Duplicate ', i, '--')
# Report keys that are listed in `keys` but absent from the database index.
if len(set(db.entries_dict.keys())) != len(set(keys)):
    print('\u001b[31m' + 'Unparsed Keys' + '\u001b[0m')
    for i in set(keys) - set(db.entries_dict.keys()):
        print('-- MISSING: ', i, '--')
writer = BibTexWriter()
writer.indent = ' '
writer.comma_first = False  # comma-first output is switched off
# Write the database to 'bibtex.bib' in the current working directory.
with open('bibtex.bib', 'w') as bibfile:
    bibfile.write(writer.write(db))
print('bibtex written')
def toString(e):
    """Return the bibtex text for the single entry *e*.

    A writer with a ' ' indent and comma-first output is set up, and its
    private ``_entry_to_bibtex`` method is called directly on the entry.
    """
    entry_writer = BibTexWriter()
    entry_writer.indent = ' '
    entry_writer.comma_first = True  # comma-first output is switched on
    rendered = entry_writer._entry_to_bibtex(e)
    return rendered
def formatText(self):
    """Normalise the loaded BibTeX entries and write them to '<title>.bib'.

    The input is loaded with ``self.openfile()`` when
    ``self.BibtexfilePath`` is non-empty, otherwise with
    ``self.readcontent()``.  All entries of ``self.allbibtex`` are folded
    into ONE output record (later entries overwrite equal keys):

    * 'ENTRYTYPE' and 'ID' are copied unchanged;
    * 'doi', 'ISSN', 'keywords', keys containing 'url' and fields with an
      empty value are dropped;
    * every other key is right-padded with spaces to the longest key seen
      so far ('ENTRYTYPE' is ignored when measuring);
    * booktitle: 'Proc. of ' + the text inside the first parentheses if
      there is one, otherwise 'Proc. of ' + ``vv[0]`` of the first venue
      from ``self.getMap()`` whose name occurs in the punctuation-stripped
      booktitle (``vv[1]`` is stored under a padded 'publish' key); with no
      match the booktitle is kept and printed;
    * title: words not in ``self.prep`` / ``self.artie`` get an upper-case
      first letter (ASCII a-z only) and the result is wrapped in braces;
    * pages: a single '-' becomes '--' unless '--' is already present;
    * author: newlines are replaced by spaces.

    Side effects: prints the venue map, stores the raw title in
    ``self.tilte`` (attribute name kept as spelled, other code may read it)
    and writes the record to ``self.tilte + '.bib'``.

    Raises:
        KeyError: if the venue map lacks the long GLOBECOM name.
        AttributeError: if no title field was seen and ``self.tilte`` was
            never set before.
    """
    if self.BibtexfilePath != '':
        self.openfile()
    else:
        self.readcontent()

    m = self.getMap()
    # Re-key the GLOBECOM venue under its short name; presumably so that
    # the substring match below also hits booktitles without the suffix.
    long_name = ('IEEE Global Communications Conference, '
                 'incorporating the Global Internet Symposium')
    m['IEEE Global Communications Conference'] = m.pop(long_name)
    # Parenthesised single-argument print: same output under the Python 2
    # print statement, and valid syntax under Python 3.
    print(m)

    # Punctuation removed from a booktitle before venue matching.  Built
    # once here instead of once per booktitle field.
    # NOTE(review): a dict table needs unicode values on Python 2.
    strip_table = {ord(char): None for char in "~`!@#$%^&*(){}[];':<>|-=_+"}

    length = 0
    nb = {}
    for bibtex in self.allbibtex:
        # Longest key length, used as the padding width for field names.
        for key in bibtex:
            if key != 'ENTRYTYPE' and len(key) > length:
                length = len(key)

        for k, v in bibtex.items():
            if k in ('ENTRYTYPE', 'ID'):
                nb[k] = v
                continue
            if k in ('doi', 'ISSN', 'keywords') or v == '' or 'url' in k:
                continue

            nk = k.ljust(length)

            if 'booktitle' in nk:
                if '(' in v:
                    nb[nk] = 'Proc. of ' + v.split('(')[1].split(')')[0]
                    continue
                clean_v = v.translate(strip_table)
                for kk, vv in m.items():
                    if kk in clean_v:
                        nb[nk] = 'Proc. of ' + vv[0]
                        nb['publish'.ljust(length)] = vv[1]
                        break
                else:
                    # No known venue matched: booktitle left unchanged.
                    nb[nk] = v
                    print(v)
                continue

            if nk.strip() == 'title':
                self.tilte = v
                words = v.split(' ')
                for i, word in enumerate(words):
                    # Empty tokens come from consecutive spaces; skipping
                    # them avoids an IndexError on word[0].
                    if not word or word in self.prep or word in self.artie:
                        continue
                    # Capitalise the first letter (ASCII lower case only).
                    if 'a' <= word[0] <= 'z':
                        words[i] = word[0].upper() + word[1:]
                nb[nk] = '{' + ' '.join(words) + '}'
                continue

            if 'pages' in nk:
                nb[nk] = v if '--' in v else v.replace('-', '--')
                continue

            if 'author' in nk:
                nb[nk] = v.replace('\n', ' ')
                continue

            # Everything else is copied unchanged.
            nb[nk] = v

    db = BibDatabase()
    db.entries = [nb]
    writer = BibTexWriter()
    writer.indent = '\t'  # one tab per field line
    writer.comma_first = False  # comma-first output is switched off
    # NOTE(review): binary mode is kept from the original; under Python 3
    # writer.write() would have to return bytes for this to work -- confirm.
    with open(self.tilte + '.bib', 'wb') as bibfile:
        bibfile.write(writer.write(db))
dbtext = writer.write(db) return 0, jobid, dbtext, metadict['id'] except Exception: LOGGER.exception('Failed to write to bibtex') return 1, jobid, '', metadict['id'] if __name__ == '__main__': aa = readBibFile('test.bib') pprint(aa[-1]) # test export entries = [] for eii in aa: dii = toOrdinaryDict(eii, INV_ALT_KEYS, OMIT_KEYS, '/home/') entries.append(dii) db = BibDatabase() db.entries = entries writer = BibTexWriter() writer.indent = ' ' writer.comma_first = False with open('testexport.bib', 'w') as fout: dbtext = writer.write(db) print('dbtext', dbtext) fout.write(dbtext)
def formatText(self):
    """Normalise the loaded BibTeX entries and write them to '<title>.bib'.

    The input is loaded with ``self.openfile()`` when
    ``self.BibtexfilePath`` is non-empty, otherwise with
    ``self.readcontent()``.  All entries of ``self.allbibtex`` are folded
    into ONE output record (later entries overwrite equal keys):

    * 'ENTRYTYPE' and 'ID' are copied unchanged;
    * 'doi', 'ISSN', 'keywords', keys containing 'url' and fields with an
      empty value are dropped;
    * every other key is right-padded with spaces to the longest key seen
      so far ('ENTRYTYPE' is ignored when measuring);
    * booktitle: 'Proc. of ' + the text inside the first parentheses if
      there is one, otherwise 'Proc. of ' + ``vv[0]`` of the first venue
      from ``self.getMap()`` whose name occurs in the punctuation-stripped
      booktitle (``vv[1]`` is stored under a padded 'publish' key); with no
      match the booktitle is kept and printed;
    * title: words not in ``self.prep`` / ``self.artie`` get an upper-case
      first letter (ASCII a-z only) and the result is wrapped in braces;
    * pages: a single '-' becomes '--' unless '--' is already present;
    * author: newlines are replaced by spaces.

    Side effects: prints the venue map, stores the raw title in
    ``self.tilte`` (attribute name kept as spelled, other code may read it)
    and writes the record to ``self.tilte + '.bib'``.

    Raises:
        KeyError: if the venue map lacks the long GLOBECOM name.
        AttributeError: if no title field was seen and ``self.tilte`` was
            never set before.
    """
    if self.BibtexfilePath != '':
        self.openfile()
    else:
        self.readcontent()

    m = self.getMap()
    # Re-key the GLOBECOM venue under its short name; presumably so that
    # the substring match below also hits booktitles without the suffix.
    long_name = ('IEEE Global Communications Conference, '
                 'incorporating the Global Internet Symposium')
    m['IEEE Global Communications Conference'] = m.pop(long_name)
    # Parenthesised single-argument print: same output under the Python 2
    # print statement, and valid syntax under Python 3.
    print(m)

    # Punctuation removed from a booktitle before venue matching.  Built
    # once here instead of once per booktitle field.
    # NOTE(review): a dict table needs unicode values on Python 2.
    strip_table = {ord(char): None for char in "~`!@#$%^&*(){}[];':<>|-=_+"}

    length = 0
    nb = {}
    for bibtex in self.allbibtex:
        # Longest key length, used as the padding width for field names.
        for key in bibtex:
            if key != 'ENTRYTYPE' and len(key) > length:
                length = len(key)

        for k, v in bibtex.items():
            if k in ('ENTRYTYPE', 'ID'):
                nb[k] = v
                continue
            if k in ('doi', 'ISSN', 'keywords') or v == '' or 'url' in k:
                continue

            nk = k.ljust(length)

            if 'booktitle' in nk:
                if '(' in v:
                    nb[nk] = 'Proc. of ' + v.split('(')[1].split(')')[0]
                    continue
                clean_v = v.translate(strip_table)
                for kk, vv in m.items():
                    if kk in clean_v:
                        nb[nk] = 'Proc. of ' + vv[0]
                        nb['publish'.ljust(length)] = vv[1]
                        break
                else:
                    # No known venue matched: booktitle left unchanged.
                    nb[nk] = v
                    print(v)
                continue

            if nk.strip() == 'title':
                self.tilte = v
                words = v.split(' ')
                for i, word in enumerate(words):
                    # Empty tokens come from consecutive spaces; skipping
                    # them avoids an IndexError on word[0].
                    if not word or word in self.prep or word in self.artie:
                        continue
                    # Capitalise the first letter (ASCII lower case only).
                    if 'a' <= word[0] <= 'z':
                        words[i] = word[0].upper() + word[1:]
                nb[nk] = '{' + ' '.join(words) + '}'
                continue

            if 'pages' in nk:
                nb[nk] = v if '--' in v else v.replace('-', '--')
                continue

            if 'author' in nk:
                nb[nk] = v.replace('\n', ' ')
                continue

            # Everything else is copied unchanged.
            nb[nk] = v

    db = BibDatabase()
    db.entries = [nb]
    writer = BibTexWriter()
    writer.indent = '\t'  # one tab per field line
    writer.comma_first = False  # comma-first output is switched off
    # NOTE(review): binary mode is kept from the original; under Python 3
    # writer.write() would have to return bytes for this to work -- confirm.
    with open(self.tilte + '.bib', 'wb') as bibfile:
        bibfile.write(writer.write(db))