def format_folder(folder, use_backup, context=None):
    """
    Pretty print the bibtex database stored in the given folder.

    The file '.queried.bib' located in the folder is read, every entry is
    given a generated bibtex key, and the result is written to a file called
    'biblio.bib' in the same folder.

    Args:
        folder (str): absolute or relative path to the folder to process.
        use_backup (bool): whether to backup previous files before writing.
        context (set): set of existing bibtex keys in the current context.
            An empty set is created when None is given.

    Returns:
        Nothing, but writes the results in a file called 'biblio.bib'.
    """
    known_keys = set() if context is None else context

    # Load the queried database
    queried_path = os.path.join(folder, '.queried.bib')
    database = utils.read_bib_file(queried_path, homogenize=True)
    utils.guess_manual_files(folder, database, update_queried_db=True)

    # Give each entry its generated key, in database order
    # (presumably gen_bibkey consults/updates the key set; verify in
    # nomenclature)
    for record in database.entries:
        record['ID'] = nomenclature.gen_bibkey(record, known_keys)

    # Serialize the database and write it next to the input
    target_path = os.path.join(folder, 'biblio.bib')
    rendered = utils.write_bib(database, order=True)
    utils.write_with_backup(target_path, rendered, use_backup)
def query_crossref_folder(folder, use_backup):
    """
    Query metadata information for unmatched pdf files in the given folder.
    This function only queries Crossref.

    Every pdf whose filename can be parsed into (authors, title) and which is
    not already listed among the known files is looked up on Crossref. A
    result is kept when its score reaches config.crossref_accept_threshold;
    otherwise the filename is recorded as rejected.

    Args:
        folder (str): absolute or relative path to the folder to process.
        use_backup (bool): whether to backup previous files before writing.

    Returns:
        Nothing, but writes the queried databases in bibtex format in the
        given folder (and backup previous database if it differed). The
        files written are '.queried.bib', '.queried.json' and, only when at
        least one file was rejected, '.rejected.txt'.
    """
    # Create database
    db = utils.read_bib_file(os.path.join(folder, '.queried.bib'))
    files = utils.guess_manual_files(folder, db, update_queried_db=False)
    utils.add_skip_files(folder, files)
    json_entries = []
    rejected = []

    # For each pdf in the folder
    # (a stray pdb.set_trace() used to sit here and blocked every run)
    for path in utils.get_pdf_list(folder):
        file = os.path.basename(path)
        parsed = nomenclature.parse_filename(file)
        if parsed is None or file in files:
            continue
        # `file` is already a basename, no need to strip it again
        print('Q: ' + file)
        authors, title = parsed

        # Crossref
        rbib, rjson, score = providers.crossref_query(authors, title)
        if score >= config.crossref_accept_threshold:
            # Append filename and store entry
            rbib['file'] = utils.encode_filename_field(file)
            json_entries.append(rjson)
            db.entries.append(rbib)
        else:
            rejected.append(file)

    # Store results
    bib_path = os.path.join(folder, '.queried.bib')
    utils.write_with_backup(bib_path, utils.write_bib(db, order=False), use_backup)
    json_path = os.path.join(folder, '.queried.json')
    json_str = json.dumps(json_entries, sort_keys=True, indent=4, separators=(',', ': '))
    utils.write_with_backup(json_path, json_str, use_backup)
    rejected_path = os.path.join(folder, '.rejected.txt')
    # The rejected list is only written when it is non-empty
    if rejected:
        utils.write_with_backup(rejected_path, '\n'.join(rejected), use_backup)
def query_crossref_folder(folder, use_backup):
    """
    Look up Crossref metadata for the unmatched pdf files of a folder.

    Only Crossref is queried. A pdf is skipped when its filename cannot be
    parsed or when it is already among the known files. A Crossref answer is
    accepted when its score is at least config.crossref_accept_threshold;
    the other filenames are collected as rejected.

    Args:
        folder (str): absolute or relative path to the folder to process.
        use_backup (bool): whether to backup previous files before writing.

    Returns:
        Nothing, but writes the queried databases in bibtex format in the
        given folder (and backup previous database if it differed).
    """
    # Load the current database and the files already accounted for
    queried_bib = os.path.join(folder, '.queried.bib')
    database = utils.read_bib_file(queried_bib)
    known_files = utils.guess_manual_files(folder, database, update_queried_db=False)
    utils.add_skip_files(folder, known_files)

    accepted_json = []
    rejected_names = []

    for pdf_path in utils.get_pdf_list(folder):
        pdf_name = os.path.basename(pdf_path)
        name_parts = nomenclature.parse_filename(pdf_name)
        if name_parts is None:
            continue
        if pdf_name in known_files:
            continue

        print('Q: ' + pdf_name)
        authors, title = name_parts

        # Ask Crossref and sort the answer by score
        bib_entry, json_entry, score = providers.crossref_query(authors, title)
        if score < config.crossref_accept_threshold:
            rejected_names.append(pdf_name)
            continue

        # Accepted: remember which file the entry belongs to
        bib_entry['file'] = utils.encode_filename_field(pdf_name)
        accepted_json.append(json_entry)
        database.entries.append(bib_entry)

    # Bibtex database
    bib_text = utils.write_bib(database, order=False)
    utils.write_with_backup(os.path.join(folder, '.queried.bib'), bib_text, use_backup)

    # Raw json answers
    json_text = json.dumps(
        accepted_json,
        sort_keys=True,
        indent=4,
        separators=(',', ': '),
    )
    utils.write_with_backup(os.path.join(folder, '.queried.json'), json_text, use_backup)

    # Rejected filenames, written only when there are some
    if rejected_names:
        rejected_text = '\n'.join(rejected_names)
        utils.write_with_backup(os.path.join(folder, '.rejected.txt'), rejected_text, use_backup)
def query_google_folder(folder, use_backup):
    """
    Look up Google Scholar metadata for the unmatched pdf files of a folder.

    Only Google Scholar is queried. A pdf is skipped when its filename
    cannot be parsed, when it is already among the known files, or when the
    query returns None.

    Args:
        folder (str): absolute or relative path to the folder to process.
        use_backup (bool): whether to backup previous files before writing.

    Returns:
        Nothing, but writes the queried databases in bibtex format in the
        given folder (and backup previous database if it differed).
    """
    # Load the current database and the files already accounted for
    queried_bib = os.path.join(folder, '.queried.bib')
    database = utils.read_bib_file(queried_bib)
    known_files = utils.guess_manual_files(folder, database, update_queried_db=False)
    utils.add_skip_files(folder, known_files)

    for pdf_path in utils.get_pdf_list(folder):
        pdf_name = os.path.basename(pdf_path)
        name_parts = nomenclature.parse_filename(pdf_name)
        if name_parts is None:
            continue
        if pdf_name in known_files:
            continue

        print('Q: ' + pdf_name)
        authors, title = name_parts

        # Ask Google Scholar; nothing is stored when the query returns None
        bib_entry = providers.scholarly_query(authors, title)
        if bib_entry is not None:
            # Remember which file the entry belongs to
            bib_entry['file'] = utils.encode_filename_field(pdf_name)
            database.entries.append(bib_entry)

    # Write the updated database back
    bib_text = utils.write_bib(database, order=False)
    utils.write_with_backup(os.path.join(folder, '.queried.bib'), bib_text, use_backup)