示例#1
0
def main():
    """Command-line entry point of the script.

    Parses the command-line options, loads the anthology files when
    ``--anthologies`` is given, reads the BibTeX database from
    ``--input``, runs ``check_database`` on it and hands the collected
    authors, journals and booktitles to ``look_for_misspellings``.
    Finally the database is written to ``--output``, ordered by author,
    year and title.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--input",
        default=sys.stdin,
        type=argparse.FileType('r'),
        help="Input file, default is stdin.")
    cli.add_argument(
        "--output",
        default=sys.stdout,
        type=argparse.FileType('w'),
        help="Optional output file.")
    cli.add_argument(
        "--try-fix",
        action="store_true",
        default=False,
        help="Flag to search information to fix the dtabase.")
    cli.add_argument(
        "--anthologies",
        nargs='+',
        type=str,
        help="List of BibTeX files with know papers.")
    options = cli.parse_args()

    # Anthologies are optional; they are loaded before the input is parsed.
    anthology_files = options.anthologies
    if anthology_files is not None:
        load_anthologies(anthology_files)

    database = bibtexparser.load(options.input, get_bibparser())
    cache_journal_issn(database)
    checked = check_database(database, options.try_fix)
    authors, journals, booktitles = checked

    look_for_misspellings(authors, 'Authors')
    look_for_misspellings(journals, 'Journals')
    # Booktitles are the only category checked with an explicit threshold.
    look_for_misspellings(
        booktitles, 'Booktitles (proceedings)', threshold=0.9)

    bib_writer = BibTexWriter()
    bib_writer.indent = '    '
    bib_writer.order_by = ['author', 'year', 'title']
    bib_writer.display_order = ['author', 'title', 'booktitle', 'journal']
    bib_writer.align_values = True

    formatted = bib_writer.write(database)
    options.output.write(formatted)
示例#2
0
def main():
    """Clean BibTeX records read from stdin and print them to stdout.

    The input is split on blank lines and every chunk is parsed on its
    own, so a chunk that fails to parse is skipped instead of aborting
    the whole run. From the parsed database the ``abstract`` fields,
    all comments and all entries of type ``book`` are removed before it
    is written to stdout. Progress and the final summary go to stderr.

    If no chunk could be parsed, nothing is written to stdout and only
    the summary is reported.
    """
    # The message has no trailing newline, so flush explicitly to make it
    # visible before the blocking read starts.
    print("Reading from stdin ...", end="", file=sys.stderr, flush=True)
    input_records = sys.stdin.read().split("\n\n")
    print("done.", file=sys.stderr)

    bib_parser = BibTexParser(ignore_nonstandard_types=True,
                              homogenize_fields=True,
                              common_strings=True)

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_by = ['author', 'year', 'title']
    writer.display_order = ['author', 'title', 'booktitle', 'journal']
    writer.align_values = True

    records = 0
    skipped = 0
    # Stays None when the input is empty or every record fails to parse.
    parsed = None
    for record in input_records:
        # Ignore empty and whitespace-only chunks (e.g. trailing newlines).
        if not record.strip():
            continue
        try:
            # NOTE(review): the same parser instance is reused for every
            # call; this presumably relies on bibtexparser 1.x adding the
            # entries of each call to the parser's single database, so
            # that `parsed` ends up holding all records -- confirm.
            parsed = bibtexparser.loads(record, bib_parser)
        except (pyparsing.ParseException,
                bibtexparser.bibdatabase.UndefinedString):
            skipped += 1
            continue
        records += 1
        if records % 1000 == 0:
            print("Processed {} records.".format(records), file=sys.stderr)

    if parsed is not None:
        for item in parsed.get_entry_list():
            item.pop("abstract", None)

        parsed.comments = []
        parsed.entries = [
            e for e in parsed.entries if e["ENTRYTYPE"] != "book"]
        # Rebuild the entry list from the entry dict; presumably the dict
        # is keyed by entry ID, which would drop duplicates -- confirm.
        parsed.entries = list(parsed.get_entry_dict().values())

        print(writer.write(parsed))

    print("Finished. {} records kept, {} skipped.".format(records, skipped),
          file=sys.stderr)