def make_ninja_deps(rules_filename, out=sys.stdout):
    """
    Write the full Ninja build file for the wordfreq data to `out`.

    The output is, in order: the generated-file header, the contents of the
    rules file copied through unchanged, and the build statements collected
    from every configured data source.

    rules_filename: path of the Ninja rules file; it is read as UTF-8.
    out: text stream that receives the output (standard output by default).
    """
    print(HEADER, file=out)

    # The rules section is passed through verbatim.
    with open(rules_filename, encoding='utf-8') as rules_fh:
        rules_text = rules_fh.read()
    print(rules_text, file=out)

    build_lines = []

    # The very first build statement regenerates the build file itself, so
    # that it is refreshed whenever the rules or this builder change.
    add_dep(
        build_lines, 'build_deps', 'rules.ninja', 'build.ninja',
        extra='wordfreq_builder/ninja.py'
    )

    # Every helper is called up front; the results are then appended in the
    # order listed here.
    dep_groups = (
        twitter_deps(
            data_filename('raw-input/twitter/all-2014.txt'),
            slice_prefix=data_filename('slices/twitter/tweets-2014'),
            combined_prefix=data_filename('generated/twitter/tweets-2014'),
            slices=40,
            languages=CONFIG['sources']['twitter']
        ),
        wikipedia_deps(
            data_filename('raw-input/wikipedia'),
            CONFIG['sources']['wikipedia']
        ),
        google_books_deps(
            data_filename('raw-input/google-books')
        ),
        leeds_deps(
            data_filename('source-lists/leeds'),
            CONFIG['sources']['leeds']
        ),
        opensubtitles_deps(
            data_filename('source-lists/opensubtitles'),
            CONFIG['sources']['opensubtitles']
        ),
        subtlex_en_deps(
            data_filename('source-lists/subtlex'),
            CONFIG['sources']['subtlex-en']
        ),
        subtlex_other_deps(
            data_filename('source-lists/subtlex'),
            CONFIG['sources']['subtlex-other']
        ),
        jieba_deps(
            data_filename('source-lists/jieba'),
            CONFIG['sources']['jieba']
        ),
        combine_lists(all_languages()),
    )
    for group in dep_groups:
        build_lines.extend(group)

    print('\n'.join(build_lines), file=out)
def make_ninja_deps(rules_filename, out=sys.stdout):
    """
    Emit a complete Ninja build description for the wordfreq data.

    Prints the header, then the rules file as-is, then one build statement
    per dependency gathered from the configured sources.

    rules_filename: location of the Ninja rules file (decoded as UTF-8).
    out: destination text stream; standard output unless overridden.
    """
    print(HEADER, file=out)

    # Pass the rules section through untouched.
    with open(rules_filename, encoding="utf-8") as rules_stream:
        rules_body = rules_stream.read()
    print(rules_body, file=out)

    statements = []

    # Start with the statement that keeps the build file itself current.
    add_dep(statements, "build_deps", "rules.ninja", "build.ninja", extra="wordfreq_builder/ninja.py")

    # Each source contributes its build statements in turn.
    statements += twitter_deps(
        data_filename("raw-input/twitter/all-2014.txt"),
        slice_prefix=data_filename("slices/twitter/tweets-2014"),
        combined_prefix=data_filename("generated/twitter/tweets-2014"),
        slices=40,
        languages=CONFIG["sources"]["twitter"],
    )
    statements += wikipedia_deps(
        data_filename("raw-input/wikipedia"),
        CONFIG["sources"]["wikipedia"],
    )
    statements += google_books_deps(data_filename("raw-input/google-books"))
    statements += leeds_deps(
        data_filename("source-lists/leeds"),
        CONFIG["sources"]["leeds"],
    )
    statements += opensubtitles_deps(
        data_filename("source-lists/opensubtitles"),
        CONFIG["sources"]["opensubtitles"],
    )
    statements += combine_lists(all_languages())

    print("\n".join(statements), file=out)
def make_ninja_deps(rules_filename, out=sys.stdout):
    """
    Produce the Ninja file that describes how the wordfreq data is built.

    Three sections are printed to `out`: the header, the unmodified text of
    the rules file, and the newline-joined build statements.

    rules_filename: path to the Ninja rules file, opened with UTF-8 encoding.
    out: text stream to print to (standard output by default).
    """
    def dep_groups():
        # Yield one batch of build statements per source, computing each
        # batch only when the previous one has been consumed.
        yield twitter_deps(
            data_filename('raw-input/twitter/all-2014.txt'),
            slice_prefix=data_filename('slices/twitter/tweets-2014'),
            combined_prefix=data_filename('generated/twitter/tweets-2014'),
            slices=40,
            languages=CONFIG['sources']['twitter'])
        yield wikipedia_deps(data_filename('raw-input/wikipedia'),
                             CONFIG['sources']['wikipedia'])
        yield google_books_deps(data_filename('raw-input/google-books'))
        yield leeds_deps(data_filename('source-lists/leeds'),
                         CONFIG['sources']['leeds'])
        yield opensubtitles_deps(data_filename('source-lists/opensubtitles'),
                                 CONFIG['sources']['opensubtitles'])
        yield combine_lists(all_languages())

    print(HEADER, file=out)

    # The rules section is reproduced exactly as it appears on disk.
    with open(rules_filename, encoding='utf-8') as rules_in:
        rules_section = rules_in.read()
    print(rules_section, file=out)

    collected = []

    # Lead with the dependency that keeps the build file itself up to date.
    add_dep(collected, 'build_deps', 'rules.ninja', 'build.ninja',
            extra='wordfreq_builder/ninja.py')

    for batch in dep_groups():
        collected.extend(batch)

    print('\n'.join(collected), file=out)
from wordfreq_builder.config import (
    CONFIG, data_filename, wordlist_filename, all_languages, source_names
)
import sys
import pathlib

# Banner printed at the top of every generated build file.
HEADER = """# This file is automatically generated. Do not edit it.
# You can regenerate it using the 'wordfreq-build-deps' command.
"""
TMPDIR = data_filename('tmp')


def add_dep(lines, rule, input, output, extra=None, params=None):
    """
    Append one Ninja `build` statement to `lines`.

    lines: list that receives the generated text lines (mutated in place).
    rule: name of the rule used to produce the output.
    input, output: a string, or a list of strings that is joined with spaces.
    extra: optional string or list of strings, written after a ' | '
        separator following the inputs; nothing is written when it is
        empty or None.
    params: optional mapping; each item is emitted as an indented
        `key = val` line below the build statement.

    An empty string is always appended last, so consecutive statements are
    separated by a blank line once the lines are joined.
    """
    def spaced(value):
        # Lists become a single space-separated string; anything else is
        # used as given.
        if isinstance(value, list):
            return ' '.join(value)
        return value

    targets = spaced(output)
    sources = spaced(input)
    trailer = (' | ' + spaced(extra)) if extra else ''

    lines.append(
        "build {output}: {rule} {input}{extra}".format(
            output=targets, rule=rule, input=sources, extra=trailer
        )
    )

    for name, value in (params.items() if params else ()):
        lines.append(" {key} = {val}".format(key=name, val=value))

    lines.append("")
from wordfreq_builder.config import ( CONFIG, data_filename, wordlist_filename, all_languages, source_names ) import sys import pathlib import itertools HEADER = """# This file is automatically generated. Do not edit it. # You can change its behavior by editing wordfreq_builder/ninja.py, # and regenerate it by running 'make'. """ TMPDIR = data_filename('tmp') def add_dep(lines, rule, input, output, extra=None, params=None): if isinstance(output, list): output = ' '.join(output) if isinstance(input, list): input = ' '.join(input) if extra: if isinstance(extra, list): extra = ' '.join(extra) extrastr = ' | ' + extra else: extrastr = '' build_rule = "build {output}: {rule} {input}{extra}".format( output=output, rule=rule, input=input, extra=extrastr ) lines.append(build_rule) if params: for key, val in params.items():
from wordfreq_builder.config import CONFIG, data_filename, wordlist_filename, all_languages, source_names import sys import pathlib HEADER = """# This file is automatically generated. Do not edit it. # You can regenerate it using the 'wordfreq-build-deps' command. """ TMPDIR = data_filename("tmp") def add_dep(lines, rule, input, output, extra=None, params=None): if isinstance(output, list): output = " ".join(output) if isinstance(input, list): input = " ".join(input) if extra: if isinstance(extra, list): extra = " ".join(extra) extrastr = " | " + extra else: extrastr = "" build_rule = "build {output}: {rule} {input}{extra}".format(output=output, rule=rule, input=input, extra=extrastr) lines.append(build_rule) if params: for key, val in params.items(): lines.append(" {key} = {val}".format(key=key, val=val)) lines.append("") def make_ninja_deps(rules_filename, out=sys.stdout): """