def populate_parser(parser):
    """Register the server command-line arguments on ``parser``.

    Adds the positional ``config`` argument followed by the topic-count,
    port/host, browser, verbosity, full-text, BibTeX and SSL options.
    ``parser`` is modified in place; nothing is parsed and nothing is
    returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to the ``is_valid_configfile`` and
        ``is_valid_filepath`` converters.
    """
    # Shared converters.  They stay lambdas (rather than nested defs) so the
    # callable name argparse shows for a failed conversion is unchanged.
    as_config = lambda raw: is_valid_configfile(parser, raw)
    as_path = lambda raw: is_valid_filepath(parser, raw)

    # (flags, settings) pairs in registration order, which is also the order
    # used by the generated help output.
    specs = [
        (('config',),
         dict(type=as_config, help="Configuration file path")),
        (('-k',),
         dict(type=int, required=False, help="Number of Topics")),
        (('-p',),
         dict(dest='port', type=int, default=None, help="Port Number")),
        (('--host',),
         dict(default=None, help='Hostname')),
        (('--no-browser',),
         dict(dest='browser', action='store_false')),
        (("-q", "--quiet"),
         dict(action="store_true")),
        (('--fulltext',),
         dict(action='store_true', help='Serve raw corpus files.')),
        (('--bibtex',),
         dict(default=None, type=as_path, help='BibTeX library location')),
        (('--ssl',),
         dict(action='store_true',
              help="Use SSL (must specify certfile, keyfile, and ca_certs in config)")),
        # A bare ``--ssl-certfile`` (no value) stores the const 'server.pem'.
        (('--ssl-certfile',),
         dict(dest='certfile', nargs="?", const='server.pem', default=None,
              type=as_path, help="SSL certificate file")),
        (('--ssl-keyfile',),
         dict(dest='keyfile', default=None, type=as_path,
              help="SSL certificate key file")),
        (('--ssl-ca',),
         dict(dest='ca_certs', default=None, type=as_path,
              help="SSL certificate authority file")),
    ]
    for flags, settings in specs:
        parser.add_argument(*flags, **settings)
def populate_parser(parser):
    """Register the server command-line arguments on ``parser``.

    Adds the positional ``config`` argument, the required ``-k`` topic
    count, the port/host options and the SSL options.  ``parser`` is
    modified in place; nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for every path-typed
        argument.
    """
    register = parser.add_argument
    # One shared converter; kept as a lambda so argparse reports the same
    # callable name as before if a conversion fails.
    check_path = lambda raw: is_valid_filepath(parser, raw)

    # Required inputs.
    register('config', help="Configuration file path", type=check_path)
    register('-k', help="Number of Topics", type=int, required=True)

    # Network binding.
    register('-p', default=None, dest='port', help="Port Number", type=int)
    register('--host', help='Hostname', default=None)

    # SSL switch and its supporting files.
    register('--ssl',
             help="Use SSL (must specify certfile, keyfile, and ca_certs in config)",
             action='store_true')
    # A bare ``--ssl-certfile`` (no value) stores the const 'server.pem'.
    register('--ssl-certfile',
             help="SSL certificate file",
             type=check_path,
             default=None,
             const='server.pem',
             nargs="?",
             dest='certfile')
    register('--ssl-keyfile',
             help="SSL certificate key file",
             type=check_path,
             default=None,
             dest='keyfile')
    register('--ssl-ca',
             help="SSL certificate authority file",
             type=check_path,
             default=None,
             dest='ca_certs')
def populate_parser(parser):
    """Register the server command-line arguments on ``parser``.

    Adds the positional ``config`` argument, the required ``-k`` topic
    count, port/host options, the full-text and BibTeX options, and the SSL
    options.  ``parser`` is modified in place; nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to the ``is_valid_configfile`` and
        ``is_valid_filepath`` converters.
    """
    # Converters stay lambdas so the callable name argparse reports on a
    # failed conversion is the same as before.
    to_config = lambda text: is_valid_configfile(parser, text)
    to_path = lambda text: is_valid_filepath(parser, text)

    # Registration order is preserved because it drives the help layout.
    argument_table = (
        (('config',),
         {'type': to_config, 'help': "Configuration file path"}),
        (('-k',),
         {'type': int, 'required': True, 'help': "Number of Topics"}),
        (('-p',),
         {'dest': 'port', 'type': int, 'help': "Port Number",
          'default': None}),
        (('--host',),
         {'default': None, 'help': 'Hostname'}),
        (('--fulltext',),
         {'action': 'store_true', 'help': 'Serve raw corpus files.'}),
        (('--bibtex',),
         {'default': None, 'type': to_path,
          'help': 'BibTeX library location'}),
        (('--ssl',),
         {'action': 'store_true',
          'help': "Use SSL (must specify certfile, keyfile, and ca_certs in config)"}),
        # A bare ``--ssl-certfile`` (no value) stores the const 'server.pem'.
        (('--ssl-certfile',),
         {'dest': 'certfile', 'nargs': "?", 'const': 'server.pem',
          'default': None, 'type': to_path,
          'help': "SSL certificate file"}),
        (('--ssl-keyfile',),
         {'dest': 'keyfile', 'default': None, 'type': to_path,
          'help': "SSL certificate key file"}),
        (('--ssl-ca',),
         {'dest': 'ca_certs', 'default': None, 'type': to_path,
          'help': "SSL certificate authority file"}),
    )
    for names, options in argument_table:
        parser.add_argument(*names, **options)
def populate_parser(parser):
    """Set the help epilog and register the stoplist arguments on ``parser``.

    The epilog lists every entry of the module-level ``langs`` mapping as
    ``"<key> <Value>"`` (value capitalized), sorted by value.  The arguments
    added are the positional ``config_file`` plus the HTRC, custom stopword
    file, high/low frequency filter and language options.  ``parser`` is
    modified in place; nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for ``config_file``.
    """
    # Build the epilog listing, one line per language, ordered by value.
    ordered_langs = sorted(langs.items(), key=lambda pair: pair[1])
    listing = []
    for code, label in ordered_langs:
        listing.append('{k} {v}'.format(k=code, v=label.capitalize()))
    header = 'Available language stoplists (use 2-letter code): \n\t'
    parser.epilog = header + '\n\t'.join(listing)

    register = parser.add_argument
    register("config_file",
             type=lambda raw: is_valid_filepath(parser, raw),
             help="Path to Config")
    register("--htrc", action="store_true")
    register("--stopword-file",
             help="File with custom stopwords",
             dest="stopword_file")
    register("--high",
             default=None,
             help="High frequency word filter",
             dest="high_filter",
             type=int)
    # NOTE(review): the help text advertises a default of 5 while the parser
    # default is None - presumably the fallback is applied by the consumer
    # of these arguments; confirm.
    register("--low",
             help="Low frequency word filter [Default: 5]",
             default=None,
             dest="low_filter",
             type=int)
    register("--lang",
             metavar='xx',
             help="Languages to stoplist. See options below.",
             choices=langs.keys(),
             nargs='+')
def populate_parser(parser):
    """Register the corpus-initialisation arguments on ``parser``.

    Adds the positional ``corpus_path`` and optional positional
    ``config_file``, then the naming, model-path, tokenizer, unidecode,
    HTRC, rebuild and quiet options.  Finally installs parser-level defaults
    for ``stop_freq``, ``nltk``, ``simple`` and ``sentences``, which have no
    command-line flag in this variant.  ``parser`` is modified in place;
    nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for ``corpus_path``.
    """
    register = parser.add_argument

    # Positionals: the corpus location, then an optional config file.
    register("corpus_path",
             type=lambda raw: is_valid_filepath(parser, raw),
             help="Path to Corpus")
    register("config_file", help="Path to Config [optional]", nargs="?")

    # Naming and output location.
    register("--name",
             help="Corpus name (for web interface) [Default: [corpus_path]]",
             metavar="\"CORPUS NAME\"",
             dest="corpus_print_name")
    register("--model-path",
             help="Model Path [Default: [corpus_path]/../models]",
             dest="model_path")

    # Tokenisation and character handling.
    tokenizer_names = ['default', 'simple', 'ltc', 'zh', 'inpho', 'brain']
    register("--tokenizer", choices=tokenizer_names, default="default")
    register("--unidecode",
             help="Convert unicode characters to ascii.",
             dest='decode',
             action="store_true")
    parser.set_defaults(decode=False)

    # Plain boolean switches.
    register("--htrc", action="store_true")
    register("--rebuild", action="store_true")
    register("-q", "--quiet", action="store_true")

    # Attributes without a flag here; they only receive these fixed values.
    flagless_defaults = {
        'stop_freq': 0,
        'nltk': False,
        'simple': True,
        'sentences': False,
    }
    parser.set_defaults(**flagless_defaults)
def populate_parser(parser):
    """Register the corpus-initialisation arguments on ``parser``.

    Adds the positional ``corpus_path`` and optional positional
    ``config_file``, the naming and model-path options, a mutually exclusive
    ``--unicode`` / ``--decode`` pair (both writing ``decode``), and the
    HTRC, rebuild, tokenizer, sentence-handling and frequency-filter
    options.  ``parser`` is modified in place; nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for ``corpus_path``.
    """
    parser.add_argument("corpus_path", help="Path to Corpus",
                        type=lambda x: is_valid_filepath(parser, x))
    parser.add_argument("--name", dest="corpus_print_name",
                        metavar="\"CORPUS NAME\"",
                        help="Corpus name (for web interface) [Default: [corpus_path]]")
    parser.add_argument("config_file", nargs="?",
                        help="Path to Config [optional]")
    parser.add_argument("--model-path", dest="model_path",
                        help="Model Path [Default: [corpus_path]/../models]")

    # --unicode and --decode toggle the same destination, so argparse must
    # reject the combination.
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--unicode", action="store_false", dest='decode',
                       help="Store unicode characters. [Default]")
    group.add_argument("--decode", action="store_true", dest='decode',
                       help="Convert unicode characters to ascii.")
    # store_false would otherwise give ``decode`` an implicit default of
    # True; force the documented default.
    parser.set_defaults(decode=False)

    parser.add_argument("--htrc", action="store_true")
    parser.add_argument("--rebuild", action="store_true")
    parser.add_argument("--tokenizer",
                        choices=['zh', 'ltc', 'och', 'inpho', 'default'],
                        default="default")
    # NOTE: store_true with default=True means ``simple`` is True whether or
    # not the flag is given.  Left as-is because consumers may read
    # ``args.simple``.
    parser.add_argument("--simple", action="store_true", default=True,
                        help="Skip sentence tokenizations [default].")
    parser.add_argument("--sentences", action="store_true",
                        help="Parse at the sentence level")
    # Fix: the help text previously ended with an unbalanced ")".
    parser.add_argument("--freq", dest="stop_freq", default=5, type=int,
                        help="Filter words occurring less than freq times [Default: 5]")
def populate_parser(parser):
    """Register the model-training arguments on ``parser``.

    Adds the positional ``config_file`` plus the context-type, process
    count, random seed, ``-k`` values and iteration-count options.
    ``parser`` is modified in place; nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for ``config_file``.
    """
    # (flags, settings) pairs in registration order.
    training_options = [
        (("config_file",),
         dict(type=lambda raw: is_valid_filepath(parser, raw),
              help="Path to Config")),
        (("--context-type",),
         dict(dest="context_type",
              help="Level of corpus modeling, prompts if not set")),
        (("-p", "--processes"),
         dict(type=int, default=1,
              help="Number of CPU cores for training [Default: 1]")),
        (("--seed",),
         dict(type=int, default=None,
              help="Random seed for topic modeling [Default: None]")),
        # One or more integers may follow -k.
        (("-k",),
         dict(type=int, nargs="+", help="K values to train upon")),
        (("--iter",),
         dict(type=int, help="Number of training iterations")),
    ]
    for flags, settings in training_options:
        parser.add_argument(*flags, **settings)
def populate_parser(parser):
    """Register the parsing arguments on ``parser``.

    Adds a positional ``path`` accepting one or more values, a
    ``--tokenizer`` choice (``ancient`` or ``modern``, defaulting to
    ``modern``) and the required ``-o``/``--output`` option.  ``parser`` is
    modified in place; nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``util.is_valid_filepath`` for each ``path``.
    """
    register = parser.add_argument
    tokenizer_names = ['ancient', 'modern']

    register("path",
             type=lambda raw: util.is_valid_filepath(parser, raw),
             help="file or folder to parse",
             nargs='+')
    register("--tokenizer", default="modern", choices=tokenizer_names)
    register("-o", '--output', help="output path", required=True)
def populate_parser(parser):
    """Register the corpus-initialisation arguments on ``parser``.

    Adds the positional ``corpus_path`` and optional positional
    ``config_file`` together with the naming, model-path, HTRC and rebuild
    options.  ``parser`` is modified in place; nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for ``corpus_path``.
    """
    # (flags, settings) pairs in registration order.
    layout = [
        (("corpus_path",),
         dict(type=lambda raw: is_valid_filepath(parser, raw),
              help="Path to Corpus")),
        (("--name",),
         dict(dest="corpus_print_name",
              metavar="\"CORPUS NAME\"",
              help="Corpus name (for web interface) [Default: [corpus_path]]")),
        (("config_file",),
         dict(nargs="?", help="Path to Config [optional]")),
        (("--model-path",),
         dict(dest="model_path",
              help="Model Path [Default: [corpus_path]/../models]")),
        (("--htrc",), dict(action="store_true")),
        (("--rebuild",), dict(action="store_true")),
    ]
    for flags, settings in layout:
        parser.add_argument(*flags, **settings)
def populate_parser(parser):
    """Register the corpus-initialisation arguments on ``parser``.

    Adds the positional ``corpus_path`` and optional positional
    ``config_file``, the naming and model-path options, a mutually exclusive
    ``--unicode`` / ``--decode`` pair (both writing ``decode``), and the
    HTRC, rebuild, quiet, tokenizer, sentence-handling and frequency-filter
    options.  ``parser`` is modified in place; nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for ``corpus_path``.
    """
    parser.add_argument("corpus_path", help="Path to Corpus",
                        type=lambda x: is_valid_filepath(parser, x))
    parser.add_argument(
        "--name", dest="corpus_print_name", metavar="\"CORPUS NAME\"",
        help="Corpus name (for web interface) [Default: [corpus_path]]")
    parser.add_argument("config_file", nargs="?",
                        help="Path to Config [optional]")
    parser.add_argument("--model-path", dest="model_path",
                        help="Model Path [Default: [corpus_path]/../models]")

    # --unicode and --decode toggle the same destination, so argparse must
    # reject the combination.
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--unicode", action="store_false", dest='decode',
                       help="Store unicode characters. [Default]")
    group.add_argument("--decode", action="store_true", dest='decode',
                       help="Convert unicode characters to ascii.")
    # store_false would otherwise give ``decode`` an implicit default of
    # True; force the documented default.
    parser.set_defaults(decode=False)

    parser.add_argument("--htrc", action="store_true")
    parser.add_argument("--rebuild", action="store_true")
    parser.add_argument("-q", "--quiet", action="store_true")
    parser.add_argument(
        "--tokenizer", default="default",
        choices=['zh', 'ltc', 'och', 'inpho', 'default', 'brain'])
    # NOTE: store_true with default=True means ``simple`` is True whether or
    # not the flag is given.  Left as-is because consumers may read
    # ``args.simple``.
    parser.add_argument("--simple", action="store_true", default=True,
                        help="Skip sentence tokenizations [default].")
    parser.add_argument("--sentences", action="store_true",
                        help="Parse at the sentence level")
    # Fix: the help text previously ended with an unbalanced ")".
    parser.add_argument(
        "--freq", dest="stop_freq", default=5, type=int,
        help="Filter words occurring less than freq times [Default: 5]")
def populate_parser(parser):
    """Set the help epilog and register the stoplist arguments on ``parser``.

    The epilog lists every entry of the module-level ``langs`` mapping as
    ``"<key> <Value>"`` (value capitalized), sorted by value.  The arguments
    added are the positional ``config_file`` plus the HTRC, custom stopword
    file, high/low frequency filter and language options.  ``parser`` is
    modified in place; nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for ``config_file``.
    """
    # Epilog: heading followed by one tab-indented line per language.
    entries = []
    for code, label in sorted(langs.items(), key=lambda item: item[1]):
        entries.append('{k} {v}'.format(k=code, v=label.capitalize()))
    parser.epilog = ('Available language stoplists (use 2-letter code): \n\t'
                     + '\n\t'.join(entries))

    # (flags, settings) pairs in registration order.
    option_table = (
        (("config_file",),
         {'help': "Path to Config",
          'type': lambda raw: is_valid_filepath(parser, raw)}),
        (("--htrc",),
         {'action': "store_true"}),
        (("--stopword-file",),
         {'dest': "stopword_file", 'help': "File with custom stopwords"}),
        (("--high",),
         {'type': int, 'dest': "high_filter", 'default': None,
          'help': "High frequency word filter"}),
        # NOTE(review): help advertises a default of 5 but the parser
        # default is None - presumably applied by the consumer; confirm.
        (("--low",),
         {'type': int, 'dest': "low_filter", 'default': None,
          'help': "Low frequency word filter [Default: 5]"}),
        (("--lang",),
         {'nargs': '+', 'choices': langs.keys(), 'metavar': 'xx',
          'help': "Languages to stoplist. See options below."}),
    )
    for names, options in option_table:
        parser.add_argument(*names, **options)
def populate_parser(parser):
    """Register the corpus-initialisation arguments on ``parser``.

    Adds the positional ``corpus_path`` and optional positional
    ``config_file`` together with the naming, model-path, HTRC, rebuild and
    tokenizer (``inpho`` or ``default``) options.  ``parser`` is modified in
    place; nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for ``corpus_path``.
    """
    register = parser.add_argument

    register("corpus_path",
             type=lambda raw: is_valid_filepath(parser, raw),
             help="Path to Corpus")
    register("--name",
             help="Corpus name (for web interface) [Default: [corpus_path]]",
             metavar="\"CORPUS NAME\"",
             dest="corpus_print_name")
    register("config_file", help="Path to Config [optional]", nargs="?")
    register("--model-path",
             help="Model Path [Default: [corpus_path]/../models]",
             dest="model_path")

    # Plain boolean switches.
    for switch in ("--htrc", "--rebuild"):
        register(switch, action="store_true")

    register("--tokenizer", default="default", choices=['inpho', 'default'])
def populate_parser(parser): parser.add_argument("config_file", help="Path to Config", type=lambda x: is_valid_filepath(parser, x)) parser.add_argument("--context-type", dest='context_type', help="Level of corpus modeling, prompts if not set") parser.add_argument("-p", "--processes", default=1, type=int, help="Number of CPU cores for training [Default: 1]") parser.add_argument("--seed", default=None, type=int, help="Random seed for topic modeling [Default: None]") parser.add_argument("-k", nargs='+', help="K values to train upon", type=int) parser.add_argument('--iter', type=int, help="Number of training iterations")
def populate_parser(parser):
    """Register the corpus-initialisation arguments on ``parser``.

    Adds the positional ``corpus_path`` and optional positional
    ``config_file``, then the naming, model-path, tokenizer, unidecode,
    HTRC, rebuild and quiet options, and finally installs parser-level
    defaults for ``stop_freq``, ``nltk``, ``simple`` and ``sentences``.
    ``parser`` is modified in place; nothing is returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for ``corpus_path``.
    """
    # Arguments registered before the ``decode`` default is pinned.
    leading = [
        (("corpus_path",),
         dict(type=lambda raw: is_valid_filepath(parser, raw),
              help="Path to Corpus")),
        (("config_file",),
         dict(nargs="?", help="Path to Config [optional]")),
        (("--name",),
         dict(dest="corpus_print_name",
              metavar="\"CORPUS NAME\"",
              help="Corpus name (for web interface) [Default: [corpus_path]]")),
        (("--model-path",),
         dict(dest="model_path",
              help="Model Path [Default: [corpus_path]/../models]")),
        (("--tokenizer",),
         dict(default="default",
              choices=['default', 'simple', 'ltc', 'zh', 'inpho', 'brain'])),
        (("--unidecode",),
         dict(action="store_true", dest='decode',
              help="Convert unicode characters to ascii.")),
    ]
    for flags, settings in leading:
        parser.add_argument(*flags, **settings)
    parser.set_defaults(decode=False)

    # Plain boolean switches.
    trailing = [("--htrc",), ("--rebuild",), ("-q", "--quiet")]
    for flags in trailing:
        parser.add_argument(*flags, action="store_true")

    # These attributes have no flag in this variant; they only get defaults.
    parser.set_defaults(sentences=False, simple=True, nltk=False,
                        stop_freq=0)
def populate_parser(parser):
    """Register the positional ``config_file`` and ``--no-launch`` on ``parser``.

    ``--no-launch`` stores ``False`` into ``launch``; without the flag the
    attribute is ``True``.  ``parser`` is modified in place; nothing is
    returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for ``config_file``.
    """
    config_settings = {
        'help': "Path to Config File",
        'type': lambda raw: is_valid_filepath(parser, raw),
    }
    launch_settings = {'dest': 'launch', 'action': 'store_false'}

    parser.add_argument("config_file", **config_settings)
    parser.add_argument('--no-launch', **launch_settings)
def populate_parser(parser):
    """Register the positional ``config_file`` and ``--no-browser`` on ``parser``.

    ``--no-browser`` stores ``False`` into ``browser``; without the flag the
    attribute is ``True``.  ``parser`` is modified in place; nothing is
    returned.

    :param parser: argparse-style parser to extend.  It is also passed as
        the first argument to ``is_valid_filepath`` for ``config_file``.
    """
    register = parser.add_argument
    register('config_file',
             type=lambda raw: is_valid_filepath(parser, raw),
             help="Configuration file path")
    register('--no-browser', action='store_false', dest='browser')
futures.append( executor.submit(convert_and_write, pdffile, output_dir, True, True)) if verbose == 1: pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=len(futures)).start() for file_n, f in enumerate( concurrent.futures.as_completed(futures)): pbar.update(file_n) pbar.finish() if __name__ == '__main__': from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument("path", nargs='+', help="PDF file or folder to parse", type=lambda x: util.is_valid_filepath(parser, x)) parser.add_argument("-o", '--output', help="output path [default: same as filename]") args = parser.parse_args() main(args.path, args.output)
try: futures.append(executor.submit(convert_and_write, pdffile, output_dir, True)) except (PDFException, PSException): print "Skipping {0} due to PDF Exception".format(pdffile) else: futures.append(executor.submit(convert_and_write, pdffile, output_dir, True, True)) if verbose == 1: pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=len(futures)).start() for file_n,f in enumerate(concurrent.futures.as_completed(futures)): pbar.update(file_n) pbar.finish() if __name__ == '__main__': from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument("path", nargs='+', help="PDF file or folder to parse", type=lambda x: util.is_valid_filepath(parser, x)) parser.add_argument("-o", '--output', help="output path [default: same as filename]") args = parser.parse_args() main(args.path, args.output)