示例#1
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    Registers, in order, the positional ``config`` argument and the ``-k``,
    ``-p``, ``--host``, ``--no-browser``, ``-q/--quiet``, ``--fulltext``,
    ``--bibtex``, ``--ssl``, ``--ssl-certfile``, ``--ssl-keyfile`` and
    ``--ssl-ca`` options.  Path-valued arguments are converted by
    ``is_valid_configfile`` or ``is_valid_filepath``, which are handed
    ``parser`` together with the raw value.
    """
    # Each entry is (option strings, keyword arguments for add_argument).
    specs = [
        (('config',), {
            'type': lambda value: is_valid_configfile(parser, value),
            'help': "Configuration file path",
        }),
        (('-k',), {
            'type': int,
            'required': False,
            'help': "Number of Topics",
        }),
        (('-p',), {
            'dest': 'port',
            'type': int,
            'default': None,
            'help': "Port Number",
        }),
        (('--host',), {'default': None, 'help': 'Hostname'}),
        (('--no-browser',), {'dest': 'browser', 'action': 'store_false'}),
        (('-q', '--quiet'), {'action': 'store_true'}),
        (('--fulltext',), {
            'action': 'store_true',
            'help': 'Serve raw corpus files.',
        }),
        (('--bibtex',), {
            'default': None,
            'type': lambda value: is_valid_filepath(parser, value),
            'help': 'BibTeX library location',
        }),
        (('--ssl',), {
            'action': 'store_true',
            'help': "Use SSL (must specify certfile, keyfile, and ca_certs in config)",
        }),
        (('--ssl-certfile',), {
            'dest': 'certfile',
            'nargs': "?",
            'const': 'server.pem',
            'default': None,
            'type': lambda value: is_valid_filepath(parser, value),
            'help': "SSL certificate file",
        }),
        (('--ssl-keyfile',), {
            'dest': 'keyfile',
            'default': None,
            'type': lambda value: is_valid_filepath(parser, value),
            'help': "SSL certificate key file",
        }),
        (('--ssl-ca',), {
            'dest': 'ca_certs',
            'default': None,
            'type': lambda value: is_valid_filepath(parser, value),
            'help': "SSL certificate authority file",
        }),
    ]
    for names, settings in specs:
        parser.add_argument(*names, **settings)
示例#2
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    Adds the positional ``config`` path, the required ``-k`` topic count,
    ``-p``/``--host`` and the ``--ssl*`` options.  Every path-valued argument
    is converted by ``is_valid_filepath``, which also receives ``parser``.
    """
    add = parser.add_argument

    add('config', help="Configuration file path",
        type=lambda value: is_valid_filepath(parser, value))
    add('-k', help="Number of Topics", required=True, type=int)
    add('-p', dest='port', default=None, type=int, help="Port Number")
    add('--host', help='Hostname', default=None)

    add('--ssl', action='store_true',
        help="Use SSL (must specify certfile, keyfile, and ca_certs in config)")
    # Given without a value, --ssl-certfile takes the const 'server.pem'.
    add('--ssl-certfile', dest='certfile', nargs="?", const='server.pem',
        default=None, help="SSL certificate file",
        type=lambda value: is_valid_filepath(parser, value))

    # The remaining SSL file options share one shape.
    ssl_paths = (
        ('--ssl-keyfile', 'keyfile', "SSL certificate key file"),
        ('--ssl-ca', 'ca_certs', "SSL certificate authority file"),
    )
    for flag, target, description in ssl_paths:
        add(flag, dest=target, default=None, help=description,
            type=lambda value: is_valid_filepath(parser, value))
示例#3
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    Registers the positional ``config`` argument (converted by
    ``is_valid_configfile``), the required ``-k`` option, ``-p``, ``--host``,
    ``--fulltext``, ``--bibtex`` and the ``--ssl*`` options.  The remaining
    path-valued options are converted by ``is_valid_filepath``.
    """
    # (option strings, add_argument keyword arguments), in registration order.
    table = (
        (['config'], dict(help="Configuration file path",
                          type=lambda value: is_valid_configfile(parser, value))),
        (['-k'], dict(help="Number of Topics", type=int, required=True)),
        (['-p'], dict(help="Port Number", type=int, dest='port',
                      default=None)),
        (['--host'], dict(help='Hostname', default=None)),
        (['--fulltext'], dict(help='Serve raw corpus files.',
                              action='store_true')),
        (['--bibtex'], dict(help='BibTeX library location', default=None,
                            type=lambda value: is_valid_filepath(parser, value))),
        (['--ssl'], dict(
            help="Use SSL (must specify certfile, keyfile, and ca_certs in config)",
            action='store_true')),
        (['--ssl-certfile'], dict(help="SSL certificate file",
                                  dest='certfile', nargs="?",
                                  const='server.pem', default=None,
                                  type=lambda value: is_valid_filepath(parser, value))),
        (['--ssl-keyfile'], dict(help="SSL certificate key file",
                                 dest='keyfile', default=None,
                                 type=lambda value: is_valid_filepath(parser, value))),
        (['--ssl-ca'], dict(help="SSL certificate authority file",
                            dest='ca_certs', default=None,
                            type=lambda value: is_valid_filepath(parser, value))),
    )
    for option_strings, keywords in table:
        parser.add_argument(*option_strings, **keywords)
示例#4
0
def populate_parser(parser):
    """Set the epilog and declare this command's arguments on ``parser``.

    The epilog lists every ``langs`` entry as ``key    Value`` (value
    capitalised), ordered by value, one entry per tab-indented line.  The
    ``--lang`` option accepts one or more of the ``langs`` keys.
    """
    add = parser.add_argument

    # Build the language listing row by row, sorted on the dict values.
    rows = []
    for code, name in sorted(langs.items(), key=lambda item: item[1]):
        rows.append('{k}    {v}'.format(k=code, v=name.capitalize()))
    heading = 'Available language stoplists (use 2-letter code): \n\t'
    parser.epilog = heading + '\n\t'.join(rows)

    add("config_file", type=lambda value: is_valid_filepath(parser, value),
        help="Path to Config")
    add("--htrc", action="store_true")
    add("--stopword-file", help="File with custom stopwords",
        dest="stopword_file")
    add("--high", dest="high_filter", default=None, type=int,
        help="High frequency word filter")
    add("--low", dest="low_filter", default=None, type=int,
        help="Low frequency word filter [Default: 5]")
    add("--lang", metavar='xx', nargs='+', choices=langs.keys(),
        help="Languages to stoplist. See options below.")
示例#5
0
def populate_parser(parser):
    """Declare this command's arguments and defaults on ``parser``.

    Positionals are ``corpus_path`` (converted by ``is_valid_filepath``) and
    an optional ``config_file``.  Options cover the corpus name, model path,
    tokenizer choice, ``--unidecode`` and the ``--htrc``/``--rebuild``/
    ``--quiet`` switches.  ``stop_freq``, ``nltk``, ``simple`` and
    ``sentences`` have no command-line flag here and are supplied purely as
    parser defaults.
    """
    add = parser.add_argument

    # Positional arguments.
    add("corpus_path", type=lambda value: is_valid_filepath(parser, value),
        help="Path to Corpus")
    add("config_file", help="Path to Config [optional]", nargs="?")

    # Naming and output location.
    add("--name", metavar="\"CORPUS NAME\"", dest="corpus_print_name",
        help="Corpus name (for web interface) [Default: [corpus_path]]")
    add("--model-path", dest="model_path",
        help="Model Path [Default: [corpus_path]/../models]")

    tokenizers = ['default', 'simple', 'ltc', 'zh', 'inpho', 'brain']
    add("--tokenizer", choices=tokenizers, default="default")

    add("--unidecode", dest='decode', action="store_true",
        help="Convert unicode characters to ascii.")
    parser.set_defaults(decode=False)

    # Plain on/off switches.
    add("--htrc", action="store_true")
    add("--rebuild", action="store_true")
    add("-q", "--quiet", action="store_true")

    fixed_defaults = dict(stop_freq=0, nltk=False, simple=True,
                          sentences=False)
    parser.set_defaults(**fixed_defaults)
示例#6
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    Positionals are ``corpus_path`` (converted by ``is_valid_filepath``,
    which also receives ``parser``) and an optional ``config_file``.
    ``--unicode`` and ``--decode`` are mutually exclusive and both write to
    ``decode``, whose default is forced to ``False``.  ``--freq`` stores an
    integer in ``stop_freq`` (default 5).
    """
    parser.add_argument("corpus_path", help="Path to Corpus",
                        type=lambda x: is_valid_filepath(parser, x))
    parser.add_argument("--name", dest="corpus_print_name",
                        metavar="\"CORPUS NAME\"",
                        help="Corpus name (for web interface) [Default: [corpus_path]]")
    parser.add_argument("config_file", nargs="?",
                        help="Path to Config [optional]")
    parser.add_argument("--model-path", dest="model_path",
                        help="Model Path [Default: [corpus_path]/../models]")

    # Both switches target the same destination; only one may be given.
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--unicode", action="store_false", dest='decode',
                       help="Store unicode characters. [Default]")
    group.add_argument("--decode", action="store_true", dest='decode',
                       help="Convert unicode characters to ascii.")
    # store_false would otherwise default to True; pin the shared default.
    parser.set_defaults(decode=False)

    parser.add_argument("--htrc", action="store_true")
    parser.add_argument("--rebuild", action="store_true")
    parser.add_argument("--tokenizer", default="default",
                        choices=['zh', 'ltc', 'och', 'inpho', 'default'])

    # NOTE(review): store_true with default=True means passing --simple does
    # not change the stored value; kept as-is for interface compatibility.
    parser.add_argument("--simple", action="store_true", default=True,
                        help="Skip sentence tokenizations [default].")
    parser.add_argument("--sentences", action="store_true",
                        help="Parse at the sentence level")
    # Help text previously ended in an unbalanced ")".
    parser.add_argument("--freq", dest="stop_freq", default=5, type=int,
                        help="Filter words occurring less than freq times [Default: 5]")
示例#7
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    Registers the positional ``config_file`` (converted by
    ``is_valid_filepath``) and the ``--context-type``, ``-p/--processes``,
    ``--seed``, ``-k`` and ``--iter`` options.  ``-k`` collects one or more
    integers.
    """
    # (option strings, add_argument keyword arguments), in registration order.
    layout = [
        (("config_file",), {
            "help": "Path to Config",
            "type": lambda value: is_valid_filepath(parser, value),
        }),
        (("--context-type",), {
            "dest": "context_type",
            "help": "Level of corpus modeling, prompts if not set",
        }),
        (("-p", "--processes"), {
            "default": 1,
            "type": int,
            "help": "Number of CPU cores for training [Default: 1]",
        }),
        (("--seed",), {
            "default": None,
            "type": int,
            "help": "Random seed for topic modeling [Default: None]",
        }),
        (("-k",), {
            "nargs": "+",
            "type": int,
            "help": "K values to train upon",
        }),
        (("--iter",), {
            "type": int,
            "help": "Number of training iterations",
        }),
    ]
    for names, keywords in layout:
        parser.add_argument(*names, **keywords)
示例#8
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    ``path`` takes one or more values, each converted by
    ``util.is_valid_filepath`` (which also receives ``parser``).
    ``--tokenizer`` is limited to ``ancient``/``modern`` and ``-o/--output``
    is mandatory.
    """
    add = parser.add_argument

    add("path", type=lambda value: util.is_valid_filepath(parser, value),
        help="file or folder to parse", nargs='+')

    tokenizer_names = ['ancient', 'modern']
    add("--tokenizer", default="modern", choices=tokenizer_names)

    add("-o", '--output', help="output path", required=True)
示例#9
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    Positionals are ``corpus_path`` (converted by ``is_valid_filepath``) and
    an optional ``config_file``; options are ``--name``, ``--model-path``,
    ``--htrc`` and ``--rebuild``.
    """
    # (option strings, add_argument keyword arguments), in registration order.
    entries = (
        (("corpus_path",), dict(
            type=lambda value: is_valid_filepath(parser, value),
            help="Path to Corpus")),
        (("--name",), dict(
            dest="corpus_print_name",
            metavar="\"CORPUS NAME\"",
            help="Corpus name (for web interface) [Default: [corpus_path]]")),
        (("config_file",), dict(
            nargs="?",
            help="Path to Config [optional]")),
        (("--model-path",), dict(
            dest="model_path",
            help="Model Path [Default: [corpus_path]/../models]")),
        (("--htrc",), dict(action="store_true")),
        (("--rebuild",), dict(action="store_true")),
    )
    for names, keywords in entries:
        parser.add_argument(*names, **keywords)
示例#10
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    Positionals are ``corpus_path`` (converted by ``is_valid_filepath``,
    which also receives ``parser``) and an optional ``config_file``.
    ``--unicode`` and ``--decode`` are mutually exclusive and both write to
    ``decode``, whose default is forced to ``False``.  ``--freq`` stores an
    integer in ``stop_freq`` (default 5).
    """
    parser.add_argument("corpus_path",
                        help="Path to Corpus",
                        type=lambda x: is_valid_filepath(parser, x))
    parser.add_argument(
        "--name",
        dest="corpus_print_name",
        metavar="\"CORPUS NAME\"",
        help="Corpus name (for web interface) [Default: [corpus_path]]")
    parser.add_argument("config_file",
                        nargs="?",
                        help="Path to Config [optional]")
    parser.add_argument("--model-path",
                        dest="model_path",
                        help="Model Path [Default: [corpus_path]/../models]")

    # Both switches target the same destination; only one may be given.
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--unicode",
                       action="store_false",
                       dest='decode',
                       help="Store unicode characters. [Default]")
    group.add_argument("--decode",
                       action="store_true",
                       dest='decode',
                       help="Convert unicode characters to ascii.")
    # store_false would otherwise default to True; pin the shared default.
    parser.set_defaults(decode=False)

    parser.add_argument("--htrc", action="store_true")
    parser.add_argument("--rebuild", action="store_true")
    parser.add_argument("-q", "--quiet", action="store_true")
    parser.add_argument(
        "--tokenizer",
        default="default",
        choices=['zh', 'ltc', 'och', 'inpho', 'default', 'brain'])

    # NOTE(review): store_true with default=True means passing --simple does
    # not change the stored value; kept as-is for interface compatibility.
    parser.add_argument("--simple",
                        action="store_true",
                        default=True,
                        help="Skip sentence tokenizations [default].")
    parser.add_argument("--sentences",
                        action="store_true",
                        help="Parse at the sentence level")
    # Help text previously ended in an unbalanced ")".
    parser.add_argument(
        "--freq",
        dest="stop_freq",
        default=5,
        type=int,
        help="Filter words occurring less than freq times [Default: 5]")
示例#11
0
def populate_parser(parser):
    """Set the epilog and declare this command's arguments on ``parser``.

    The epilog lists every ``langs`` entry as ``key    Value`` (value
    capitalised), ordered by value, one entry per tab-indented line.  The
    ``--lang`` option accepts one or more of the ``langs`` keys.
    """
    # Language listing shown after the option help, sorted on the dict values.
    ordered = sorted(langs.items(), key=lambda entry: entry[1])
    listing = '\n\t'.join(
        '{k}    {v}'.format(k=code, v=label.capitalize())
        for code, label in ordered)
    parser.epilog = ('Available language stoplists (use 2-letter code): \n\t'
                     + listing)

    parser.add_argument(
        "config_file",
        type=lambda value: is_valid_filepath(parser, value),
        help="Path to Config",
    )
    parser.add_argument("--htrc", action="store_true")
    parser.add_argument(
        "--stopword-file",
        help="File with custom stopwords",
        dest="stopword_file",
    )
    parser.add_argument(
        "--high",
        default=None,
        dest="high_filter",
        type=int,
        help="High frequency word filter",
    )
    parser.add_argument(
        "--low",
        default=None,
        dest="low_filter",
        type=int,
        help="Low frequency word filter [Default: 5]",
    )
    parser.add_argument(
        "--lang",
        metavar='xx',
        choices=langs.keys(),
        nargs='+',
        help="Languages to stoplist. See options below.",
    )
示例#12
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    Positionals are ``corpus_path`` (converted by ``is_valid_filepath``) and
    an optional ``config_file``; options are ``--name``, ``--model-path``,
    ``--htrc``, ``--rebuild`` and ``--tokenizer`` (``inpho`` or ``default``).
    """
    add = parser.add_argument

    add("corpus_path", type=lambda value: is_valid_filepath(parser, value),
        help="Path to Corpus")
    add("--name", metavar="\"CORPUS NAME\"", dest="corpus_print_name",
        help="Corpus name (for web interface) [Default: [corpus_path]]")
    add("config_file", help="Path to Config [optional]", nargs="?")
    add("--model-path", dest="model_path",
        help="Model Path [Default: [corpus_path]/../models]")

    # Boolean switches, off unless given.
    add("--htrc", action="store_true")
    add("--rebuild", action="store_true")

    add("--tokenizer", default="default", choices=['inpho', 'default'])
示例#13
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    Registers the positional ``config_file`` (converted by
    ``is_valid_filepath``) and the ``--context-type``, ``-p/--processes``,
    ``--seed``, ``-k`` and ``--iter`` options.  ``-k`` collects one or more
    integers.
    """
    add = parser.add_argument

    add("config_file", type=lambda value: is_valid_filepath(parser, value),
        help="Path to Config")
    add("--context-type", help="Level of corpus modeling, prompts if not set",
        dest='context_type')

    # Integer-valued options.
    add("-p", "--processes", type=int, default=1,
        help="Number of CPU cores for training [Default: 1]")
    add("--seed", type=int, default=None,
        help="Random seed for topic modeling [Default: None]")
    add("-k", type=int, nargs='+', help="K values to train upon")
    add('--iter', help="Number of training iterations", type=int)
示例#14
0
def populate_parser(parser):
    """Declare this command's arguments and defaults on ``parser``.

    Positionals are ``corpus_path`` (converted by ``is_valid_filepath``) and
    an optional ``config_file``.  Options cover the corpus name, model path,
    tokenizer choice, ``--unidecode`` and the ``--htrc``/``--rebuild``/
    ``--quiet`` switches.  ``stop_freq``, ``nltk``, ``simple`` and
    ``sentences`` have no command-line flag here and are supplied purely as
    parser defaults.
    """
    # Arguments registered before the ``decode`` default is pinned.
    leading = [
        (("corpus_path",), {
            "help": "Path to Corpus",
            "type": lambda value: is_valid_filepath(parser, value),
        }),
        (("config_file",), {
            "nargs": "?",
            "help": "Path to Config [optional]",
        }),
        (("--name",), {
            "dest": "corpus_print_name",
            "metavar": "\"CORPUS NAME\"",
            "help": "Corpus name (for web interface) [Default: [corpus_path]]",
        }),
        (("--model-path",), {
            "dest": "model_path",
            "help": "Model Path [Default: [corpus_path]/../models]",
        }),
        (("--tokenizer",), {
            "default": "default",
            "choices": ['default', 'simple', 'ltc', 'zh', 'inpho', 'brain'],
        }),
        (("--unidecode",), {
            "action": "store_true",
            "dest": 'decode',
            "help": "Convert unicode characters to ascii.",
        }),
    ]
    for names, keywords in leading:
        parser.add_argument(*names, **keywords)
    parser.set_defaults(decode=False)

    # Simple on/off switches.
    for names in (("--htrc",), ("--rebuild",), ("-q", "--quiet")):
        parser.add_argument(*names, action="store_true")

    parser.set_defaults(stop_freq=0, nltk=False, simple=True, sentences=False)
示例#15
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    ``path`` takes one or more values, each converted by
    ``util.is_valid_filepath`` (which also receives ``parser``).
    ``--tokenizer`` is limited to ``ancient``/``modern`` and ``-o/--output``
    is mandatory.
    """
    path_options = {
        "nargs": '+',
        "type": lambda value: util.is_valid_filepath(parser, value),
        "help": "file or folder to parse",
    }
    tokenizer_options = {
        "default": "modern",
        "choices": ['ancient', 'modern'],
    }
    output_options = {"help": "output path", "required": True}

    parser.add_argument("path", **path_options)
    parser.add_argument("--tokenizer", **tokenizer_options)
    parser.add_argument("-o", '--output', **output_options)
示例#16
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    ``config_file`` is converted by ``is_valid_filepath`` (which also
    receives ``parser``); ``--no-launch`` stores ``False`` in ``launch``.
    """
    config_options = {
        'type': lambda value: is_valid_filepath(parser, value),
        'help': "Path to Config File",
    }
    parser.add_argument("config_file", **config_options)
    parser.add_argument('--no-launch', action='store_false', dest='launch')
示例#17
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    ``config_file`` is converted by ``is_valid_filepath`` (which also
    receives ``parser``); ``--no-browser`` stores ``False`` in ``browser``.
    """
    add = parser.add_argument
    add('config_file',
        type=lambda value: is_valid_filepath(parser, value),
        help="Configuration file path")
    add('--no-browser', action='store_false', dest='browser')
示例#18
0
                futures.append(
                    executor.submit(convert_and_write, pdffile, output_dir,
                                    True, True))

        if verbose == 1:
            pbar = ProgressBar(widgets=[Percentage(), Bar()],
                               maxval=len(futures)).start()

            for file_n, f in enumerate(
                    concurrent.futures.as_completed(futures)):
                pbar.update(file_n)

            pbar.finish()


if __name__ == '__main__':
    # Script entry point: build the command-line parser, read the arguments
    # and pass them on to main().
    from argparse import ArgumentParser
    parser = ArgumentParser()

    # One or more input paths; every value is converted by
    # util.is_valid_filepath, which also receives the parser.
    parser.add_argument("path",
                        nargs='+',
                        help="PDF file or folder to parse",
                        type=lambda x: util.is_valid_filepath(parser, x))
    # Optional output location; args.output is None when the flag is omitted.
    parser.add_argument("-o",
                        '--output',
                        help="output path [default: same as filename]")

    args = parser.parse_args()

    main(args.path, args.output)
示例#19
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    ``config_file`` is converted by ``is_valid_filepath`` (which also
    receives ``parser``); ``--no-browser`` stores ``False`` in ``browser``.
    """
    # (option strings, add_argument keyword arguments), in registration order.
    arguments = (
        (('config_file',), {
            'type': lambda value: is_valid_filepath(parser, value),
            'help': "Configuration file path",
        }),
        (('--no-browser',), {'action': 'store_false', 'dest': 'browser'}),
    )
    for names, keywords in arguments:
        parser.add_argument(*names, **keywords)
示例#20
0
                    try:
                        futures.append(executor.submit(convert_and_write, pdffile, output_dir, True))
                    except (PDFException, PSException):
                        print "Skipping {0} due to PDF Exception".format(pdffile)
            else:
                futures.append(executor.submit(convert_and_write, pdffile, output_dir, True, True))

        if verbose == 1:
            pbar = ProgressBar(widgets=[Percentage(), Bar()],
                                   maxval=len(futures)).start()

            for file_n,f in enumerate(concurrent.futures.as_completed(futures)):
                pbar.update(file_n)

            pbar.finish()

    

if __name__ == '__main__':
    # Script entry point: build the command-line parser, read the arguments
    # and pass them on to main().
    from argparse import ArgumentParser
    parser = ArgumentParser()

    # One or more input paths; every value is converted by
    # util.is_valid_filepath, which also receives the parser.
    parser.add_argument("path", nargs='+', help="PDF file or folder to parse",
        type=lambda x: util.is_valid_filepath(parser, x))
    # Optional output location; args.output is None when the flag is omitted.
    parser.add_argument("-o", '--output',
        help="output path [default: same as filename]")

    args = parser.parse_args()

    main(args.path, args.output)
示例#21
0
def populate_parser(parser):
    """Declare this command's arguments on ``parser``.

    ``config_file`` is converted by ``is_valid_filepath`` (which also
    receives ``parser``); ``--no-launch`` stores ``False`` in ``launch``.
    """
    add = parser.add_argument
    add(
        "config_file",
        type=lambda value: is_valid_filepath(parser, value),
        help="Path to Config File",
    )
    add('--no-launch', action='store_false', dest='launch')