示例#1
0
def generate_config():
    """Interactively build a bot configuration and return it as TOML text.

    Loads ``example.toml`` from the current working directory as a template,
    asks the user for a new value for every key in its ``Bot`` table (the kind
    of question depends on the type of the template value), then asks which
    of the modules returned by ``getmodules()`` should be appended to
    ``config["Modules"]["enabled"]``.

    Returns:
        The resulting configuration serialized with ``qtoml.dumps``.

    Raises:
        AssertionError: if a non-numeric answer is given for an integer
            setting.
    """
    # TODO ensure this works still
    # TOML documents are UTF-8 by specification, so do not rely on the
    # platform default encoding when reading the template.
    config = qtoml.loads(Path("example.toml").read_text(encoding="utf-8"))
    botsettings = config["Bot"]
    # Iterate over a snapshot of the keys because values are reassigned below.
    for each in list(botsettings):
        current = botsettings[each]
        # bool has to be tested before int: bool is a subclass of int.
        if isinstance(current, bool):
            botsettings[each] = prompt(
                f"Would you like to enable {each}? y/N ")
        elif isinstance(current, int):
            numerical_option = input(f"Please enter a number for {each}: ")
            # Raised explicitly (instead of using an `assert` statement) so
            # the check is still performed when Python runs with -O.
            if not numerical_option.isdecimal():
                raise AssertionError("Must be a number!")
            botsettings[each] = int(numerical_option)
        else:
            botsettings[each] = input(f"Please enter your {each}: ")

    modules = getmodules()
    print(
        "Enter the number for each module you'd like to enable, separated by commas"
    )
    print("Example: 1,5,8")
    message = ", ".join(f"{i}) {v}" for i, v in enumerate(modules))
    to_enable = input(f"{message}\n")
    for module_index in to_enable.split(","):
        # Tolerate whitespace around entries, e.g. "1, 5, 8".
        module_index = module_index.strip()
        # Valid indices are 0 .. len(modules) - 1; anything else is ignored.
        if module_index.isdecimal() and int(module_index) < len(modules):
            config["Modules"]["enabled"].append(modules[int(module_index)])
    return qtoml.dumps(config)
示例#2
0
def write_config(config: str, path: str) -> bool:
    """Show a configuration to the user and, once confirmed, write it to disk.

    Args:
        config: The configuration to store. A string is treated as already
            serialized TOML and written verbatim; any other object is
            serialized with ``qtoml.dump``.
        path: Destination file, resolved against the current working
            directory.

    Returns:
        True if the file was written, False if the user did not confirm.
    """
    check = prompt(f"\n{config}\nDoes this look correct? y/N")
    if not check:
        return False

    target = Path.cwd() / path
    # TOML documents are UTF-8 by specification.
    with open(target, "w", encoding="utf-8") as tomlfile:
        if isinstance(config, str):
            # Already TOML text; qtoml.dump() serializes mappings and cannot
            # take a plain string.
            tomlfile.write(config)
        else:
            qtoml.dump(config, tomlfile)
    return True
示例#3
0
def _confirm_remove_corpus_desc():
    """Ask whether an existing corpus description may be replaced.

    Re-prompts until the answer is ``y``, ``n`` or empty; an empty answer
    takes the default advertised in the prompt (no).

    Returns:
        True if the user answered ``y``, False otherwise.
    """
    while True:
        answer = input("\nExisting corpus description found. Remove? [y/N] ")
        answer = answer.lower().strip()
        if answer == 'y':
            return True
        if answer in ('n', ''):
            return False


def main(args):
    """Initialize a corpus from parsed command-line arguments.

    Resolves the corpus and model paths, optionally fetches HTRC metadata,
    builds the corpus file when a rebuild is required, writes the
    configuration file and creates a default Markdown corpus description
    next to it.

    Args:
        args: Namespace-like object. The attributes read here are
            ``corpus_path``, ``corpus_print_name``, ``quiet``,
            ``model_path``, ``stop_freq``, ``rebuild``, ``htrc``, ``decode``,
            ``sentences``, ``simple``, ``tokenizer`` and ``config_file``.
            The object is updated in place (``bibtex``, ``corpus_name``,
            ``corpus_filename``, ``htrc_metapath``, ``corpus_desc`` and
            others are assigned).

    Returns:
        The value returned by ``write_config``, stored as
        ``args.config_file``.

    Exits the process with status 74 if ``build_corpus`` raises ``IOError``.
    """
    # TODO: remove this code, check if there is an issue and unit test
    # convert to unicode to avoid windows errors
    # args.corpus_path = args.corpus_path

    # config corpus_path
    # process bibtex files: args.bibtex ends up holding the original .bib
    # path (or False) while corpus_path is replaced by the processed corpus.
    args.bibtex = args.corpus_path.endswith('.bib')
    if args.bibtex:
        args.bibtex = args.corpus_path
        args.corpus_path = process_bibtex(args.corpus_path)

    # set corpus_name; a path with a trailing separator has an empty
    # basename, so fall back to the name of its directory part.
    args.corpus_name = os.path.basename(args.corpus_path)
    if not args.corpus_name:
        args.corpus_name = os.path.basename(os.path.dirname(args.corpus_path))

    if not args.corpus_print_name and not args.quiet:
        args.corpus_print_name = prompt("Corpus Name", default=args.corpus_name)

    # configure model-path
    if args.model_path is None:
        if os.path.isdir(args.corpus_path):
            args.model_path = os.path.join(args.corpus_path, '../models/')
        else:
            args.model_path = os.path.dirname(args.corpus_path)
    if args.model_path and not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    args.corpus_filename = get_corpus_filename(
        args.corpus_path, args.model_path, stop_freq=args.stop_freq)
    # Only ask when a corpus file already exists and no rebuild was
    # requested; in every other case a build is required.
    if not args.rebuild and os.path.exists(args.corpus_filename):
        args.rebuild = bool_prompt("\nCorpus file found. Rebuild? ",
                                   default=False)
    else:
        args.rebuild = True

    if args.htrc:
        # Aliased so it is not shadowed by the top-level `htrc` package that
        # is imported further down.
        import vsm.extensions.htrc as vsm_htrc
        if os.path.isdir(args.corpus_path):
            vsm_htrc.proc_htrc_coll(args.corpus_path)
            ids = listdir_nohidden(args.corpus_path)
        else:
            # corpus_path is a file with one volume id per line.
            import topicexplorer.extensions.htrc_features as htrc_features
            with open(args.corpus_path) as idfile:
                ids = [row.strip() for row in idfile]

            c = htrc_features.create_corpus(ids)
            c.save(args.corpus_filename)

        # The metadata file sits next to the corpus (file or directory) and
        # is named "<corpus name>.metadata.json". abspath() also drops any
        # trailing separator, so basename() is never empty here.
        corpus_abspath = os.path.abspath(args.corpus_path)
        args.htrc_metapath = os.path.join(
            os.path.dirname(corpus_abspath),
            os.path.basename(corpus_abspath) + '.metadata.json')

        import htrc.metadata
        print("Downloading metadata to ", args.htrc_metapath)
        htrc.metadata.get_metadata(ids, output_file=args.htrc_metapath)

    # For an HTRC id file the corpus was already created and saved above.
    if args.rebuild and (not args.htrc or os.path.isdir(args.corpus_path)):
        try:
            args.corpus_filename = build_corpus(
                args.corpus_path, args.model_path,
                stop_freq=args.stop_freq, decode=args.decode,
                sentences=args.sentences,
                simple=args.simple, tokenizer=args.tokenizer)
        except IOError:
            print("ERROR: invalid path, please specify either:")
            print("  * a single plain-text or PDF file,")
            print("  * a single bibtex (.bib) file with 'file' fields,")
            print("  * a folder of plain-text or PDF files, or")
            print("  * a folder of folders of plain-text or PDF files.")
            print("\nExiting...")
            sys.exit(74)
        # NOTE: a disabled Python 2 handler for LookupError used to follow
        # here. It told the user to run `python -m nltk.downloader punkt` or
        # `python -m nltk.downloader stopwords` and exited with status 74.
        # A LookupError from build_corpus currently propagates to the caller.

    args.config_file = write_config(args, args.config_file)

    # Default description lives next to the config file. An existing one is
    # only replaced after confirmation, unless running in quiet mode.
    args.corpus_desc = args.config_file + '.md'
    if not args.quiet and os.path.exists(args.corpus_desc):
        if not _confirm_remove_corpus_desc():
            # Both "n" and the empty default keep the existing file.
            args.corpus_desc = False

    if args.corpus_desc:
        description = (
            "This is an instance of the [InPhO Topic Explorer]"
            "(http://inphodata.cogs.indiana.edu/). If you would like\n"
            "to add a custom corpus description, either:\n"
            "- Modify the contents of the file `{}`\n"
            "- Change the main:corpus_desc path in `{}` to an existing Markdown file.\n"
        ).format(os.path.abspath(args.corpus_desc),
                 os.path.abspath(args.config_file))
        with open(args.corpus_desc, 'w') as outfile:
            outfile.write(description)

    return args.config_file