def generate_config():
    """Interactively build a bot configuration and return it as TOML text.

    Loads ``example.toml`` from the current working directory as a template,
    asks the user for a value for every key in its ``Bot`` table, then asks
    which modules (as listed by ``getmodules()``) should be appended to
    ``config["Modules"]["enabled"]``.

    Returns:
        The completed configuration serialized with ``qtoml.dumps``.

    Raises:
        AssertionError: if a non-numeric value is entered for an integer
            setting.
    """
    # TODO ensure this works still
    config = qtoml.loads(Path("example.toml").read_text(encoding="utf-8"))
    botsettings = config["Bot"]
    for each in botsettings:
        current = botsettings[each]
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(current, bool):
            botsettings[each] = prompt(
                f"Would you like to enable {each}? y/N ")
        elif isinstance(current, int):
            numerical_option = input(f"Please enter a number for {each}: ")
            # Raised explicitly (rather than via ``assert``) so the check is
            # not stripped under ``python -O``; the exception type is kept
            # for any caller that already catches it.
            if not numerical_option.isdigit():
                raise AssertionError("Must be a number!")
            botsettings[each] = int(numerical_option)
        else:
            botsettings[each] = input(f"Please enter your {each}: ")

    modules = getmodules()
    print(
        "Enter the number for each module you'd like to enable, separated by commas"
    )
    print("Example: 1,5,8")
    message = ", ".join(f"{i}) {v}" for i, v in enumerate(modules))
    to_enable = input(f"{message}\n")
    for module_index in to_enable.split(","):
        # Tolerate "1, 5, 8" style input.
        module_index = module_index.strip()
        # Menu numbers are 0-based (see enumerate above), so the largest
        # valid index is len(modules) - 1; anything else is ignored.
        if module_index.isdigit() and int(module_index) < len(modules):
            config["Modules"]["enabled"].append(modules[int(module_index)])
    return qtoml.dumps(config)
def write_config(config: str, path: str) -> bool:
    """Show *config* to the user and, once confirmed, write it to *path*.

    Args:
        config: The configuration as already-serialized TOML text.
        path: Destination file, resolved relative to the current working
            directory.

    Returns:
        ``False`` if the confirmation prompt returned ``False`` (nothing is
        written); ``True`` after the file has been written.
    """
    check = prompt(f"\n{config}\nDoes this look correct? y/N")
    if check is False:
        return False

    target = Path.cwd() / path
    # ``config`` is TOML text, not a mapping, so it is written verbatim;
    # handing it to ``qtoml.dump`` would try to encode the string again.
    # TOML documents are UTF-8 by specification.
    with open(target, "w", encoding="utf-8") as tomlfile:
        tomlfile.write(config)
    return True
def main(args):
    """Prepare a corpus from *args*, write its config and return the config path.

    Steps, in order:

    1. If ``args.corpus_path`` ends in ``.bib``, remember the bibtex path in
       ``args.bibtex`` and replace ``args.corpus_path`` with the result of
       ``process_bibtex``.
    2. Derive ``args.corpus_name`` (and, unless ``args.quiet``, prompt for
       ``args.corpus_print_name`` when it is unset).
    3. Choose and create ``args.model_path`` when none was given.
    4. Decide whether the corpus must be (re)built, handle the HTRC
       variants, and build the corpus when required.
    5. Write the configuration via ``write_config`` and, when appropriate,
       a default Markdown corpus description next to it.

    Args:
        args: Namespace-like object; the attributes read here are
            ``corpus_path``, ``corpus_print_name``, ``quiet``,
            ``model_path``, ``stop_freq``, ``rebuild``, ``htrc``,
            ``decode``, ``sentences``, ``simple``, ``tokenizer`` and
            ``config_file``. Several attributes are overwritten in place.

    Returns:
        ``args.config_file`` as returned by ``write_config``.

    Exits:
        Calls ``sys.exit(74)`` when ``build_corpus`` raises ``IOError``.
    """
    # TODO: remove this code, check if there is an issue and unit test
    # convert to unicode to avoid windows errors
    # args.corpus_path = args.corpus_path

    # config corpus_path
    # process bibtex files
    args.bibtex = args.corpus_path.endswith('.bib')
    if args.bibtex:
        args.bibtex = args.corpus_path
        args.corpus_path = process_bibtex(args.corpus_path)

    # set corpus_name
    args.corpus_name = os.path.basename(args.corpus_path)
    if not args.corpus_name:
        # corpus_path ended with a separator; use the directory's own name.
        args.corpus_name = os.path.basename(os.path.dirname(args.corpus_path))

    if not args.corpus_print_name and not args.quiet:
        args.corpus_print_name = prompt("Corpus Name", default=args.corpus_name)

    # configure model-path
    if args.model_path is None:
        if os.path.isdir(args.corpus_path):
            args.model_path = os.path.join(args.corpus_path, '../models/')
        else:
            args.model_path = os.path.dirname(args.corpus_path)
    if args.model_path and not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    args.corpus_filename = get_corpus_filename(
        args.corpus_path, args.model_path, stop_freq=args.stop_freq)
    if not args.rebuild and os.path.exists(args.corpus_filename):
        args.rebuild = bool_prompt("\nCorpus file found. Rebuild? ",
                                   default=False)
    else:
        args.rebuild = True

    if args.htrc:
        import vsm.extensions.htrc as htrc
        if os.path.isdir(args.corpus_path):
            htrc.proc_htrc_coll(args.corpus_path)
            ids = listdir_nohidden(args.corpus_path)

            args.htrc_metapath = os.path.abspath(args.corpus_path + '/../')
            args.htrc_metapath = os.path.join(
                args.htrc_metapath,
                os.path.dirname(args.corpus_path) + '.metadata.json')
        else:
            import topicexplorer.extensions.htrc_features as htrc_features
            # corpus_path is a file with one volume id per line.
            with open(args.corpus_path) as idfile:
                ids = [row.strip() for row in idfile]

            c = htrc_features.create_corpus(ids)
            c.save(args.corpus_filename)

            args.htrc_metapath = os.path.abspath(args.corpus_path)
            args.htrc_metapath = os.path.join(
                os.path.dirname(args.htrc_metapath),
                os.path.basename(args.htrc_metapath) + '.metadata.json')

        # NOTE: this rebinds the local name ``htrc`` from
        # ``vsm.extensions.htrc`` to the top-level ``htrc`` package.
        import htrc.metadata
        print("Downloading metadata to ", args.htrc_metapath)
        htrc.metadata.get_metadata(ids, output_file=args.htrc_metapath)

    if args.rebuild and (not args.htrc or os.path.isdir(args.corpus_path)):
        try:
            args.corpus_filename = build_corpus(args.corpus_path,
                                                args.model_path,
                                                stop_freq=args.stop_freq,
                                                decode=args.decode,
                                                sentences=args.sentences,
                                                simple=args.simple,
                                                tokenizer=args.tokenizer)
        except IOError:
            print("ERROR: invalid path, please specify either:")
            print(" * a single plain-text or PDF file,")
            print(" * a single bibtex (.bib) file with 'file' fields,")
            print(" * a folder of plain-text or PDF files, or")
            print(" * a folder of folders of plain-text or PDF files.")
            print("\nExiting...")
            sys.exit(74)
        # NOTE: a former ``except LookupError`` handler (hints to run
        # ``python -m nltk.downloader punkt`` / ``stopwords``) was disabled
        # here; it used Python 2 ``print`` statements and ``e.message``.

    args.config_file = write_config(args, args.config_file)

    args.corpus_desc = args.config_file + '.md'
    if not args.quiet and os.path.exists(args.corpus_desc):
        answer = None
        while answer not in ('y', 'n', ''):
            answer = input("\nExisting corpus description found. Remove? [y/N] ")
            answer = answer.lower().strip()
        if answer != 'y':
            # "n" or the empty default answer: keep the existing description.
            # Leaving the raw answer "n" in args.corpus_desc would make the
            # write below create a file literally named "n".
            args.corpus_desc = False

    if args.corpus_desc:
        with open(args.corpus_desc, 'w') as outfile:
            outfile.write(
"""This is an instance of the [InPhO Topic Explorer](http://inphodata.cogs.indiana.edu/). If you would like
to add a custom corpus description, either:
- Modify the contents of the file `{}`
- Change the main:corpus_desc path in `{}` to an existing Markdown file.
""".format(os.path.abspath(args.corpus_desc),
           os.path.abspath(args.config_file)))

    return args.config_file