def main(args):
    """Print the mapped subtopics and column names for every input file.

    Options are read from the mapping returned by ``parseargs(args)``:

    * ``files``   -- files to process; the program exits with status 0
      when none are given.
    * ``topic``   -- topic name, defaults to "IT Assessment".
    * ``summary`` -- summary file (required).
    * ``tmodel``  -- topic model file (required).
    * ``smodel``  -- subtopic model file (required).

    Each option value is indexed with ``[0]``, so ``parseargs`` presumably
    maps option names to lists of values -- confirm against ``parseargs``.

    A missing required option prints a message and exits with status 1.
    """
    argsmap = parseargs(args)

    files = argsmap.get('files')
    if not files:
        sys.exit(0)

    topic = argsmap.get("topic")
    topic = topic[0] if topic else "IT Assessment"

    summaryfile = argsmap.get("summary")
    if not summaryfile:
        print("Summary file must be specified...")
        sys.exit(1)
    summary_map = find_topics.get_summary_map(summaryfile[0])

    tmodelfile = argsmap.get("tmodel")
    if not tmodelfile:
        # The option key is "tmodel", so that is the flag name to report.
        print('Topic Model must be specified using --tmodel ...')
        sys.exit(1)
    origmap, sorted_y, vectorizer, le, grid_search = read_model_file(
        tmodelfile[0])
    mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)
    subtopicReader = SubtopicReader(topic, mapper, summary_map)

    smodelfile = argsmap.get("smodel")
    if not smodelfile:
        print('Subtopic Model must be specified using --smodel ...')
        sys.exit(1)
    subtopicPredictor = SubtopicPredictor(smodelfile[0])

    for filename in files:
        subtopic_dict = subtopicReader.mapped_subtopics(
            filename, subtopicPredictor)
        # Queried after mapping each file, in the same order as before, in
        # case the reader's column names depend on what it has just read.
        subtopic_columns = subtopicReader.get_column_names(subtopicPredictor)
        print(json.dumps(subtopic_dict, indent=2))
        print('---------------------------------------')
        print(subtopic_columns)
def main(args):
    """Read subtopics from every input file and optionally print a summary.

    Options are read from the mapping returned by ``parseargs(args)``:

    * ``files``         -- files to process; the program exits with
      status 0 when none are given.
    * ``topic``         -- topic name, defaults to "IT Assessment".
    * ``summary``       -- summary file (required).
    * ``model``         -- model file (required).
    * ``print_detail``  -- when present, ``True`` is passed as the second
      argument of ``SubtopicReader.read_all_files``.
    * ``print_summary`` -- when present, ``SubtopicReader.print_summary``
      is called after all files have been read.

    Each option value is indexed with ``[0]``, so ``parseargs`` presumably
    maps option names to lists of values -- confirm against ``parseargs``.

    A missing required option prints a message and exits with status 1.
    """
    argsmap = parseargs(args)

    files = argsmap.get('files')
    if not files:
        sys.exit(0)

    topic = argsmap.get("topic")
    topic = topic[0] if topic else "IT Assessment"

    summaryfile = argsmap.get("summary")
    if not summaryfile:
        print("Summary file must be specified...")
        sys.exit(1)
    summary_map = find_topics.get_summary_map(summaryfile[0])

    modelfile = argsmap.get("model")
    if not modelfile:
        print('Model must be specified...')
        sys.exit(1)
    origmap, sorted_y, vectorizer, le, grid_search = read_model_file(
        modelfile[0])
    mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)

    subtopicReader = SubtopicReader(topic, mapper, summary_map)
    # The return value is not needed here; the reader is queried afterwards.
    subtopicReader.read_all_files(files, 'print_detail' in argsmap)
    if 'print_summary' in argsmap:
        subtopicReader.print_summary()
def main(args):
    """Load the summary map and model, then process every input file.

    Options are read from the mapping returned by ``parseargs(args)``:

    * ``files``   -- files to process; the program exits with status 0
      when none are given.
    * ``summary`` -- summary file (required).
    * ``model``   -- model file (required).

    Each option value is indexed with ``[0]``, so ``parseargs`` presumably
    maps option names to lists of values -- confirm against ``parseargs``.

    A missing required option prints a message and exits with status 1.
    The files are handed to ``read_all_files`` together with the ``Mapper``
    built from the model and the summary map.
    """
    argsmap = parseargs(args)

    files = argsmap.get('files')
    if not files:
        sys.exit(0)

    # Validate before indexing: a missing option yields None, and None[0]
    # would otherwise surface as an unhelpful TypeError.
    summaryfile = argsmap.get("summary")
    if not summaryfile:
        print("Summary file must be specified...")
        sys.exit(1)
    summary_map = get_summary_map(summaryfile[0])

    modelfile = argsmap.get("model")
    if not modelfile:
        print('Model must be specified...')
        sys.exit(1)
    origmap, sorted_y, vectorizer, le, grid_search = read_model_file(
        modelfile[0])
    mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)

    read_all_files(files, mapper, summary_map)
def _first_arg_value(argsmap, key):
    """Return the first value supplied for option *key*.

    When the option is absent or has no values, the falsy result of
    ``argsmap.get(key)`` is returned unchanged, so callers can simply test
    the result with ``if not value``.
    """
    values = argsmap.get(key)
    return values[0] if values else values


def main(args):
    """Collect the command-line options and run ``get_headers_for_files``.

    Options are read from the mapping returned by ``parseargs(args)``:

    * ``files``   -- files to process; the program exits with status 0
      when none are given.
    * ``summary`` -- summary file (required).
    * ``model``   -- model file (required).
    * ``nosplit`` -- treated as set only when its value is an empty list;
      the split count is then 0.
    * ``split``   -- split count, converted with ``int``; defaults to 4.
      Ignored when ``nosplit`` is set.
    * ``NL``      -- when given, overrides the module-level
      ``NEWLINE_WITHIN_COLUMN``.
    * ``out``     -- output file, passed through to
      ``get_headers_for_files``.
    * ``err``     -- exception file (required).
    * ``rmap``    -- ratings mapper file (required), used to build
      ``Ratings``.
    * ``fd``      -- when given, overrides ``CSV_FIELD_DELIMITER``.
    * ``fdr``     -- when given, overrides ``FD_REPLACED``.
    * ``smodels`` / ``stopics`` -- passed through unmodified.

    Option values are indexed with ``[0]``, so ``parseargs`` presumably
    maps option names to lists of values -- confirm against ``parseargs``.

    A missing required option, or a ``split`` value that is not an
    integer, prints a message and exits with status 1.
    """
    global NEWLINE_WITHIN_COLUMN, CSV_FIELD_DELIMITER, FD_REPLACED

    argsmap = parseargs(args)

    files = argsmap.get('files')
    if not files:
        sys.exit(0)

    summaryfile = argsmap.get("summary")
    if not summaryfile:
        print('Summary file must be specified...')
        sys.exit(1)
    summary_map = get_summary_map(summaryfile[0])

    modelfile = argsmap.get("model")
    if not modelfile:
        print('Model must be specified...')
        sys.exit(1)
    origmap, sorted_y, vectorizer, le, grid_search = read_model_file(
        modelfile[0])
    topics = toc_entries(origmap)
    mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)

    # A bare flag shows up as an empty value list; anything else (absent,
    # or given with values) leaves splitting enabled.
    nosplit = argsmap.get('nosplit') == []
    if nosplit:
        topic_split_times = 0
    else:
        split_values = argsmap.get('split')
        if not split_values:
            topic_split_times = 4
        else:
            try:
                topic_split_times = int(split_values[0])
            except ValueError:
                print("Split count given with the --split option must be "
                      "an integer...")
                sys.exit(1)

    # Newline character used inside multiline columns.
    newline_within_column = _first_arg_value(argsmap, 'NL')
    if newline_within_column:
        NEWLINE_WITHIN_COLUMN = newline_within_column

    outfile = _first_arg_value(argsmap, "out")

    exfile = _first_arg_value(argsmap, "err")
    if not exfile:
        print("Exception file name must be entered using the --err option...")
        sys.exit(1)

    ratings_mapper_file = _first_arg_value(argsmap, "rmap")
    if not ratings_mapper_file:
        print("Ratings Mapper File file name must be entered using the --rmap option...")
        sys.exit(1)
    ratings = Ratings(ratings_mapper_file)

    field_delim = _first_arg_value(argsmap, 'fd')
    if field_delim:
        CSV_FIELD_DELIMITER = field_delim

    fd_replaced = _first_arg_value(argsmap, 'fdr')
    if fd_replaced:
        FD_REPLACED = fd_replaced

    smodels = argsmap.get("smodels")
    stopics = argsmap.get("stopics")

    get_headers_for_files(files, topics, mapper, summary_map, outfile,
                          exfile, nosplit, topic_split_times, ratings,
                          smodels, stopics)
#!/usr/bin/env python
"""Print the topics of a model file, optionally dumping its mapping to CSV.

Usage: <script> MODELFILE [CSVFILE]

The topics obtained from ``toc_entries`` are printed in case-insensitive
alphabetical order, one per line, each followed by ``*``. When CSVFILE is
given, every item of the model's map is written to it as a row of the
form ``key, "OK", value``.
"""
import csv
import sys

from predict_using_toc_mapper import Mapper, get_topic, read_model_file
from find_topics import toc_entries, get_summary_map, read_topics


def print_topics(topics):
    """Print each topic on its own line, followed by a ``*``."""
    for topic in topics:
        print(topic + '*')


if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Passing a string to sys.exit writes it to stderr and exits with 1.
        sys.exit('Usage: ' + sys.argv[0] + ' MODELFILE [CSVFILE]')

    # Only the first element of the model tuple (the map) is needed here.
    origmap, *_ = read_model_file(sys.argv[1])
    topics = toc_entries(origmap)
    print_topics(sorted(topics, key=lambda x: x.lower()))

    if len(sys.argv) > 2:
        # newline='' lets the csv module control line endings itself; the
        # with-block closes the file even if writing a row fails.
        with open(sys.argv[2], 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerows(
                [orig, "OK", mapped] for orig, mapped in origmap.items())