def _mine_stories(self, stories, systemname, **kw):
    # Keep track of all errors
    errors = ""
    # Keep track of all successfully created UserStory objects
    us_instances = []
    failed_stories = []
    # Parse every user story (remove punctuation and mine)
    for us_id, s in enumerate(stories, start=1):
        try:
            user_story = self.parse(s, us_id, systemname, StoryMiner())
            user_story = Counter.count(user_story)
            us_instances.append(user_story)
        except ValueError as err:
            failed_stories.append([us_id, s, err.args])
            errors += "\n[User Story {} ERROR] {}! (\"{}\")".format(
                us_id, err.args[0], " ".join(s.split()))
    # Print errors (if found)
    if errors:
        Printer._print_head("PARSING ERRORS")
        print(errors)
    return us_instances, failed_stories
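# --- Usage sketch (illustrative, not part of the original source) ---
# _mine_stories is a method, so it needs an enclosing object that provides
# parse(); the MinerTool name and the sample stories below are hypothetical.
#
#   tool = MinerTool()
#   stories = [
#       "As a visitor, I want to register, so that I can post comments.",
#       "this line is not a well-formed user story",
#   ]
#   us_instances, failed_stories = tool._mine_stories(stories, "webshop")
#   # us_instances holds the successfully mined UserStory objects;
#   # failed_stories holds [us_id, text, error_args] for stories that raised ValueError.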
def gen_concept_model(self):
    nlp = self.nlp
    miner = StoryMiner()
    counter = Counter()
    # Keep track of the number of successes and failures
    success = 0
    fail = 0
    # Keep track of all successful and failed user stories
    user_stories_lst = []
    failed_stories_lst = []
    success_stories_lst = []
    # Parse every user story (remove punctuation and mine)
    for us_id, story_line in enumerate(self.messages, start=1):
        try:
            user_story = UserStory.parse(story_line, us_id, self.system_name, nlp, miner)
            user_story = counter.count(user_story)
            success += 1
            user_stories_lst.append(user_story)
            success_stories_lst.append(story_line)
        except ValueError as err:
            failed_stories_lst.append([us_id, story_line, err.args])
            fail += 1
    # Generate the term-by-user-story matrix (m), and additional data in two other matrices
    matrix = Matrix(self.base_weight, self.weights)
    matrices = matrix.generate(
        user_stories_lst, ' '.join([u.sentence for u in user_stories_lst]), nlp)
    m, count_matrix, stories_list, rme = matrices
    # Generate the ontology
    patterns = Constructor(nlp, user_stories_lst, m)
    out = patterns.make(self.system_name, self.threshold, self.link)
    output_ontology, output_prolog, output_ontobj, output_prologobj, onto_per_role = out
    # Return objects so that they can be used as input for other tools
    return {
        'stories': user_stories_lst,
        'ontology': output_ontology,
        'prolog': output_prolog,
        'matrix': m
    }
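# --- Usage sketch (illustrative) ---
# gen_concept_model reads its input from attributes rather than arguments:
# self.messages (the raw story lines), self.system_name, self.base_weight,
# self.weights, self.threshold, self.link and self.nlp. Assuming a
# hypothetical owner class wires those up, a caller might do:
#
#   model = ConceptModel(messages=story_lines, system_name="webshop", ...)
#   result = model.gen_concept_model()
#   print(result['ontology'])   # Manchester OWL text
#   result['matrix']            # term-by-user-story weight matrix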
def main(filename, systemname, print_us, print_ont, statistics, link, prolog,
         json, per_role, threshold, base, weights, spacy_nlp):
    """Run the entire program: parse, mine, build matrices, and generate the ontology."""
    start_nlp_time = timeit.default_timer()
    nlp = spacy_nlp
    nlp_time = timeit.default_timer() - start_nlp_time

    start_parse_time = timeit.default_timer()
    miner = StoryMiner()

    # Read the input file
    stories = Reader.parse(filename)
    us_id = 1

    # Keep track of the number of successes and failures
    success = 0
    fail = 0
    errors = ""
    c = Counter()

    # Keep track of all successfully created UserStory objects
    us_instances = []
    failed_stories = []
    success_stories = []

    # Parse every user story (remove punctuation and mine)
    for s in stories:
        try:
            user_story = parse(s, us_id, systemname, nlp, miner)
            user_story = c.count(user_story)
            success += 1
            us_instances.append(user_story)
            success_stories.append(s)
        except ValueError as err:
            failed_stories.append([us_id, s, err.args])
            errors += "\n[User Story {} ERROR] {}! (\"{}\")".format(
                us_id, err.args[0], " ".join(s.split()))
            fail += 1
        us_id += 1

    # Print errors (if found)
    if errors:
        Printer.print_head("PARSING ERRORS")
        print(errors)

    parse_time = timeit.default_timer() - start_parse_time

    # Generate the term-by-user-story matrix (m), and additional data in two other matrices
    start_matr_time = timeit.default_timer()
    matrix = Matrix(base, weights)
    matrices = matrix.generate(
        us_instances, ' '.join([u.sentence for u in us_instances]), nlp)
    m, count_matrix, stories_list, rme = matrices
    matr_time = timeit.default_timer() - start_matr_time

    # Print details per user story, if argument '-u'/'--print_us' is chosen
    if print_us:
        print("Details:\n")
        for us in us_instances:
            Printer.print_us_data(us)

    # Generate the ontology
    start_gen_time = timeit.default_timer()
    patterns = Constructor(nlp, us_instances, m)
    out = patterns.make(systemname, threshold, link)
    output_ontology, output_prolog, output_ontobj, output_prologobj, onto_per_role = out

    # Print out the ontology in the terminal, if argument '-o'/'--print_ont' is chosen
    if print_ont:
        Printer.print_head("MANCHESTER OWL")
        print(output_ontology)

    gen_time = timeit.default_timer() - start_gen_time

    # Gather statistics and print the results
    stats_time = 0
    if statistics:
        start_stats_time = timeit.default_timer()
        statsarr = Statistics.to_stats_array(us_instances)
        Printer.print_head("USER STORY STATISTICS")
        Printer.print_stats(statsarr[0], True)
        Printer.print_stats(statsarr[1], True)
        Printer.print_subhead(
            "Term-by-User Story Matrix (terms with total weight 0 hidden)")
        hide_zero = m[(m['sum'] > 0)]
        print(hide_zero)
        stats_time = timeit.default_timer() - start_stats_time

    # Write output files
    w = Writer()
    folder = "output/" + str(systemname)
    reports_folder = folder + "/reports"
    stats_folder = reports_folder + "/stats"

    outputfile = w.make_file(folder + "/ontology", str(systemname), "omn", output_ontology)
    files = [["Manchester Ontology", outputfile]]

    if statistics:
        files.append([
            "General statistics",
            w.make_file(stats_folder, str(systemname), "csv", statsarr[0])
        ])
        files.append([
            "Term-by-User Story matrix",
            w.make_file(stats_folder, str(systemname) + "-term_by_US_matrix", "csv", m)
        ])
        files.append([
            "Sentence statistics",
            w.make_file(stats_folder, str(systemname) + "-sentences", "csv", statsarr[1])
        ])
    if prolog:
        files.append([
            "Prolog",
            w.make_file(folder + "/prolog", str(systemname), "pl", output_prolog)
        ])
    if json:
        output_json_li = [str(us.toJSON()) for us in us_instances]
        output_json = "\n".join(output_json_li)
        files.append([
            "JSON",
            w.make_file(folder + "/json", str(systemname) + "-user_stories", "json",
                        output_json)
        ])
    if per_role:
        for o in onto_per_role:
            files.append([
                "Individual Ontology for '" + str(o[0]) + "'",
                w.make_file(folder + "/ontology", str(systemname) + "-" + str(o[0]),
                            "omn", o[1])
            ])

    # Print the ontology generation settings that were used
    Printer.print_gen_settings(matrix, base, threshold)

    # Print details of the generation
    Printer.print_details(fail, success, nlp_time, parse_time, matr_time, gen_time,
                          stats_time)

    report_dict = {
        "stories": us_instances,
        "failed_stories": failed_stories,
        "systemname": systemname,
        "us_success": success,
        "us_fail": fail,
        "times": [
            ["Initializing Natural Language Processor (<em>spaCy</em> v" +
             pkg_resources.get_distribution("spacy").version + ")", nlp_time],
            ["Mining User Stories", parse_time],
            ["Creating Factor Matrix", matr_time],
            ["Generating Manchester Ontology", gen_time],
            ["Gathering statistics", stats_time]
        ],
        "dir": os.path.dirname(os.path.realpath(__file__)),
        "inputfile": filename,
        "inputfile_lines": len(stories),
        "outputfiles": files,
        "threshold": threshold,
        "base": base,
        "matrix": matrix,
        "weights": m['sum'].copy().reset_index().sort_values(
            ['sum'], ascending=False).values.tolist(),
        "counts": count_matrix.reset_index().values.tolist(),
        "classes": output_ontobj.classes,
        "relationships": output_prologobj.relationships,
        "types": list(count_matrix.columns.values),
        "ontology": multiline(output_ontology)
    }

    # Finally, generate a report
    report = w.make_file(reports_folder, str(systemname) + "_REPORT", "html",
                         generate_report(report_dict))
    files.append(["Report", report])

    # Print the location and name of all output files
    for file in files:
        if str(file[1]) != "":
            print(str(file[0]) + " file successfully created at: \"" + str(file[1]) + "\"")

    # Return objects so that they can be used as input for other tools
    return {
        'us_instances': us_instances,
        'output_ontobj': output_ontobj,
        'output_prologobj': output_prologobj,
        'matrix': m
    }
def main(filename, systemname, print_us, print_ont, statistics, link, prolog,
         json, per_role, threshold, base, weights, spacy_nlp):
    """Run the pipeline and return the concept model as a node/edge graph."""
    start_nlp_time = timeit.default_timer()
    nlp = spacy_nlp
    nlp_time = timeit.default_timer() - start_nlp_time

    start_parse_time = timeit.default_timer()
    miner = StoryMiner()

    # Read the input file
    stories = Reader.parse(filename)
    us_id = 1

    # Keep track of the number of successes and failures
    success = 0
    fail = 0
    errors = ""
    c = Counter()

    # Keep track of all successfully created UserStory objects
    us_instances = []
    failed_stories = []
    success_stories = []

    # Parse every user story (remove punctuation and mine)
    for s in stories:
        try:
            user_story = parse(s, us_id, systemname, nlp, miner)
            user_story = c.count(user_story)
            success += 1
            us_instances.append(user_story)
            success_stories.append(s)
        except ValueError as err:
            failed_stories.append([us_id, s, err.args])
            errors += "\n[User Story {} ERROR] {}! (\"{}\")".format(
                us_id, err.args[0], " ".join(s.split()))
            fail += 1
        us_id += 1

    # Print errors (if found)
    if errors:
        Printer.print_head("PARSING ERRORS")
        print(errors)

    parse_time = timeit.default_timer() - start_parse_time

    # Generate the term-by-user-story matrix (m), and additional data in two other matrices
    start_matr_time = timeit.default_timer()
    matrix = Matrix(base, weights)
    matrices = matrix.generate(
        us_instances, ' '.join([u.sentence for u in us_instances]), nlp)
    m, count_matrix, stories_list, rme = matrices
    matr_time = timeit.default_timer() - start_matr_time

    # Generate the ontology
    start_gen_time = timeit.default_timer()
    patterns = Constructor(nlp, us_instances, m)
    out = patterns.make(systemname, threshold, link)
    output_ontology, output_prolog, output_ontobj, output_prologobj, onto_per_role = out

    # Build one entry per ontology class; the index doubles as the node id
    all_classes_list = []
    for i, class_vn in enumerate(output_ontobj.classes):
        all_classes_list.append({
            'id': i,
            'class_name': class_vn.name,
            'parent_name': class_vn.parent,
            'occurs_in': occurence_list(class_vn.stories),
            'weight': '0',
            'group': class_vn.is_role
        })

    nodes = [{
        "id": cl["id"],
        "label": cl["class_name"],
        "weight": cl["weight"]
    } for cl in all_classes_list]

    # Collect the mined relationships (domain, name, range)
    all_relationships_list = [{
        'relationship_domain': rel.domain,
        'relationship_name': rel.name,
        'relationship_range': rel.range
    } for rel in output_prologobj.relationships]

    # Map each class name to its node id so edges can reference endpoints by id;
    # this replaces the earlier linear scans over an intermediate concept list
    name_to_id = {cl['class_name']: cl['id'] for cl in all_classes_list}

    edges_id_list = []
    for rel in all_relationships_list:
        x = name_to_id.get(rel['relationship_domain'])
        y = name_to_id.get(rel['relationship_range'])
        # Skip relationships whose domain or range is not a known class;
        # previously such edges silently reused endpoint ids from an earlier iteration
        if x is None or y is None:
            continue
        edges_id_dict = {'from': x, 'to': y, 'label': rel['relationship_name']}
        if rel['relationship_name'] == 'isa':
            edges_id_dict['dashes'] = "true"  # draw 'isa' edges dashed
        edges_id_list.append(edges_id_dict)

    # Return the graph so that it can be rendered by other tools
    return {'nodes': nodes, 'edges': edges_id_list}
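# The {'nodes': ..., 'edges': ...} dict returned above uses 'id'/'label' nodes
# and 'from'/'to'/'label' (optionally 'dashes') edges, a shape that appears
# designed for a vis.js Network front end. A minimal serialization sketch,
# assuming only that the caller picks the (hypothetical) output path:
import json as json_lib  # aliased so it cannot clash with main()'s 'json' flag

def write_graph_json(graph, path="output/concept_graph.json"):
    # Serialize the node/edge dict produced by main() to a JSON file
    with open(path, "w") as f:
        json_lib.dump(graph, f, indent=2)

# Example:
#   graph = {'nodes': [{'id': 0, 'label': 'User', 'weight': '0'}],
#            'edges': [{'from': 0, 'to': 0, 'label': 'isa', 'dashes': 'true'}]}
#   write_graph_json(graph)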
def main(filename, systemname, print_us, print_ont, statistics, link, prolog,
         json, per_role, threshold, base, weights, spacy_nlp):
    """Run the entire program and build a node/edge view of the generated ontology."""
    start_nlp_time = timeit.default_timer()
    nlp = spacy_nlp
    nlp_time = timeit.default_timer() - start_nlp_time

    start_parse_time = timeit.default_timer()
    miner = StoryMiner()

    # Read the input file
    stories = Reader.parse(filename)
    us_id = 1

    # Keep track of the number of successes and failures
    success = 0
    fail = 0
    errors = ""
    c = Counter()

    # Keep track of all successfully created UserStory objects
    us_instances = []
    failed_stories = []
    success_stories = []

    # Parse every user story (remove punctuation and mine)
    for s in stories:
        try:
            user_story = parse(s, us_id, systemname, nlp, miner)
            user_story = c.count(user_story)
            success += 1
            us_instances.append(user_story)
            success_stories.append(s)
        except ValueError as err:
            failed_stories.append([us_id, s, err.args])
            errors += "\n[User Story {} ERROR] {}! (\"{}\")".format(
                us_id, err.args[0], " ".join(s.split()))
            fail += 1
        us_id += 1

    # Print errors (if found)
    if errors:
        Printer.print_head("PARSING ERRORS")
        print(errors)

    parse_time = timeit.default_timer() - start_parse_time

    # Generate the term-by-user-story matrix (m), and additional data in two other matrices
    start_matr_time = timeit.default_timer()
    matrix = Matrix(base, weights)
    matrices = matrix.generate(
        us_instances, ' '.join([u.sentence for u in us_instances]), nlp)
    m, count_matrix, stories_list, rme = matrices
    matr_time = timeit.default_timer() - start_matr_time

    # Print details per user story, if argument '-u'/'--print_us' is chosen
    if print_us:
        print("Details:\n")
        for us in us_instances:
            Printer.print_us_data(us)

    # Generate the ontology
    start_gen_time = timeit.default_timer()
    patterns = Constructor(nlp, us_instances, m)
    out = patterns.make(systemname, threshold, link)
    output_ontology, output_prolog, output_ontobj, output_prologobj, onto_per_role = out

    # Build one entry per ontology class; the index doubles as the node id.
    # Node ids are kept so that the edges' 'from'/'to' references resolve.
    all_classes_list = []
    for i, class_vn in enumerate(output_ontobj.classes):
        all_classes_list.append({
            'id': i,
            'class_name': class_vn.name,
            'parent_name': class_vn.parent,
            'occurs_in': occurence_list(class_vn.stories),
            'weight': '0',
            'group': class_vn.is_role
        })

    nodes = [{
        "id": cl["id"],
        "label": cl["class_name"],
        "weight": cl["weight"]
    } for cl in all_classes_list]

    # Collect the mined relationships (domain, name, range)
    all_relationships_list = [{
        'relationship_domain': rel.domain,
        'relationship_name': rel.name,
        'relationship_range': rel.range
    } for rel in output_prologobj.relationships]

    # Map each class name to its node id so edges can reference endpoints by id
    name_to_id = {cl['class_name']: cl['id'] for cl in all_classes_list}

    edges_id_list = []
    for i, rel in enumerate(all_relationships_list):
        x = name_to_id.get(rel['relationship_domain'])
        y = name_to_id.get(rel['relationship_range'])
        # Skip relationships whose domain or range is not a known class;
        # previously such edges silently reused endpoint ids from an earlier iteration
        if x is None or y is None:
            continue
        edges_id_dict = {'id': i, 'from': x, 'to': y, 'label': rel['relationship_name']}
        if rel['relationship_name'] == 'isa':
            edges_id_dict['dashes'] = "true"  # draw 'isa' edges dashed
        edges_id_list.append(edges_id_dict)

    # Print out the ontology in the terminal, if argument '-o'/'--print_ont' is chosen
    if print_ont:
        Printer.print_head("MANCHESTER OWL")
        print(output_ontology)

    gen_time = timeit.default_timer() - start_gen_time

    # Gather statistics and print the results
    stats_time = 0
    if statistics:
        start_stats_time = timeit.default_timer()
        statsarr = Statistics.to_stats_array(us_instances)
        Printer.print_head("USER STORY STATISTICS")
        Printer.print_stats(statsarr[0], True)
        Printer.print_stats(statsarr[1], True)
        Printer.print_subhead(
            "Term-by-User Story Matrix (terms with total weight 0 hidden)")
        hide_zero = m[(m['sum'] > 0)]
        print(hide_zero)
        stats_time = timeit.default_timer() - start_stats_time

    # Write output files
    w = Writer()
    folder = "output/" + str(systemname)
    reports_folder = folder + "/reports"
    stats_folder = reports_folder + "/stats"

    outputfile = w.make_file(folder + "/ontology", str(systemname), "omn", output_ontology)
    files = [["Manchester Ontology", outputfile]]

    if statistics:
        files.append([
            "General statistics",
            w.make_file(stats_folder, str(systemname), "csv", statsarr[0])
        ])
        files.append([
            "Term-by-User Story matrix",
            w.make_file(stats_folder, str(systemname) + "-term_by_US_matrix", "csv", m)
        ])
        files.append([
            "Sentence statistics",
            w.make_file(stats_folder, str(systemname) + "-sentences", "csv", statsarr[1])
        ])
    if prolog:
        files.append([
            "Prolog",
            w.make_file(folder + "/prolog", str(systemname), "pl", output_prolog)
        ])
    if json:
        output_json_li = [str(us.toJSON()) for us in us_instances]
        output_json = "\n".join(output_json_li)
        files.append([
            "JSON",
            w.make_file(folder + "/json", str(systemname) + "-user_stories", "json",
                        output_json)
        ])
    if per_role:
        for o in onto_per_role:
            files.append([
                "Individual Ontology for '" + str(o[0]) + "'",
                w.make_file(folder + "/ontology", str(systemname) + "-" + str(o[0]),
                            "omn", o[1])
            ])

    # Print the ontology generation settings that were used
    Printer.print_gen_settings(matrix, base, threshold)

    # Print details of the generation
    Printer.print_details(fail, success, nlp_time, parse_time, matr_time, gen_time,
                          stats_time)

    report_dict = {
        "stories": us_instances,
        "failed_stories": failed_stories,
        "systemname": systemname,
        "us_success": success,
        "us_fail": fail,
        "times": [
            ["Initializing Natural Language Processor (<em>spaCy</em> v" +
             pkg_resources.get_distribution("spacy").version + ")", nlp_time],
            ["Mining User Stories", parse_time],
            ["Creating Factor Matrix", matr_time],
            ["Generating Manchester Ontology", gen_time],
            ["Gathering statistics", stats_time]
        ],
        "dir": os.path.dirname(os.path.realpath(__file__)),
        "inputfile": filename,
        "inputfile_lines": len(stories),
        "outputfiles": files,
        "threshold": threshold,
        "base": base,
        "matrix": matrix,
        "weights": m['sum'].copy().reset_index().sort_values(
            ['sum'], ascending=False).values.tolist(),
        "counts": count_matrix.reset_index().values.tolist(),
        "classes": output_ontobj.classes,
        "relationships": output_prologobj.relationships,
        "types": list(count_matrix.columns.values),
        "ontology": multiline(output_ontology)
    }

    # Finally, generate a report
    report = w.make_file(reports_folder, str(systemname) + "_REPORT", "html",
                         generate_report(report_dict))
    files.append(["Report", report])

    # Print the location and name of all output files
    for file in files:
        if str(file[1]) != "":
            print(str(file[0]) + " file successfully created at: \"" + str(file[1]) + "\"")

    # Return objects so that they can be used as input for other tools
    return {
        'us_instances': us_instances,
        'output_ontobj': output_ontobj,
        'output_prologobj': output_prologobj,
        'matrix': m
    }