def get_parts(self, class_name, story):
    case = class_name.split()

    # Gather the compound words that occur in the means and ends of the story
    means_compounds = [story.means.main_object.compound]
    ends_compounds = story.ends.compounds

    if story.means.free_form:
        if len(story.means.compounds) > 0:
            if isinstance(story.means.compounds[0], list):
                # Flatten the nested compound lists into a single list
                mc = [item for sublist in story.means.compounds for item in sublist]
            else:
                mc = story.means.compounds
            means_compounds.extend(mc)

    if len(ends_compounds) > 0:
        if isinstance(ends_compounds[0], list):
            ends_compounds = [item for sublist in story.ends.compounds for item in sublist]

    role = []
    means = []
    ends = []
    rme = []

    # Collect the cased tokens per story part; for single-word class names,
    # skip tokens that are part of a compound
    for token in story.data:
        if token in story.role.text:
            if len(case) != 1:
                role.append(NLPUtility.case(token))
            elif token not in story.role.functional_role.compound:
                role.append(NLPUtility.case(token))
        if token in story.means.text:
            if len(case) != 1:
                means.append(NLPUtility.case(token))
            elif token not in means_compounds:
                means.append(NLPUtility.case(token))
        if story.has_ends:
            if token in story.ends.text:
                if len(case) != 1:
                    ends.append(NLPUtility.case(token))
                elif token not in ends_compounds:
                    ends.append(NLPUtility.case(token))

    # The class belongs to a part when its words form a sublist of that part
    if Utility.is_sublist(case, role):
        rme.append('Role')
    if Utility.is_sublist(case, means):
        rme.append('Means')
    if Utility.is_sublist(case, ends):
        rme.append('Ends')

    return rme
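# A minimal, self-contained sketch of the flattening idiom used above, with
# made-up compound lists: in a nested list comprehension the outer 'for'
# clause must come first, otherwise 'sublist' is referenced before assignment.
nested = [['database', 'record'], ['user', 'profile']]
flat = [item for sublist in nested for item in sublist]
assert flat == ['database', 'record', 'user', 'profile']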
def to_stats_array(stories):
    stats = []
    sent_stats = []

    if stories:
        header = ['US_ID', 'User_Story', 'Words', 'Verbs', 'Nouns', 'NPs',
                  'Ind_R', 'Ind_M', 'Ind_E', 'FR_Type', 'MV_Type', 'DO_Type']
        stats.append(header)

        sent_header = ['US_ID', 'Role_NP', 'Role_Struct', 'Role_Struct_Detail',
                       'Means_NP', 'Means_Struct', 'Means_Struct_Detail',
                       'Ends_NP', 'Ends_Struct', 'Ends_Struct_Detail']
        sent_stats.append(sent_header)

        for us in stories:
            stats.append([us.number, us.text, us.stats.words, us.stats.verbs,
                          us.stats.nouns, us.stats.noun_phrases,
                          us.stats.indicators.role, us.stats.indicators.means,
                          us.stats.indicators.ends, us.stats.fr_type,
                          us.stats.mv_type, us.stats.do_type])
            # Role, means, and ends sentence structures, in the same order as
            # sent_header (the 'Ends_*' columns come from us.stats.ends)
            sent_stats.append([us.number,
                               Utility.text(us.stats.role.nps),
                               Utility.text(us.stats.role.general),
                               Utility.text(us.stats.role.detail),
                               Utility.text(us.stats.means.nps),
                               Utility.text(us.stats.means.general),
                               Utility.text(us.stats.means.detail),
                               Utility.text(us.stats.ends.nps),
                               Utility.text(us.stats.ends.general),
                               Utility.text(us.stats.ends.detail)])

    return stats, sent_stats
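# A minimal consumption sketch; '_stats_demo' is a hypothetical helper that
# mirrors how main() below calls this function via Statistics.to_stats_array.
# The first row of each returned array is its header.
def _stats_demo(us_instances):
    stats, sent_stats = Statistics.to_stats_array(us_instances)
    for row in stats:
        print(';'.join(str(cell) for cell in row))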
from operator import attrgetter  # resolves dotted attribute paths safely


def get_noun_phrases(story, span, part='data'):
    """Return the noun phrases of a story part that lie within the given span."""
    phrases = []

    # Resolve the requested story part dynamically instead of building the
    # expression with eval()
    for chunk in attrgetter(part)(story).noun_chunks:
        chunk_span = MinerUtility.get_span(story, chunk)
        if Utility.is_sublist(chunk_span, span):
            phrases.append(chunk_span)

    return phrases
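# A self-contained sketch of the attrgetter lookup above, on a made-up story
# stub: attrgetter('a.b') also resolves dotted paths, which plain getattr
# cannot.
from types import SimpleNamespace

_story_stub = SimpleNamespace(data=SimpleNamespace(noun_chunks=[]))
assert attrgetter('data.noun_chunks')(_story_stub) == []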
def parse(text, id, systemname, nlp, miner):
    """Create a new user story object and mine it, mapping all data in the
    user story text to a predefined model.

    :param text: The user story text
    :param id: The user story ID, which can later be used to identify the user story
    :param systemname: Name of the system this user story belongs to
    :param nlp: Natural Language Processor (spaCy)
    :param miner: Instance of class Miner
    :returns: A new user story object
    """
    # Normalize the text: strip punctuation and collapse double spaces
    no_punct = Utility.remove_punct(text)
    no_double_space = ' '.join(no_punct.split())
    doc = nlp(no_double_space)

    user_story = UserStory(id, text, no_double_space)
    user_story.system.main = nlp(systemname)[0]
    user_story.data = doc
    # Printer.print_dependencies(user_story)
    # Printer.print_noun_phrases(user_story)

    # First derive the story structure, then re-tokenize the normalized
    # sentence and mine its parts
    miner.structure(user_story)
    user_story.old_data = user_story.data
    user_story.data = nlp(user_story.sentence)
    miner.mine(user_story, nlp)

    return user_story
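# A hypothetical usage sketch mirroring the loop in main() below; the story
# text and system name are made-up examples.
def _parse_demo():
    nlp = English()
    miner = StoryMiner()
    return parse("As a visitor, I want to search the catalog, "
                 "so that I can find a book.", 1, "ExampleSystem", nlp, miner)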
def main(filename, systemname, print_us, print_ont, statistics, link, prolog, per_role, threshold, base, weights):
    """General function to run the entire program"""

    # Initialize spaCy just once (this takes most of the time...)
    print("Initializing Natural Language Processor . . .")
    start_nlp_time = timeit.default_timer()
    nlp = English()
    nlp_time = timeit.default_timer() - start_nlp_time

    start_parse_time = timeit.default_timer()
    miner = StoryMiner()

    # Read the input file ('us_texts' avoids shadowing the built-in 'set')
    us_texts = Reader.parse(filename)
    us_id = 1

    # Keep track of all errors
    success = 0
    fail = 0
    errors = ""

    c = Counter()

    # Keeps track of all successfully created UserStory objects
    us_instances = []
    failed_stories = []
    success_stories = []

    # Parse every user story (remove punctuation and mine)
    for s in us_texts:
        try:
            user_story = parse(s, us_id, systemname, nlp, miner)
            user_story = c.count(user_story)
            success = success + 1
            us_instances.append(user_story)
            success_stories.append(s)
        except ValueError as err:
            failed_stories.append([us_id, s, err.args])
            errors += "\n[User Story " + str(us_id) + " ERROR] " + str(err.args[0]) + "! (\"" + " ".join(s.split()) + "\")"
            fail = fail + 1
        us_id = us_id + 1

    # Print errors (if found)
    if errors:
        Printer.print_head("PARSING ERRORS")
        print(errors)

    parse_time = timeit.default_timer() - start_parse_time

    # Generate the term-by-user story matrix (m), plus three additional data structures
    start_matr_time = timeit.default_timer()
    matrix = Matrix(base, weights)
    m, count_matrix, stories_list, rme = matrix.generate(us_instances, ' '.join(success_stories), nlp)
    matr_time = timeit.default_timer() - start_matr_time

    # Print details per user story, if argument '-u'/'--print_us' is chosen
    if print_us:
        print("Details:\n")
        for us in us_instances:
            Printer.print_us_data(us)

    # Generate the ontology
    start_gen_time = timeit.default_timer()
    patterns = Constructor(nlp, us_instances, m)
    output_ontology, output_prolog, output_ontobj, output_prologobj, onto_per_role = patterns.make(systemname, threshold, link)

    # Print out the ontology in the terminal, if argument '-o'/'--print_ont' is chosen
    if print_ont:
        Printer.print_head("MANCHESTER OWL")
        print(output_ontology)

    gen_time = timeit.default_timer() - start_gen_time

    # Gather statistics and print the results
    stats_time = 0
    if statistics:
        start_stats_time = timeit.default_timer()

        statsarr = Statistics.to_stats_array(us_instances)

        Printer.print_head("USER STORY STATISTICS")
        Printer.print_stats(statsarr[0], True)
        Printer.print_stats(statsarr[1], True)
        Printer.print_subhead("Term - by - User Story Matrix ( Terms w/ total weight 0 hidden )")
        hide_zero = m[(m['sum'] > 0)]
        print(hide_zero)

        stats_time = timeit.default_timer() - start_stats_time

    # Write output files
    w = Writer()

    folder = "output/" + str(systemname)
    reports_folder = folder + "/reports"
    stats_folder = reports_folder + "/stats"

    outputfile = w.make_file(folder + "/ontology", str(systemname), "omn", output_ontology)
    files = [["Manchester Ontology", outputfile]]

    outputcsv = ""
    sent_outputcsv = ""
    matrixcsv = ""

    if statistics:
        outputcsv = w.make_file(stats_folder, str(systemname), "csv", statsarr[0])
        matrixcsv = w.make_file(stats_folder, str(systemname) + "-term_by_US_matrix", "csv", m)
        sent_outputcsv = w.make_file(stats_folder, str(systemname) + "-sentences", "csv", statsarr[1])
        files.append(["General statistics", outputcsv])
        files.append(["Term-by-User Story matrix", matrixcsv])
        files.append(["Sentence statistics", sent_outputcsv])

    if prolog:
        outputpl = w.make_file(folder + "/prolog", str(systemname), "pl", output_prolog)
        files.append(["Prolog", outputpl])

    if per_role:
        for o in onto_per_role:
            name = str(systemname) + "-" + str(o[0])
            pont = w.make_file(folder + "/ontology", name, "omn", o[1])
            files.append(["Individual Ontology for '" + str(o[0]) + "'", pont])

    # Print the used ontology generation settings
    Printer.print_gen_settings(matrix, base, threshold)

    # Print details of the generation
    Printer.print_details(fail, success, nlp_time, parse_time, matr_time, gen_time, stats_time)

    report_dict = {
        "stories": us_instances,
        "failed_stories": failed_stories,
        "systemname": systemname,
        "us_success": success,
        "us_fail": fail,
        "times": [["Initializing Natural Language Processor (<em>spaCy</em> v" + pkg_resources.get_distribution("spacy").version + ")", nlp_time],
                  ["Mining User Stories", parse_time],
                  ["Creating Factor Matrix", matr_time],
                  ["Generating Manchester Ontology", gen_time],
                  ["Gathering statistics", stats_time]],
        "dir": os.path.dirname(os.path.realpath(__file__)),
        "inputfile": filename,
        "inputfile_lines": len(us_texts),
        "outputfiles": files,
        "threshold": threshold,
        "base": base,
        "matrix": matrix,
        "weights": m['sum'].copy().reset_index().sort_values(['sum'], ascending=False).values.tolist(),
        "counts": count_matrix.reset_index().values.tolist(),
        "classes": output_ontobj.classes,
        "relationships": output_prologobj.relationships,
        "types": list(count_matrix.columns.values),
        "ontology": Utility.multiline(output_ontology)
    }

    # Finally, generate a report
    report = w.make_file(reports_folder, str(systemname) + "_REPORT", "html", generate_report(report_dict))
    files.append(["Report", report])

    # Print the location and name of all output files
    for file in files:
        if str(file[1]) != "":
            print(str(file[0]) + " file successfully created at: \"" + str(file[1]) + "\"")