Example #1
	def get_parts(self, class_name, story):
		case = class_name.split()

		means_compounds = []
		means_compounds.append(story.means.main_object.compound)
		ends_compounds = story.ends.compounds

		if story.means.free_form:
			if story.means.compounds:
				if isinstance(story.means.compounds[0], list):
					# Flatten the nested compound lists (the outer loop comes first)
					mc = [item for sublist in story.means.compounds for item in sublist]
				else:
					mc = story.means.compounds
				means_compounds.extend(mc)

		if ends_compounds and isinstance(ends_compounds[0], list):
			ends_compounds = [item for sublist in story.ends.compounds for item in sublist]

		role = []
		means = []
		ends = []
		rme = []

		for token in story.data:
			if token in story.role.text:
				if len(case) != 1:
					role.append(NLPUtility.case(token))
				elif token not in story.role.functional_role.compound:
					role.append(NLPUtility.case(token))
			if token in story.means.text:
				if len(case) != 1:
					means.append(NLPUtility.case(token))
				elif token not in means_compounds:
					means.append(NLPUtility.case(token))
			if story.has_ends:
				if token in story.ends.text:
					if len(case) != 1:
						ends.append(NLPUtility.case(token))
					elif token not in ends_compounds:
						ends.append(NLPUtility.case(token))

		if Utility.is_sublist(case, role):
			rme.append('Role')

		if Utility.is_sublist(case, means):
			rme.append('Means')

		if Utility.is_sublist(case, ends):
			rme.append('Ends')

		return rme
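
A minimal, self-contained sketch of the nested-list flattening pattern that get_parts relies on; the compounds data below is invented for illustration:

# Standalone sketch: in a list comprehension the outer loop is written first.
compounds = [['customer', 'account'], ['order', 'id']]
flat = [item for sublist in compounds for item in sublist]
print(flat)  # ['customer', 'account', 'order', 'id']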
Example #2
	def to_stats_array(stories):
		stats = []
		sent_stats = []

		if stories:
			header = ['US_ID', 'User_Story', 'Words', 'Verbs', 'Nouns', 'NPs', 'Ind_R', 'Ind_M', 'Ind_E', 'FR_Type', 'MV_Type', 'DO_Type']
			stats.append(header)
			sent_header = ['US_ID', 'Role_NP', 'Role_Struct', 'Role_Struct_Detail', 'Means_NP', 'Means_Struct', 'Means_Struct_Detail', 'Ends_NP', 'Ends_Struct', 'Ends_Struct_Detail']
			sent_stats.append(sent_header)

		for us in stories:
			stats.append([us.number, us.text, us.stats.words, us.stats.verbs, us.stats.nouns, us.stats.noun_phrases, us.stats.indicators.role, us.stats.indicators.means, us.stats.indicators.ends, us.stats.fr_type, us.stats.mv_type, us.stats.do_type])
		sent_stats.append([us.number, Utility.text(us.stats.role.nps), Utility.text(us.stats.role.general), Utility.text(us.stats.role.detail), Utility.text(us.stats.means.nps), Utility.text(us.stats.means.general), Utility.text(us.stats.means.detail), Utility.text(us.stats.ends.nps), Utility.text(us.stats.ends.general), Utility.text(us.stats.ends.detail)])

		return stats, sent_stats
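
A hedged usage sketch for to_stats_array: the main() example below calls it as Statistics.to_stats_array(us_instances), and since the returned rows are plain lists they can be written out with the standard csv module. The file name here is illustrative.

import csv

# us_instances is a list of mined user-story objects, as built in main() below.
stats, sent_stats = Statistics.to_stats_array(us_instances)

with open('stats.csv', 'w', newline='') as f:
	csv.writer(f).writerows(stats)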
Example #3
	def get_noun_phrases(story, span, part='data'):
		phrases = []
		
		# Look the requested attribute up by name rather than via eval()
		for chunk in getattr(story, part).noun_chunks:
			chunk = MinerUtility.get_span(story, chunk)
			if Utility.is_sublist(chunk, span):
				phrases.append(chunk)

		return phrases
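
A standalone sketch of the getattr-based attribute lookup used in get_noun_phrases; the Story class below is a stand-in for the real story object:

# getattr(obj, name) fetches an attribute by its string name,
# without evaluating arbitrary code.
class Story:
	def __init__(self, data):
		self.data = data

story = Story('parsed document')
part = 'data'
print(getattr(story, part))  # 'parsed document'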
Example #4
def parse(text, id, systemname, nlp, miner):
	"""Create a new user story object and mines it to map all data in the user story text to a predefined model
	
	:param text: The user story text
	:param id: The user story ID, which can later be used to identify the user story
	:param systemname: Name of the system this user story belongs to
	:param nlp: Natural Language Processor (spaCy)
	:param miner: instance of class Miner
	:returns: A new user story object
	"""
	no_punct = Utility.remove_punct(text)
	no_double_space = ' '.join(no_punct.split())
	doc = nlp(no_double_space)
	user_story = UserStory(id, text, no_double_space)
	user_story.system.main = nlp(systemname)[0]
	user_story.data = doc
	#Printer.print_dependencies(user_story)
	#Printer.print_noun_phrases(user_story)
	miner.structure(user_story)
	user_story.old_data = user_story.data
	user_story.data = nlp(user_story.sentence)
	miner.mine(user_story, nlp)
	return user_story
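
A hedged call sketch for parse(). The main() example below builds its pipeline with the legacy English() loader; with a current spaCy install the setup would look roughly as follows, where the story text and system name are invented:

import spacy

nlp = spacy.load('en_core_web_sm')  # any English pipeline should do
miner = StoryMiner()  # as instantiated in the main() example below
story = parse('As a user, I want to log in, so that I can see my orders.',
              1, 'WebShop', nlp, miner)
print(story.data)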
Example #5
def parse(text, id, systemname, nlp, miner):
    """Create a new user story object and mines it to map all data in the user story text to a predefined model
	
	:param text: The user story text
	:param id: The user story ID, which can later be used to identify the user story
	:param systemname: Name of the system this user story belongs to
	:param nlp: Natural Language Processor (spaCy)
	:param miner: instance of class Miner
	:returns: A new user story object
	"""
    no_punct = Utility.remove_punct(text)
    no_double_space = ' '.join(no_punct.split())
    doc = nlp(no_double_space)
    user_story = UserStory(id, text, no_double_space)
    user_story.system.main = nlp(systemname)[0]
    user_story.data = doc
    #Printer.print_dependencies(user_story)
    #Printer.print_noun_phrases(user_story)
    miner.structure(user_story)
    user_story.old_data = user_story.data
    user_story.data = nlp(user_story.sentence)
    miner.mine(user_story, nlp)
    return user_story
Example #6
    def to_stats_array(stories):
        stats = []
        sent_stats = []

        if stories:
            header = [
                "US_ID",
                "User_Story",
                "Words",
                "Verbs",
                "Nouns",
                "NPs",
                "Ind_R",
                "Ind_M",
                "Ind_E",
                "FR_Type",
                "MV_Type",
                "DO_Type",
            ]
            stats.append(header)
            sent_header = [
                "US_ID",
                "Role_NP",
                "Role_Struct",
                "Role_Struct_Detail",
                "Means_NP",
                "Means_Struct",
                "Means_Struct_Detail",
                "Ends_NP",
                "Ends_Struct",
                "Ends_Struct_Detail",
            ]
            sent_stats.append(sent_header)

        for us in stories:
            stats.append(
                [
                    us.number,
                    us.text,
                    us.stats.words,
                    us.stats.verbs,
                    us.stats.nouns,
                    us.stats.noun_phrases,
                    us.stats.indicators.role,
                    us.stats.indicators.means,
                    us.stats.indicators.ends,
                    us.stats.fr_type,
                    us.stats.mv_type,
                    us.stats.do_type,
                ]
            )
            sent_stats.append(
                [
                    us.number,
                    Utility.text(us.stats.role.nps),
                    Utility.text(us.stats.role.general),
                    Utility.text(us.stats.role.detail),
                    Utility.text(us.stats.means.nps),
                    Utility.text(us.stats.means.general),
                    Utility.text(us.stats.means.detail),
                    Utility.text(us.stats.ends.nps),
                    Utility.text(us.stats.ends.general),
                    Utility.text(us.stats.ends.detail),
                ]
            )

        return stats, sent_stats
Example #7
def main(filename, systemname, print_us, print_ont, statistics, link, prolog, per_role, threshold, base, weights):
	"""General class to run the entire program
	"""

	# Initialize spaCy just once (this takes most of the time...)
	print("Initializing Natural Language Processor . . .")
	start_nlp_time = timeit.default_timer()
	nlp = English()
	nlp_time = timeit.default_timer() - start_nlp_time

	start_parse_time = timeit.default_timer()
	miner = StoryMiner()

	# Read the input file
	stories = Reader.parse(filename)
	us_id = 1

	# Keep track of all errors	
	success = 0
	fail = 0
	errors = ""
	c = Counter()

	# Keeps track of all successfully created User Story objects
	us_instances = []  
	failed_stories = []
	success_stories = []

	# Parse every user story (remove punctuation and mine)
	for s in stories:
		try:
			user_story = parse(s, us_id, systemname, nlp, miner)
			user_story = c.count(user_story)
			success = success + 1
			us_instances.append(user_story)
			success_stories.append(s)
		except ValueError as err:
			failed_stories.append([us_id, s, err.args])
			errors += "\n[User Story " + str(us_id) + " ERROR] " + str(err.args[0]) + "! (\"" + " ".join(str.split(s)) + "\")"
			fail = fail + 1
		us_id = us_id + 1

	# Print errors (if found)
	if errors:
		Printer.print_head("PARSING ERRORS")
		print(errors)

	parse_time = timeit.default_timer() - start_parse_time

	# Generate the term-by-user story matrix (m), and additional data in two other matrices
	start_matr_time = timeit.default_timer()

	matrix = Matrix(base, weights)
	matrices = matrix.generate(us_instances, ' '.join(success_stories), nlp)
	m = matrices[0]
	count_matrix = matrices[1]
	stories_list = matrices[2]
	rme = matrices[3]

	matr_time = timeit.default_timer() - start_matr_time

	# Print details per user story, if argument '-u'/'--print_us' is chosen
	if print_us:
		print("Details:\n")
		for us in us_instances:
			Printer.print_us_data(us)

	# Generate the ontology
	start_gen_time = timeit.default_timer()
	
	patterns = Constructor(nlp, us_instances, m)
	out = patterns.make(systemname, threshold, link)
	output_ontology = out[0]
	output_prolog = out[1]
	output_ontobj = out[2]
	output_prologobj = out[3]
	onto_per_role = out[4]

	# Print out the ontology in the terminal, if argument '-o'/'--print_ont' is chosen
	if print_ont:
		Printer.print_head("MANCHESTER OWL")
		print(output_ontology)

	gen_time = timeit.default_timer() - start_gen_time

	# Gather statistics and print the results
	stats_time = 0
	if statistics:
		start_stats_time = timeit.default_timer()

		statsarr = Statistics.to_stats_array(us_instances)

		Printer.print_head("USER STORY STATISTICS")
		Printer.print_stats(statsarr[0], True)
		Printer.print_stats(statsarr[1], True)
		Printer.print_subhead("Term - by - User Story Matrix ( Terms w/ total weight 0 hidden )")
		hide_zero = m[(m['sum'] > 0)]
		print(hide_zero)

		stats_time = timeit.default_timer() - start_stats_time	

	# Write output files
	w = Writer()

	folder = "output/" + str(systemname)
	reports_folder = folder + "/reports"
	stats_folder = reports_folder + "/stats"

	outputfile = w.make_file(folder + "/ontology", str(systemname), "omn", output_ontology)
	files = [["Manchester Ontology", outputfile]]

	outputcsv = ""
	sent_outputcsv = ""
	matrixcsv = ""

	if statistics:
		outputcsv = w.make_file(stats_folder, str(systemname), "csv", statsarr[0])
		matrixcsv = w.make_file(stats_folder, str(systemname) + "-term_by_US_matrix", "csv", m)
		sent_outputcsv = w.make_file(stats_folder, str(systemname) + "-sentences", "csv", statsarr[1])
		files.append(["General statistics", outputcsv])
		files.append(["Term-by-User Story matrix", matrixcsv])
		files.append(["Sentence statistics", sent_outputcsv])
	if prolog:
		outputpl = w.make_file(folder + "/prolog", str(systemname), "pl", output_prolog)
		files.append(["Prolog", outputpl])
	if per_role:
		for o in onto_per_role:
			name = str(systemname) + "-" + str(o[0])
			pont = w.make_file(folder + "/ontology", name, "omn", o[1])
			files.append(["Individual Ontology for '" + str(o[0]) + "'", pont])

	# Print the used ontology generation settings
	Printer.print_gen_settings(matrix, base, threshold)

	# Print details of the generation
	Printer.print_details(fail, success, nlp_time, parse_time, matr_time, gen_time, stats_time)

	report_dict = {
		"stories": us_instances,
		"failed_stories": failed_stories,
		"systemname": systemname,
		"us_success": success,
		"us_fail": fail,
		"times": [["Initializing Natural Language Processor (<em>spaCy</em> v" + pkg_resources.get_distribution("spacy").version + ")" , nlp_time], ["Mining User Stories", parse_time], ["Creating Factor Matrix", matr_time], ["Generating Manchester Ontology", gen_time], ["Gathering statistics", stats_time]],
		"dir": os.path.dirname(os.path.realpath(__file__)),
		"inputfile": filename,
		"inputfile_lines": len(set),
		"outputfiles": files,
		"threshold": threshold,
		"base": base,
		"matrix": matrix,
		"weights": m['sum'].copy().reset_index().sort_values(['sum'], ascending=False).values.tolist(),
		"counts": count_matrix.reset_index().values.tolist(),
		"classes": output_ontobj.classes,
		"relationships": output_prologobj.relationships,
		"types": list(count_matrix.columns.values),
		"ontology": Utility.multiline(output_ontology)
	}

	# Finally, generate a report
	report = w.make_file(reports_folder, str(systemname) + "_REPORT", "html", generate_report(report_dict))
	files.append(["Report", report])

	# Print the location and name of all output files
	for file in files:
		if str(file[1]) != "":
			print(str(file[0]) + " file successfully created at: \"" + str(file[1]) + "\"")
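
A hypothetical command-line wrapper for main(); the real project likely ships its own argument parser, so the flags and defaults below (in particular weights) are placeholders:

import argparse

if __name__ == '__main__':
	p = argparse.ArgumentParser(description='Mine user stories into an ontology')
	p.add_argument('filename')
	p.add_argument('systemname')
	p.add_argument('--threshold', type=float, default=1.0)
	args = p.parse_args()
	main(args.filename, args.systemname, print_us=False, print_ont=False,
		statistics=True, link=False, prolog=False, per_role=False,
		threshold=args.threshold, base=1, weights=[1, 1, 1, 1])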
Example #8
def main(filename, systemname, print_us, print_ont, statistics, link, prolog,
         per_role, threshold, base, weights):
    """General class to run the entire program
	"""

    # Initialize spaCy just once (this takes most of the time...)
    print("Initializing Natural Language Processor . . .")
    start_nlp_time = timeit.default_timer()
    nlp = English()
    nlp_time = timeit.default_timer() - start_nlp_time

    start_parse_time = timeit.default_timer()
    miner = StoryMiner()

    # Read the input file
    stories = Reader.parse(filename)
    us_id = 1

    # Keep track of all errors
    success = 0
    fail = 0
    errors = ""
    c = Counter()

    # Keeps track of all successfully created User Story objects
    us_instances = []
    failed_stories = []
    success_stories = []

    # Parse every user story (remove punctuation and mine)
    for s in stories:
        try:
            user_story = parse(s, us_id, systemname, nlp, miner)
            user_story = c.count(user_story)
            success = success + 1
            us_instances.append(user_story)
            success_stories.append(s)
        except ValueError as err:
            failed_stories.append([us_id, s, err.args])
            errors += "\n[User Story " + str(us_id) + " ERROR] " + str(
                err.args[0]) + "! (\"" + " ".join(str.split(s)) + "\")"
            fail = fail + 1
        us_id = us_id + 1

    # Print errors (if found)
    if errors:
        Printer.print_head("PARSING ERRORS")
        print(errors)

    parse_time = timeit.default_timer() - start_parse_time

    # Generate the term-by-user story matrix (m), and additional data in two other matrices
    start_matr_time = timeit.default_timer()

    matrix = Matrix(base, weights)
    matrices = matrix.generate(us_instances, ' '.join(success_stories), nlp)
    m = matrices[0]
    count_matrix = matrices[1]
    stories_list = matrices[2]
    rme = matrices[3]

    matr_time = timeit.default_timer() - start_matr_time

    # Print details per user story, if argument '-u'/'--print_us' is chosen
    if print_us:
        print("Details:\n")
        for us in us_instances:
            Printer.print_us_data(us)

    # Generate the ontology
    start_gen_time = timeit.default_timer()

    patterns = Constructor(nlp, us_instances, m)
    out = patterns.make(systemname, threshold, link)
    output_ontology = out[0]
    output_prolog = out[1]
    output_ontobj = out[2]
    output_prologobj = out[3]
    onto_per_role = out[4]

    # Print out the ontology in the terminal, if argument '-o'/'--print_ont' is chosen
    if print_ont:
        Printer.print_head("MANCHESTER OWL")
        print(output_ontology)

    gen_time = timeit.default_timer() - start_gen_time

    # Gather statistics and print the results
    stats_time = 0
    if statistics:
        start_stats_time = timeit.default_timer()

        statsarr = Statistics.to_stats_array(us_instances)

        Printer.print_head("USER STORY STATISTICS")
        Printer.print_stats(statsarr[0], True)
        Printer.print_stats(statsarr[1], True)
        Printer.print_subhead(
            "Term - by - User Story Matrix ( Terms w/ total weight 0 hidden )")
        hide_zero = m[(m['sum'] > 0)]
        print(hide_zero)

        stats_time = timeit.default_timer() - start_stats_time

    # Write output files
    w = Writer()

    folder = "output/" + str(systemname)
    reports_folder = folder + "/reports"
    stats_folder = reports_folder + "/stats"

    outputfile = w.make_file(folder + "/ontology", str(systemname), "omn",
                             output_ontology)
    files = [["Manchester Ontology", outputfile]]

    outputcsv = ""
    sent_outputcsv = ""
    matrixcsv = ""

    if statistics:
        outputcsv = w.make_file(stats_folder, str(systemname), "csv",
                                statsarr[0])
        matrixcsv = w.make_file(stats_folder,
                                str(systemname) + "-term_by_US_matrix", "csv",
                                m)
        sent_outputcsv = w.make_file(stats_folder,
                                     str(systemname) + "-sentences", "csv",
                                     statsarr[1])
        files.append(["General statistics", outputcsv])
        files.append(["Term-by-User Story matrix", matrixcsv])
        files.append(["Sentence statistics", sent_outputcsv])
    if prolog:
        outputpl = w.make_file(folder + "/prolog", str(systemname), "pl",
                               output_prolog)
        files.append(["Prolog", outputpl])
    if per_role:
        for o in onto_per_role:
            name = str(systemname) + "-" + str(o[0])
            pont = w.make_file(folder + "/ontology", name, "omn", o[1])
            files.append(["Individual Ontology for '" + str(o[0]) + "'", pont])

    # Print the used ontology generation settings
    Printer.print_gen_settings(matrix, base, threshold)

    # Print details of the generation
    Printer.print_details(fail, success, nlp_time, parse_time, matr_time,
                          gen_time, stats_time)

    report_dict = {
        "stories": us_instances,
        "failed_stories": failed_stories,
        "systemname": systemname,
        "us_success": success,
        "us_fail": fail,
        "times": [
            ["Initializing Natural Language Processor (<em>spaCy</em> v" +
             pkg_resources.get_distribution("spacy").version + ")", nlp_time],
            ["Mining User Stories", parse_time],
            ["Creating Factor Matrix", matr_time],
            ["Generating Manchester Ontology", gen_time],
            ["Gathering statistics", stats_time],
        ],
        "dir": os.path.dirname(os.path.realpath(__file__)),
        "inputfile": filename,
        "inputfile_lines": len(stories),
        "outputfiles": files,
        "threshold": threshold,
        "base": base,
        "matrix": matrix,
        "weights": m['sum'].copy().reset_index().sort_values(
            ['sum'], ascending=False).values.tolist(),
        "counts": count_matrix.reset_index().values.tolist(),
        "classes": output_ontobj.classes,
        "relationships": output_prologobj.relationships,
        "types": list(count_matrix.columns.values),
        "ontology": Utility.multiline(output_ontology)
    }

    # Finally, generate a report
    report = w.make_file(reports_folder,
                         str(systemname) + "_REPORT", "html",
                         generate_report(report_dict))
    files.append(["Report", report])

    # Print the location and name of all output files
    for file in files:
        if str(file[1]) != "":
            print(str(file[0]) + " file successfully created at: \"" +
                  str(file[1]) + "\"")