Exemplo n.º 1
0
def main(filename, systemname, print_us, print_ont, statistics, link, prolog, per_role, threshold, base, weights):
    """Run the whole pipeline: mine the user stories, build the ontology, write the outputs.

    Args:
        filename: Input file handed to ``Reader.parse``.
        systemname: Name used for the output folder and the output file names.
        print_us: If truthy, print the details of every mined user story.
        print_ont: If truthy, print the generated Manchester OWL ontology.
        statistics: If truthy, gather, print and write the statistics files.
        link: Passed through to ``Constructor.make``.
        prolog: If truthy, write the Prolog output file.
        per_role: If truthy, write one ontology file per entry of the per-role output.
        threshold: Passed through to ``Constructor.make`` and included in the report.
        base: Passed to ``Matrix`` and included in the report.
        weights: Passed to ``Matrix``.

    Returns:
        None. Results are printed and written through ``Writer.make_file``.
    """

    # Initialize spaCy just once (this takes most of the time...)
    print("Initializing Natural Language Processor . . .")
    start_nlp_time = timeit.default_timer()
    nlp = English()
    nlp_time = timeit.default_timer() - start_nlp_time

    start_parse_time = timeit.default_timer()
    miner = StoryMiner()

    # Read the input file
    stories = Reader.parse(filename)

    c = Counter()

    # Successfully created user story objects, their raw text, and the failures
    us_instances = []
    success_stories = []
    failed_stories = []
    error_lines = []

    # Parse every user story (remove punctuation and mine); ids start at 1
    for us_id, s in enumerate(stories, start=1):
        try:
            user_story = parse(s, us_id, systemname, nlp, miner)
            user_story = c.count(user_story)
        except ValueError as err:
            failed_stories.append([us_id, s, err.args])
            # A ValueError raised without arguments has an empty args tuple;
            # fall back to the exception itself instead of crashing the run.
            reason = err.args[0] if err.args else err
            error_lines.append(
                "\n[User Story " + str(us_id) + " ERROR] " + str(reason)
                + "! (\"" + " ".join(s.split()) + "\")"
            )
        else:
            us_instances.append(user_story)
            success_stories.append(s)

    success = len(us_instances)
    fail = len(failed_stories)
    errors = "".join(error_lines)

    # Print errors (if found)
    if errors:
        Printer.print_head("PARSING ERRORS")
        print(errors)

    parse_time = timeit.default_timer() - start_parse_time

    # Generate the term-by-user story matrix (m) and the count matrix
    start_matr_time = timeit.default_timer()

    matrix = Matrix(base, weights)
    matrices = matrix.generate(us_instances, ' '.join(success_stories), nlp)
    m = matrices[0]
    count_matrix = matrices[1]

    matr_time = timeit.default_timer() - start_matr_time

    # Print details per user story, if argument '-u'/'--print_us' is chosen
    if print_us:
        print("Details:\n")
        for us in us_instances:
            Printer.print_us_data(us)

    # Generate the ontology
    start_gen_time = timeit.default_timer()

    patterns = Constructor(nlp, us_instances, m)
    out = patterns.make(systemname, threshold, link)
    output_ontology = out[0]
    output_prolog = out[1]
    output_ontobj = out[2]
    output_prologobj = out[3]
    onto_per_role = out[4]

    # Print out the ontology in the terminal, if argument '-o'/'--print_ont' is chosen
    if print_ont:
        Printer.print_head("MANCHESTER OWL")
        print(output_ontology)

    # NOTE: the generation time also covers the optional printing above
    gen_time = timeit.default_timer() - start_gen_time

    # Gather statistics and print the results
    stats_time = 0
    if statistics:
        start_stats_time = timeit.default_timer()

        statsarr = Statistics.to_stats_array(us_instances)

        Printer.print_head("USER STORY STATISTICS")
        Printer.print_stats(statsarr[0], True)
        Printer.print_stats(statsarr[1], True)
        Printer.print_subhead("Term - by - User Story Matrix ( Terms w/ total weight 0 hidden )")
        hide_zero = m[(m['sum'] > 0)]
        print(hide_zero)

        stats_time = timeit.default_timer() - start_stats_time

    # Write output files
    w = Writer()

    folder = "output/" + str(systemname)
    reports_folder = folder + "/reports"
    stats_folder = reports_folder + "/stats"

    outputfile = w.make_file(folder + "/ontology", str(systemname), "omn", output_ontology)
    files = [["Manchester Ontology", outputfile]]

    if statistics:
        # statsarr was assigned in the statistics branch above
        outputcsv = w.make_file(stats_folder, str(systemname), "csv", statsarr[0])
        matrixcsv = w.make_file(stats_folder, str(systemname) + "-term_by_US_matrix", "csv", m)
        sent_outputcsv = w.make_file(stats_folder, str(systemname) + "-sentences", "csv", statsarr[1])
        files.append(["General statistics", outputcsv])
        files.append(["Term-by-User Story matrix", matrixcsv])
        files.append(["Sentence statistics", sent_outputcsv])
    if prolog:
        outputpl = w.make_file(folder + "/prolog", str(systemname), "pl", output_prolog)
        files.append(["Prolog", outputpl])
    if per_role:
        for o in onto_per_role:
            name = str(systemname) + "-" + str(o[0])
            pont = w.make_file(folder + "/ontology", name, "omn", o[1])
            files.append(["Individual Ontology for '" + str(o[0]) + "'", pont])

    # Print the used ontology generation settings
    Printer.print_gen_settings(matrix, base, threshold)

    # Print details of the generation
    Printer.print_details(fail, success, nlp_time, parse_time, matr_time, gen_time, stats_time)

    spacy_version = pkg_resources.get_distribution("spacy").version
    report_dict = {
        "stories": us_instances,
        "failed_stories": failed_stories,
        "systemname": systemname,
        "us_success": success,
        "us_fail": fail,
        "times": [
            ["Initializing Natural Language Processor (<em>spaCy</em> v" + spacy_version + ")", nlp_time],
            ["Mining User Stories", parse_time],
            ["Creating Factor Matrix", matr_time],
            ["Generating Manchester Ontology", gen_time],
            ["Gathering statistics", stats_time],
        ],
        "dir": os.path.dirname(os.path.realpath(__file__)),
        "inputfile": filename,
        "inputfile_lines": len(stories),
        "outputfiles": files,
        "threshold": threshold,
        "base": base,
        "matrix": matrix,
        "weights": m['sum'].copy().reset_index().sort_values(['sum'], ascending=False).values.tolist(),
        "counts": count_matrix.reset_index().values.tolist(),
        "classes": output_ontobj.classes,
        "relationships": output_prologobj.relationships,
        "types": list(count_matrix.columns.values),
        "ontology": Utility.multiline(output_ontology),
    }

    # Finally, generate a report; report_dict holds the same list object as
    # `files`, so the report itself is appended only after it has been rendered
    report = w.make_file(reports_folder, str(systemname) + "_REPORT", "html", generate_report(report_dict))
    files.append(["Report", report])

    # Print the location and name of all output files
    for label, path in files:
        if str(path) != "":
            print(str(label) + " file succesfully created at: \"" + str(path) + "\"")
Exemplo n.º 2
0
def main(filename, systemname, print_us, print_ont, statistics, link, prolog,
         per_role, threshold, base, weights):
    """Mine the user stories, generate the ontology and write every output file.

    Args:
        filename: Input file handed to ``Reader.parse``.
        systemname: Name used for the output folder and the output file names.
        print_us: If truthy, print the details of every mined user story.
        print_ont: If truthy, print the generated Manchester OWL ontology.
        statistics: If truthy, gather, print and write the statistics files.
        link: Passed through to ``Constructor.make``.
        prolog: If truthy, write the Prolog output file.
        per_role: If truthy, write one ontology file per entry of the
            per-role output.
        threshold: Passed through to ``Constructor.make`` and included in
            the report.
        base: Passed to ``Matrix`` and included in the report.
        weights: Passed to ``Matrix``.

    Returns:
        None. Results are printed and written through ``Writer.make_file``.
    """

    # Initialize spaCy just once (this takes most of the time...)
    print("Initializing Natural Language Processor . . .")
    start_nlp_time = timeit.default_timer()
    nlp = English()
    nlp_time = timeit.default_timer() - start_nlp_time

    start_parse_time = timeit.default_timer()
    miner = StoryMiner()

    # Read the input file
    stories = Reader.parse(filename)

    c = Counter()

    # Successfully created user story objects, their raw text, and the
    # stories that could not be parsed
    us_instances = []
    success_stories = []
    failed_stories = []
    error_lines = []

    # Parse every user story (remove punctuation and mine); ids start at 1
    for us_id, s in enumerate(stories, start=1):
        try:
            user_story = parse(s, us_id, systemname, nlp, miner)
            user_story = c.count(user_story)
        except ValueError as err:
            failed_stories.append([us_id, s, err.args])
            # err.args is empty when the ValueError carries no message;
            # use the exception itself rather than failing with IndexError.
            reason = err.args[0] if err.args else err
            error_lines.append(
                "\n[User Story " + str(us_id) + " ERROR] " + str(reason) +
                "! (\"" + " ".join(s.split()) + "\")")
        else:
            us_instances.append(user_story)
            success_stories.append(s)

    success = len(us_instances)
    fail = len(failed_stories)
    errors = "".join(error_lines)

    # Print errors (if found)
    if errors:
        Printer.print_head("PARSING ERRORS")
        print(errors)

    parse_time = timeit.default_timer() - start_parse_time

    # Generate the term-by-user story matrix (m) and the count matrix
    start_matr_time = timeit.default_timer()

    matrix = Matrix(base, weights)
    matrices = matrix.generate(us_instances, ' '.join(success_stories), nlp)
    m = matrices[0]
    count_matrix = matrices[1]

    matr_time = timeit.default_timer() - start_matr_time

    # Print details per user story, if argument '-u'/'--print_us' is chosen
    if print_us:
        print("Details:\n")
        for us in us_instances:
            Printer.print_us_data(us)

    # Generate the ontology
    start_gen_time = timeit.default_timer()

    patterns = Constructor(nlp, us_instances, m)
    out = patterns.make(systemname, threshold, link)
    output_ontology = out[0]
    output_prolog = out[1]
    output_ontobj = out[2]
    output_prologobj = out[3]
    onto_per_role = out[4]

    # Print out the ontology in the terminal, if argument '-o'/'--print_ont' is chosen
    if print_ont:
        Printer.print_head("MANCHESTER OWL")
        print(output_ontology)

    # NOTE: the generation time also covers the optional printing above
    gen_time = timeit.default_timer() - start_gen_time

    # Gather statistics and print the results
    stats_time = 0
    if statistics:
        start_stats_time = timeit.default_timer()

        statsarr = Statistics.to_stats_array(us_instances)

        Printer.print_head("USER STORY STATISTICS")
        Printer.print_stats(statsarr[0], True)
        Printer.print_stats(statsarr[1], True)
        Printer.print_subhead(
            "Term - by - User Story Matrix ( Terms w/ total weight 0 hidden )")
        hide_zero = m[(m['sum'] > 0)]
        print(hide_zero)

        stats_time = timeit.default_timer() - start_stats_time

    # Write output files
    w = Writer()

    folder = "output/" + str(systemname)
    reports_folder = folder + "/reports"
    stats_folder = reports_folder + "/stats"

    outputfile = w.make_file(folder + "/ontology", str(systemname), "omn",
                             output_ontology)
    files = [["Manchester Ontology", outputfile]]

    if statistics:
        # statsarr was assigned in the statistics branch above
        outputcsv = w.make_file(stats_folder, str(systemname), "csv",
                                statsarr[0])
        matrixcsv = w.make_file(stats_folder,
                                str(systemname) + "-term_by_US_matrix", "csv",
                                m)
        sent_outputcsv = w.make_file(stats_folder,
                                     str(systemname) + "-sentences", "csv",
                                     statsarr[1])
        files.append(["General statistics", outputcsv])
        files.append(["Term-by-User Story matrix", matrixcsv])
        files.append(["Sentence statistics", sent_outputcsv])
    if prolog:
        outputpl = w.make_file(folder + "/prolog", str(systemname), "pl",
                               output_prolog)
        files.append(["Prolog", outputpl])
    if per_role:
        for o in onto_per_role:
            name = str(systemname) + "-" + str(o[0])
            pont = w.make_file(folder + "/ontology", name, "omn", o[1])
            files.append(["Individual Ontology for '" + str(o[0]) + "'", pont])

    # Print the used ontology generation settings
    Printer.print_gen_settings(matrix, base, threshold)

    # Print details of the generation
    Printer.print_details(fail, success, nlp_time, parse_time, matr_time,
                          gen_time, stats_time)

    spacy_version = pkg_resources.get_distribution("spacy").version
    report_dict = {
        "stories": us_instances,
        "failed_stories": failed_stories,
        "systemname": systemname,
        "us_success": success,
        "us_fail": fail,
        "times": [
            ["Initializing Natural Language Processor (<em>spaCy</em> v" +
             spacy_version + ")", nlp_time],
            ["Mining User Stories", parse_time],
            ["Creating Factor Matrix", matr_time],
            ["Generating Manchester Ontology", gen_time],
            ["Gathering statistics", stats_time],
        ],
        "dir": os.path.dirname(os.path.realpath(__file__)),
        "inputfile": filename,
        "inputfile_lines": len(stories),
        "outputfiles": files,
        "threshold": threshold,
        "base": base,
        "matrix": matrix,
        "weights": m['sum'].copy().reset_index().sort_values(
            ['sum'], ascending=False).values.tolist(),
        "counts": count_matrix.reset_index().values.tolist(),
        "classes": output_ontobj.classes,
        "relationships": output_prologobj.relationships,
        "types": list(count_matrix.columns.values),
        "ontology": Utility.multiline(output_ontology),
    }

    # Finally, generate a report; report_dict holds the same list object as
    # `files`, so the report itself is appended only after it has been rendered
    report = w.make_file(reports_folder,
                         str(systemname) + "_REPORT", "html",
                         generate_report(report_dict))
    files.append(["Report", report])

    # Print the location and name of all output files
    for label, path in files:
        if str(path) != "":
            print(
                str(label) + " file succesfully created at: \"" +
                str(path) + "\"")