Example #1
    def _get_gen(self, us_instances, m, systemname, print_ont, **kw):
        patterns = Constructor(self.nlp, us_instances, m)
        out = patterns.make(systemname, self.threshold, self.link)

        # Print out the ontology in the terminal, if argument '-o'/'--print_ont' is chosen
        if print_ont:
            Printer.print_head("MANCHESTER OWL")
            print(out[0])

        return out
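
Examples #3–#5 unpack the tuple returned by `Constructor.make(...)` into five values, which suggests how the result of `_get_gen` can be consumed. A minimal sketch, assuming that five-element structure and the enclosing class from the other examples:

# Hedged sketch (inside the same class): unpack the result of _get_gen,
# assuming the five-element tuple seen in Examples #3-#5.
out = self._get_gen(us_instances, m, systemname, print_ont)
ontology_text, prolog_text, ontobj, prologobj, onto_per_role = out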
Example #2
    def gen_concept_model(self):
        nlp = self.nlp

        miner = StoryMiner()
        counter = Counter()

        # Keep track of the number of successes and failures
        success = 0
        fail = 0

        # Keep track of all successful and failed user stories
        user_stories_lst = []
        failed_stories_lst = []
        success_stories_lst = []

        us_id = 1

        # Parse every user story (remove punctuation and mine)
        for story_line in self.messages:
            try:
                user_story = UserStory.parse(story_line, us_id,
                                             self.system_name, nlp, miner)
                user_story = counter.count(user_story)
                success += 1
                user_stories_lst.append(user_story)
                success_stories_lst.append(story_line)
            except ValueError as err:
                failed_stories_lst.append([us_id, story_line, err.args])
                fail += 1
            us_id += 1

        # Generate the term-by-user story matrix (m), and additional data in two other matrices
        matrix = Matrix(self.base_weight, self.weights)
        matrices = matrix.generate(
            user_stories_lst, ' '.join([u.sentence for u in user_stories_lst]),
            nlp)
        m, count_matrix, stories_list, rme = matrices
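        # `m` is the weighted term-by-user-story matrix (a pandas DataFrame
        # with a 'sum' column), `count_matrix` holds per-type counts, and
        # `stories_list`/`rme` carry auxiliary data -- inferred from how the
        # other examples consume these objects, not from a documented API.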

        # Generate the ontology

        patterns = Constructor(nlp, user_stories_lst, m)
        out = patterns.make(self.system_name, self.threshold, self.link)
        output_ontology, output_prolog, output_ontobj, output_prologobj, onto_per_role = out

        # Return objects so that they can be used as input for other tools
        return {
            'stories': user_stories_lst,
            'ontology': output_ontology,
            'prolog': output_prolog,
            'matrix': m
        }
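
A hedged usage sketch for `gen_concept_model`. The method only requires that the instance expose the attributes referenced above (`nlp`, `messages`, `system_name`, `base_weight`, `weights`, `threshold`, `link`); the wrapper class below is an assumption:

import spacy

# Hypothetical wrapper; `ConceptModelGenerator` and its constructor are
# assumptions, not part of the source -- only the attribute names it must
# expose are taken from the method above.
story_lines = ["As a visitor, I want to buy a ticket, so that I can attend."]
generator = ConceptModelGenerator(messages=story_lines,
                                  system_name="TicketSystem",
                                  nlp=spacy.load("en_core_web_md"))
result = generator.gen_concept_model()
print(result['ontology'])      # Manchester OWL source text
print(len(result['stories']))  # successfully parsed user stories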
Example #3
def main(filename, systemname, print_us, print_ont, statistics, link, prolog,
         json, per_role, threshold, base, weights, spacy_nlp):
    """General class to run the entire program
	"""

    start_nlp_time = timeit.default_timer()
    nlp = spacy_nlp
    nlp_time = timeit.default_timer() - start_nlp_time

    start_parse_time = timeit.default_timer()
    miner = StoryMiner()

    # Read the input file
    story_lines = Reader.parse(filename)
    us_id = 1

    # Keep track of all errors
    success = 0
    fail = 0
    errors = ""
    c = Counter()

    # Keeps track of all successfully created UserStory objects
    us_instances = []
    failed_stories = []
    success_stories = []

    # Parse every user story (remove punctuation and mine)
    for s in story_lines:
        try:
            user_story = parse(s, us_id, systemname, nlp, miner)
            user_story = c.count(user_story)
            success += 1
            us_instances.append(user_story)
            success_stories.append(s)
        except ValueError as err:
            failed_stories.append([us_id, s, err.args])
            errors += "\n[User Story {} ERROR] {}! (\"{}\")".format(
                us_id, err.args[0], " ".join(s.split()))
            fail += 1
        us_id += 1

    # Print errors (if found)
    if errors:
        Printer.print_head("PARSING ERRORS")
        print(errors)

    parse_time = timeit.default_timer() - start_parse_time

    # Generate the term-by-user story matrix (m), and additional data in two other matrices
    start_matr_time = timeit.default_timer()

    matrix = Matrix(base, weights)
    matrices = matrix.generate(us_instances,
                               ' '.join([u.sentence for u in us_instances]),
                               nlp)
    m, count_matrix, stories_list, rme = matrices

    matr_time = timeit.default_timer() - start_matr_time

    # Print details per user story, if argument '-u'/'--print_us' is chosen
    if print_us:
        print("Details:\n")
        for us in us_instances:
            Printer.print_us_data(us)

    # Generate the ontology
    start_gen_time = timeit.default_timer()

    patterns = Constructor(nlp, us_instances, m)
    out = patterns.make(systemname, threshold, link)
    output_ontology, output_prolog, output_ontobj, output_prologobj, onto_per_role = out

    # Print out the ontology in the terminal, if argument '-o'/'--print_ont' is chosen
    if print_ont:
        Printer.print_head("MANCHESTER OWL")
        print(output_ontology)

    gen_time = timeit.default_timer() - start_gen_time

    # Gather statistics and print the results
    stats_time = 0
    if statistics:
        start_stats_time = timeit.default_timer()

        statsarr = Statistics.to_stats_array(us_instances)

        Printer.print_head("USER STORY STATISTICS")
        Printer.print_stats(statsarr[0], True)
        Printer.print_stats(statsarr[1], True)
        Printer.print_subhead(
            "Term-by-User Story Matrix (terms w/ total weight 0 hidden)")
        hide_zero = m[(m['sum'] > 0)]
        print(hide_zero)

        stats_time = timeit.default_timer() - start_stats_time

    # Write output files
    w = Writer()

    folder = "output/" + str(systemname)
    reports_folder = folder + "/reports"
    stats_folder = reports_folder + "/stats"

    outputfile = w.make_file(folder + "/ontology", str(systemname), "omn",
                             output_ontology)
    files = [["Manchester Ontology", outputfile]]

    outputcsv = ""
    sent_outputcsv = ""
    matrixcsv = ""

    if statistics:
        files.append([
            "General statistics",
            w.make_file(stats_folder, str(systemname), "csv", statsarr[0])
        ])
        files.append([
            "Term-by-User Story matrix",
            w.make_file(stats_folder,
                        str(systemname) + "-term_by_US_matrix", "csv", m)
        ])
        files.append([
            "Sentence statistics",
            w.make_file(stats_folder,
                        str(systemname) + "-sentences", "csv", statsarr[1])
        ])
    if prolog:
        files.append([
            "Prolog",
            w.make_file(folder + "/prolog", str(systemname), "pl",
                        output_prolog)
        ])
    if json:
        output_json_li = [str(us.toJSON()) for us in us_instances]
        output_json = "\n".join(output_json_li)
        files.append([
            "JSON",
            w.make_file(folder + "/json",
                        str(systemname) + "-user_stories", "json", output_json)
        ])
    if per_role:
        for o in onto_per_role:
            files.append([
                "Individual Ontology for '" + str(o[0]) + "'",
                w.make_file(folder + "/ontology",
                            str(systemname) + "-" + str(o[0]), "omn", o[1])
            ])

    # Print the used ontology generation settings
    Printer.print_gen_settings(matrix, base, threshold)

    # Print details of the generation
    Printer.print_details(fail, success, nlp_time, parse_time, matr_time,
                          gen_time, stats_time)

    report_dict = {
        "stories": us_instances,
        "failed_stories": failed_stories,
        "systemname": systemname,
        "us_success": success,
        "us_fail": fail,
        "times": [
            ["Initializing Natural Language Processor (<em>spaCy</em> v" +
             pkg_resources.get_distribution("spacy").version + ")", nlp_time],
            ["Mining User Stories", parse_time],
            ["Creating Factor Matrix", matr_time],
            ["Generating Manchester Ontology", gen_time],
            ["Gathering statistics", stats_time]
        ],
        "dir": os.path.dirname(os.path.realpath(__file__)),
        "inputfile": filename,
        "inputfile_lines": len(story_lines),
        "outputfiles": files,
        "threshold": threshold,
        "base": base,
        "matrix": matrix,
        "weights": m['sum'].copy().reset_index().sort_values(
            ['sum'], ascending=False).values.tolist(),
        "counts": count_matrix.reset_index().values.tolist(),
        "classes": output_ontobj.classes,
        "relationships": output_prologobj.relationships,
        "types": list(count_matrix.columns.values),
        "ontology": multiline(output_ontology)
    }

    # Finally, generate a report
    report = w.make_file(reports_folder,
                         str(systemname) + "_REPORT", "html",
                         generate_report(report_dict))
    files.append(["Report", report])

    # Print the location and name of all output files
    for file in files:
        if str(file[1]) != "":
            print("{} file successfully created at: \"{}\"".format(
                file[0], file[1]))

    # Return objects so that they can be used as input for other tools
    return {
        'us_instances': us_instances,
        'output_ontobj': output_ontobj,
        'output_prologobj': output_prologobj,
        'matrix': m
    }
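
A minimal sketch of calling this `main` directly. The argument values and file path are illustrative only; the parameter order matches the signature above, and `en_core_web_md` is just a common spaCy model choice, not mandated by the source:

import spacy

# Hedged sketch: all optional outputs disabled except printing the ontology.
# The threshold/base/weights values are placeholders, not recommended settings.
nlp = spacy.load("en_core_web_md")
result = main(filename="input/stories.txt", systemname="TicketSystem",
              print_us=False, print_ont=True, statistics=False, link=False,
              prolog=False, json=False, per_role=False, threshold=1.0,
              base=1, weights=None, spacy_nlp=nlp)
print(result['matrix'].head())  # term-by-user-story matrix (pandas DataFrame)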
Example #4
def main(filename, systemname, print_us, print_ont, statistics, link, prolog,
         json, per_role, threshold, base, weights, spacy_nlp):
    """General class to run the entire program
	"""

    start_nlp_time = timeit.default_timer()
    nlp = spacy_nlp
    nlp_time = timeit.default_timer() - start_nlp_time

    start_parse_time = timeit.default_timer()
    miner = StoryMiner()

    # Read the input file
    story_lines = Reader.parse(filename)
    us_id = 1

    # Keep track of all errors
    success = 0
    fail = 0
    errors = ""
    c = Counter()

    # Keeps track of all successfully created UserStory objects
    us_instances = []
    failed_stories = []
    success_stories = []

    # Parse every user story (remove punctuation and mine)
    for s in story_lines:
        try:
            user_story = parse(s, us_id, systemname, nlp, miner)
            user_story = c.count(user_story)
            success += 1
            us_instances.append(user_story)
            success_stories.append(s)
        except ValueError as err:
            failed_stories.append([us_id, s, err.args])
            errors += "\n[User Story {} ERROR] {}! (\"{}\")".format(
                us_id, err.args[0], " ".join(s.split()))
            fail += 1
        us_id += 1

    # Print errors (if found)
    if errors:
        Printer.print_head("PARSING ERRORS")
        print(errors)

    parse_time = timeit.default_timer() - start_parse_time

    # Generate the term-by-user story matrix (m), and additional data in two other matrices
    start_matr_time = timeit.default_timer()

    matrix = Matrix(base, weights)
    matrices = matrix.generate(us_instances,
                               ' '.join([u.sentence for u in us_instances]),
                               nlp)
    m, count_matrix, stories_list, rme = matrices

    matr_time = timeit.default_timer() - start_matr_time

    # Generate the ontology
    start_gen_time = timeit.default_timer()

    patterns = Constructor(nlp, us_instances, m)
    out = patterns.make(systemname, threshold, link)
    output_ontology, output_prolog, output_ontobj, output_prologobj, onto_per_role = out

    all_classes_list = []
    for i, class_vn in enumerate(output_ontobj.classes):
        one_concept = {
            'id': i,
            'class_name': class_vn.name,
            'parent_name': class_vn.parent,
            'occurs_in': occurence_list(class_vn.stories),
            'weight': '0',
            'group': class_vn.is_role
        }
        all_classes_list.append(one_concept)
    nodes = [{
        "id": cl["id"],
        "label": cl["class_name"],
        "weight": cl["weight"]
    } for cl in all_classes_list]
    relationships_query = output_prologobj.relationships

    all_relationships_list = []
    for relationship in relationships_query:
        one_concept = {
            'relationship_domain': relationship.domain,
            'relationship_name': relationship.name,
            'relationship_range': relationship.range
        }
        all_relationships_list.append(one_concept)

    edges_id_list = []
    concepts_query = []
    concepts_dict = {}
    concepts_dict_list = []
    relationshipslist = []
    for i, class_vn in enumerate(all_classes_list):
        one_concept = {
            'class_id': i,
            'class_name': class_vn['class_name'],
            'parent_name': class_vn['parent_name'],
            'weight': '0',
            'group': class_vn['group']
        }
        concepts_query.append(one_concept)

    for concept in concepts_query:
        concepts_dict[concept['class_id']] = concept['class_name']
        concepts_dict_list.append([concept['class_id'], concept['class_name']])
    for rel in all_relationships_list:
        relationshipslist.append([
            rel['relationship_domain'], rel['relationship_range'],
            rel['relationship_name']
        ])

        # Resolve the domain/range class names to their node ids
        x = y = None
        for concept_id, concept_name in concepts_dict_list:
            if rel['relationship_domain'] == concept_name:
                x = concept_id
            if rel['relationship_range'] == concept_name:
                y = concept_id

        # Skip relationships whose endpoints could not be resolved
        if x is None or y is None:
            continue

        edges_id_dict = {'from': x, 'to': y, 'label': rel['relationship_name']}
        # 'isa' relationships are rendered as dashed edges
        if rel['relationship_name'] == 'isa':
            edges_id_dict['dashes'] = "true"
        edges_id_list.append(edges_id_dict)

    print({'nodes': nodes, 'edges': edges_id_list})
    return {'nodes': nodes, 'edges': edges_id_list}
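
The returned `{'nodes': ..., 'edges': ...}` dict matches the shape a vis.js network dataset expects. A hedged sketch of writing it out for a web front end; the helper name and file path are illustrative:

import json as jsonlib  # aliased because this module uses `json` as a flag name

def write_graph(graph, path="output/graph.json"):
    # The edge dicts above store 'dashes' as the string "true"; convert it to
    # a real boolean so a JSON consumer such as vis.js treats it as a flag
    # (an assumption about the consumer, not required by the source).
    for edge in graph['edges']:
        if edge.get('dashes') == "true":
            edge['dashes'] = True
    with open(path, "w") as fh:
        jsonlib.dump(graph, fh, indent=2)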
Example #5
def main(filename, systemname, print_us, print_ont, statistics, link, prolog,
         json, per_role, threshold, base, weights, spacy_nlp):
    """General class to run the entire program
	"""

    start_nlp_time = timeit.default_timer()
    nlp = spacy_nlp
    nlp_time = timeit.default_timer() - start_nlp_time

    start_parse_time = timeit.default_timer()
    miner = StoryMiner()

    # Read the input file
    story_lines = Reader.parse(filename)
    us_id = 1

    # Keep track of all errors
    success = 0
    fail = 0
    errors = ""
    c = Counter()

    # Keeps track of all successfully created UserStory objects
    us_instances = []
    failed_stories = []
    success_stories = []

    # Parse every user story (remove punctuation and mine)
    for s in story_lines:
        try:
            user_story = parse(s, us_id, systemname, nlp, miner)
            user_story = c.count(user_story)
            success += 1
            us_instances.append(user_story)
            success_stories.append(s)
        except ValueError as err:
            failed_stories.append([us_id, s, err.args])
            errors += "\n[User Story {} ERROR] {}! (\"{}\")".format(
                us_id, err.args[0], " ".join(s.split()))
            fail += 1
        us_id += 1

    # Print errors (if found)
    if errors:
        Printer.print_head("PARSING ERRORS")
        print(errors)

    parse_time = timeit.default_timer() - start_parse_time

    # Generate the term-by-user story matrix (m), and additional data in two other matrices
    start_matr_time = timeit.default_timer()

    matrix = Matrix(base, weights)
    matrices = matrix.generate(us_instances,
                               ' '.join([u.sentence for u in us_instances]),
                               nlp)
    m, count_matrix, stories_list, rme = matrices

    matr_time = timeit.default_timer() - start_matr_time

    # Print details per user story, if argument '-u'/'--print_us' is chosen
    if print_us:
        print("Details:\n")
        for us in us_instances:
            Printer.print_us_data(us)

    # Generate the ontology
    start_gen_time = timeit.default_timer()

    patterns = Constructor(nlp, us_instances, m)
    out = patterns.make(systemname, threshold, link)
    output_ontology, output_prolog, output_ontobj, output_prologobj, onto_per_role = out

    print("HEY THIS IS THE OUTPUT_ONTOBJ WITH THE CLASSES APPARENTLY???")
    print(output_ontobj.classes)
    all_classes_list = []
    for class_vn in output_ontobj.classes:
        one_concept = {
            'class_name': class_vn.name,
            'parent_name': class_vn.parent,
            'occurs_in': occurence_list(class_vn.stories),
            'weight': '0',
            'group': class_vn.is_role
        }
        all_classes_list.append(one_concept)

    print(all_classes_list)

    # Build the node list; unlike Example #4, these nodes omit an explicit id
    nodes = [{
        "label": cl["class_name"],
        "weight": cl["weight"]
    } for cl in all_classes_list]

    relationships_query = output_prologobj.relationships

    all_relationships_list = []
    for relationship in relationships_query:
        one_concept = {
            'relationship_domain': relationship.domain,
            'relationship_name': relationship.name,
            'relationship_range': relationship.range
        }
        all_relationships_list.append(one_concept)

    print(all_relationships_list)

    edges_id_list = []
    concepts_query = []
    concepts_dict = {}
    concepts_dict_list = []
    relationshipslist = []
    for i, class_vn in enumerate(all_classes_list):
        one_concept = {
            'class_id': i,
            'class_name': class_vn['class_name'],
            'parent_name': class_vn['parent_name'],
            'weight': '0',
            'group': class_vn['group']
        }
        concepts_query.append(one_concept)

    for concept in concepts_query:
        concepts_dict[concept['class_id']] = concept['class_name']
        concepts_dict_list.append([concept['class_id'], concept['class_name']])

    # Debug: inspect the (id, name) pairs used to resolve edge endpoints
    print(concepts_dict_list)
    for i, rel in enumerate(all_relationships_list):
        relationshipslist.append([
            rel['relationship_domain'], rel['relationship_range'],
            rel['relationship_name']
        ])

        # Resolve the domain/range class names to their node ids
        x = y = None
        for concept_id, concept_name in concepts_dict_list:
            if rel['relationship_domain'] == concept_name:
                x = concept_id
            if rel['relationship_range'] == concept_name:
                y = concept_id

        # Skip relationships whose endpoints could not be resolved
        if x is None or y is None:
            continue

        edges_id_dict = {
            'id': i,
            'from': x,
            'to': y,
            'label': rel['relationship_name']
        }
        # 'isa' relationships are rendered as dashed edges
        if rel['relationship_name'] == 'isa':
            edges_id_dict['dashes'] = "true"
        edges_id_list.append(edges_id_dict)

    print(edges_id_list)

    # Print out the ontology in the terminal, if argument '-o'/'--print_ont' is chosen
    if print_ont:
        Printer.print_head("MANCHESTER OWL")
        print(output_ontology)

    gen_time = timeit.default_timer() - start_gen_time

    # Gather statistics and print the results
    stats_time = 0
    if statistics:
        start_stats_time = timeit.default_timer()

        statsarr = Statistics.to_stats_array(us_instances)

        Printer.print_head("USER STORY STATISTICS")
        Printer.print_stats(statsarr[0], True)
        Printer.print_stats(statsarr[1], True)
        Printer.print_subhead(
            "Term-by-User Story Matrix (terms w/ total weight 0 hidden)")
        hide_zero = m[(m['sum'] > 0)]
        print(hide_zero)

        stats_time = timeit.default_timer() - start_stats_time

    # Write output files
    w = Writer()

    folder = "output/" + str(systemname)
    reports_folder = folder + "/reports"
    stats_folder = reports_folder + "/stats"

    outputfile = w.make_file(folder + "/ontology", str(systemname), "omn",
                             output_ontology)
    files = [["Manchester Ontology", outputfile]]

    outputcsv = ""
    sent_outputcsv = ""
    matrixcsv = ""

    if statistics:
        files.append([
            "General statistics",
            w.make_file(stats_folder, str(systemname), "csv", statsarr[0])
        ])
        files.append([
            "Term-by-User Story matrix",
            w.make_file(stats_folder,
                        str(systemname) + "-term_by_US_matrix", "csv", m)
        ])
        files.append([
            "Sentence statistics",
            w.make_file(stats_folder,
                        str(systemname) + "-sentences", "csv", statsarr[1])
        ])
    if prolog:
        files.append([
            "Prolog",
            w.make_file(folder + "/prolog", str(systemname), "pl",
                        output_prolog)
        ])
    if json:
        output_json_li = [str(us.toJSON()) for us in us_instances]
        output_json = "\n".join(output_json_li)
        files.append([
            "JSON",
            w.make_file(folder + "/json",
                        str(systemname) + "-user_stories", "json", output_json)
        ])
    if per_role:
        for o in onto_per_role:
            files.append([
                "Individual Ontology for '" + str(o[0]) + "'",
                w.make_file(folder + "/ontology",
                            str(systemname) + "-" + str(o[0]), "omn", o[1])
            ])

    # Print the used ontology generation settings
    Printer.print_gen_settings(matrix, base, threshold)

    # Print details of the generation
    Printer.print_details(fail, success, nlp_time, parse_time, matr_time,
                          gen_time, stats_time)

    report_dict = {
        "stories": us_instances,
        "failed_stories": failed_stories,
        "systemname": systemname,
        "us_success": success,
        "us_fail": fail,
        "times": [
            ["Initializing Natural Language Processor (<em>spaCy</em> v" +
             pkg_resources.get_distribution("spacy").version + ")", nlp_time],
            ["Mining User Stories", parse_time],
            ["Creating Factor Matrix", matr_time],
            ["Generating Manchester Ontology", gen_time],
            ["Gathering statistics", stats_time]
        ],
        "dir": os.path.dirname(os.path.realpath(__file__)),
        "inputfile": filename,
        "inputfile_lines": len(story_lines),
        "outputfiles": files,
        "threshold": threshold,
        "base": base,
        "matrix": matrix,
        "weights": m['sum'].copy().reset_index().sort_values(
            ['sum'], ascending=False).values.tolist(),
        "counts": count_matrix.reset_index().values.tolist(),
        "classes": output_ontobj.classes,
        "relationships": output_prologobj.relationships,
        "types": list(count_matrix.columns.values),
        "ontology": multiline(output_ontology)
    }

    # Finally, generate a report
    report = w.make_file(reports_folder,
                         str(systemname) + "_REPORT", "html",
                         generate_report(report_dict))
    files.append(["Report", report])

    # Print the location and name of all output files
    for file in files:
        if str(file[1]) != "":
            print("{} file successfully created at: \"{}\"".format(
                file[0], file[1]))

    # Return objects so that they can be used as input for other tools
    return {
        'us_instances': us_instances,
        'output_ontobj': output_ontobj,
        'output_prologobj': output_prologobj,
        'matrix': m
    }
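
The `matrix` entry of the returned dict is the pandas term-by-user-story DataFrame: the report code above calls `reset_index`/`sort_values` on its 'sum' column. Downstream tools can therefore query it directly; a small hedged sketch mirroring the report's weight ranking:

def top_terms(m, n=10):
    # `m` is result['matrix']; it carries a 'sum' column, as used by the
    # statistics and report sections above.
    ranked = m['sum'].copy().reset_index().sort_values(['sum'],
                                                       ascending=False)
    return ranked.head(n).values.tolist()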