示例#1
0
        if not categorize:
            profile.categorize = None
        profile.inputdir(jobfile.rootdir)

        if graph:
            for cpu in cpus:
                profile.cpu = cpu
                if funcdata:
                    name = 'funcstacks%d' % cpu
                else:
                    name = 'stacks%d' % cpu
                output = StatOutput(jobfile, info=profile)
                output.xlabel = 'System Configuration'
                output.ylabel = '% CPU utilization'
                output.stat = name
                output.graph(name, graph)

        if dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_dot(jobfile=jobfile, threshold=threshold)

        if textout:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_txt(jobfile=jobfile)

        if not graph and not textout and not dodot:
            for cpu in cpus:
                if not categorize:
                    profile.categorize = None
def run_evaluation(embeddings, report=False, graphs=False, classifier_args=None,
                   classifier='knn', selected_feature_areas=None,
                   selected_features=None, folder='output'):

    '''
    Run the evaluation on the embeddings and return the results and averages.

    This is the main entry point. It reads ``language.csv`` and
    ``feature_areas.csv`` from the current working directory, builds the
    requested classifier, evaluates the embeddings and optionally writes
    text reports and graphs to ``folder``.

    Parameters
    ----------
    embeddings : dictionary (string: list (int/float))
        A dictionary with the embeddings to evaluate. The keys must be the
        language identifier (ISO 639-3), and the values are the language
        embeddings.
    report : bool
        Whether to write text reports with the results and averages in the
        output folder.
    graphs : bool
        Whether to create graphs of the results in the output folder. This
        includes bar graphs, maps, and count graphs. This can take several
        minutes to complete.
    classifier_args : list (int), optional
        A list of arguments to the classifier. If using k-nearest neighbors,
        the first element is k (default 10). If using multilayer perceptron,
        the list holds the layer sizes, where the length of the list is the
        number of layers (default ``(50, 50, 50)``). Ignored for the support
        vector machine. ``None`` or an empty list selects the defaults.
    classifier : string
        Which classifier to use. Possible values are knn for k-nearest
        neighbors, mlp for multilayer perceptron, and svm for support vector
        machine. Any value other than knn or mlp selects the support vector
        machine.
    selected_feature_areas : list (int)
        List of feature areas to evaluate for. Leave out for all, or use any
        of the following integers
            0   - None (add individual features with selected features)
            1   - Phonology
            2   - Morphology
            3   - Nominal Categories
            4   - Nominal Syntax
            5   - Verbal Categories
            6   - Word Order
            7   - Simple Clauses
            8   - Complex Sentences
            9   - Lexicon
            10  - Sign Languages
            11  - Other
            12  - Word Order
    selected_features : list (string)
        Add individual features not included by selected_feature_areas.
    folder : string
        The output folder where to place the text reports and graphs. It
        must already exist (an empty string skips the existence check).

    Returns
    --------
    Tuple (True, (dictionary, dictionary)) or
    Tuple (False, string)
        On success, a tuple where the first value is True and the second
        value is a tuple with the results and the averages.
        On failure (bad embeddings format or missing output folder), the
        first value is False and the second value is an error message.

    '''

    if not verify_embeddings(embeddings):
        return (False, "Wrong embeddings format. Format must be a dictionary where the keys are language IDs (ISO 639-3) and values are the language embeddings")

    # Normalise the folder so file names can be appended to it directly.
    if folder and not folder.endswith("/"):
        folder += "/"

    if folder and not os.path.isdir(folder):
        return (False, "Could not find the path {}".format(folder))

    print("Starting")

    # newline='' is what the csv module documents for files handed to
    # csv.reader; without it, newlines embedded in quoted fields are
    # misinterpreted. Blank lines parse to empty rows and are dropped so a
    # stray empty line cannot raise IndexError below.
    with open('language.csv', 'rt', encoding='utf8', newline='') as file:
        languages = [row for row in csv.reader(file) if row]
    headers = languages.pop(0)
    # Remove languages we do not have embeddings for
    languages = [lang for lang in languages if lang[1] in embeddings]

    with open('feature_areas.csv', 'rt', encoding='utf8', newline='') as file:
        feature_areas = {row[0]: row[1] for row in csv.reader(file) if row}

    included_features = get_included_features(feature_areas, selected_feature_areas,
                                              selected_features)

    if classifier == 'knn':
        n_neighbors = classifier_args[0] if classifier_args else 10
        classifier = neighbors.KNeighborsClassifier(n_neighbors)
    elif classifier == 'mlp':
        # Imported lazily so the neural-network module is only loaded when
        # the MLP classifier is actually requested.
        from sklearn.neural_network import MLPClassifier
        layer_sizes = tuple(classifier_args) if classifier_args else (50, 50, 50)
        classifier = MLPClassifier(hidden_layer_sizes=layer_sizes)
    else:
        # Every other value (including 'svm') selects the SVM classifier.
        classifier = svm.SVC()

    print("Evaluating embeddings")
    results = evaluate(languages, headers, embeddings, included_features, classifier)
    print("Calculating averages")
    averages = calculate_averages(results)

    if report:
        print("Writing text reports")
        write_report(folder, results, averages)

    if graphs:
        print("Creating bar graphs")
        graph(folder, results, averages)
        print("Creating maps")
        maps(folder, averages, languages)
        print("Creating count graphs")
        count_score_graph(folder, averages, languages)

    print("Finished\n")

    return (True, (results, averages))
示例#3
0
文件: profile.py 项目: AMDmi3/gem5
        if not categorize:
            profile.categorize = None
        profile.inputdir(jobfile.rootdir)

        if graph:
            for cpu in cpus:
                profile.cpu = cpu
                if funcdata:
                    name = 'funcstacks%d' % cpu
                else:
                    name = 'stacks%d' % cpu
                output = StatOutput(jobfile, info=profile)
                output.xlabel = 'System Configuration'
                output.ylabel = '% CPU utilization'
                output.stat = name
                output.graph(name, graph)

        if dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_dot(jobfile=jobfile, threshold=threshold)

        if textout:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_txt(jobfile=jobfile)

        if not graph and not textout and not dodot:
            for cpu in cpus:
                if not categorize:
                    profile.categorize = None
示例#4
0
import input
import process
import output

# main
# Top-level driver script: passes `x` to three functions of the local
# `process` module (return values are discarded), then hands `f` to the
# local `output` module.
# NOTE(review): `x`, `f` and `n` are not defined anywhere in the visible
# source -- presumably they are meant to come from the `input` module;
# confirm before running, as this raises NameError as written.
process.value(x)
process.constant(x)

process.Numpy(x)
output.result(f)
# `n` selects one optional extra output: 0 -> graph, 1 -> approximation.
# Any other value emits nothing further.
if n == 0:
    output.graph(f)
elif n == 1:
    output.approximation(f)