Пример #1
0
def main():
    """Run the predictor on the test2 set, or score it over repeated samples.

    In pseudo-code:

       load data

       create graphs from data

       if args.test2
           predict for data.test2, print and write the predictions
       else
           for {kfoldcross times}
               predict results on the sampler's remainder
               measure results on remainder
               resample

           take average f-measure

    Parameters
    ----------
    None. Configuration is read from the module-level ``args`` object
    (``test2``, ``samplesize``, ``kfoldcross``, ``verbose``).

    Returns
    -------
    int
        Always 0.

    """
    data = load_data()

    hgraph = nx.Graph(data.humanppi)
    fgraph = nx.Graph(data.functions)

    # Every print below passes exactly one argument, so it produces the same
    # output whether ``print`` is a statement (Python 2) or a function.
    if args.test2:
        print("run predictions on test2 set")
        proteins_test2 = data.test2
        predictions = predict_cps(proteins_test2, hgraph, fgraph)
        print(list(zip(proteins_test2, predictions)))
        write_test2_prediction(proteins_test2, predictions)

    else:
        sampler = Sampler(data.test1, args.samplesize)
        results = []  # to be populated by Results class objects

        for _ in range(args.kfoldcross):
            # First element of every remainder entry; list() keeps the
            # subscript valid where zip() returns an iterator.
            sample = list(zip(*sampler.remainder))[0]
            predictions = predict_cps(sample, hgraph, fgraph)
            result = Results(sampler.remainder, predictions)
            results.append(result)
            sampler.resample()
            if args.verbose:
                result.print_results()
            # The confusion matrix is printed regardless of verbosity.
            result.print_confusion_matrix()

        f_measures = [result.f_measure for result in results]
        if f_measures:
            avg_f_measure = sum(f_measures) / float(len(f_measures))
            print("\n\nAverage F-Measure: {}".format(avg_f_measure))
        else:
            # No rounds were run (kfoldcross <= 0): there is nothing to
            # average, so report that instead of dividing by zero.
            print("\n\nAverage F-Measure: n/a (no evaluation rounds were run)")

    # Keep the console window open until the user confirms.
    # ``raw_input`` only exists on Python 2; fall back to ``input`` elsewhere.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    read_line("Hit Enter to continue... ")

    return 0
Пример #2
0
def sample(trained_weights_path, trained_model_name, trained_weights_name,
           input_path, output_path, search_method):
    """Predict GUI code for one image and write it to a ``.gui`` file.

    The result is written to ``<output_path>/<file_name>.gui`` with
    START_TOKEN and END_TOKEN removed, where ``file_name`` is the part of
    the input file's base name before its first ".".

    Parameters
    ----------
    trained_weights_path : str
        Directory containing ``meta_dataset.npy``; also handed to the model
        and to the Sampler.
    trained_model_name : str
        Passed to ``model.load`` as ``name``.
    trained_weights_name : str
        Passed to ``model.load`` as ``weights_name``; includes the file
        extension, e.g. ``.hdf5``.
    input_path : str
        Path of the image to evaluate.
    output_path : str
        Directory the ``.gui`` file is written to.
    search_method : str
        ``"greedy"`` for greedy decoding; any other value is converted with
        ``int()`` and used as the beam width.

    Raises
    ------
    ValueError
        If ``search_method`` is neither ``"greedy"`` nor an integer.
    """
    # NOTE(review): the file holds a shape next to scalars, so it is
    # presumably a pickled object array; newer NumPy refuses to load those
    # unless allow_pickle=True. Only point this at trusted model folders,
    # since unpickling can execute arbitrary code.
    meta_dataset = np.load("{}/meta_dataset.npy".format(trained_weights_path),
                           allow_pickle=True)
    input_shape = meta_dataset[0]
    output_size = meta_dataset[1]
    vocabulary_size = meta_dataset[2]

    model = newpix2code(input_shape, output_size, trained_weights_path,
                        vocabulary_size)
    # trained_weights_name carries its file extension, e.g. ".hdf5".
    model.load(name=trained_model_name, weights_name=trained_weights_name)

    sampler = Sampler(trained_weights_path, input_shape, output_size,
                      CONTEXT_LENGTH)

    # Everything before the first "." of the base name. partition() returns
    # the whole name when there is no "." at all, whereas slicing with
    # find() == -1 would silently drop the last character.
    file_name = basename(input_path).partition(".")[0]
    evaluation_img = Utils.get_preprocessed_img(input_path, IMAGE_SIZE)
    image_batch = np.array([evaluation_img])

    if search_method == "greedy":
        result, _ = sampler.predict_greedy(model,
                                           image_batch,
                                           require_sparse_label=False)
        print("Result greedy: {}".format(result))
    else:
        beam_width = int(search_method)
        print("Search with beam width: {}".format(beam_width))
        result, _ = sampler.predict_beam_search(model,
                                                image_batch,
                                                beam_width=beam_width,
                                                require_sparse_label=False)
        print("Result beam: {}".format(result))

    with open("{}/{}.gui".format(output_path, file_name), 'w') as out_f:
        out_f.write(result.replace(START_TOKEN, "").replace(END_TOKEN, ""))
Пример #3
0
    trained_weights_path = argv[0]
    trained_model_name = argv[1]
    input_path = argv[2]
    output_path = argv[3]
    encoding_type = argv[4]
    search_method = "greedy" if len(argv) < 6 else argv[5]

# Read the metadata stored next to the trained weights; its first two
# entries are used as the model's input shape and output size.
meta_dataset = np.load(
    "{}/meta_dataset.npy".format(trained_weights_path), allow_pickle=True)
input_shape, output_size = meta_dataset[0], meta_dataset[1]

# Build the model for the requested encoding and load the trained model.
model = pix2code(
    input_shape, output_size, trained_weights_path, encoding_type)
model.load(trained_model_name)

sampler = Sampler(
    trained_weights_path, input_shape, output_size, CONTEXT_LENGTH)

# dataset = Dataset()
# if encoding_type == "one_hot":
#     dataset.load_with_one_hot_encoding(input_path, generate_binary_sequences=True)
# elif encoding_type == "w2v":
#     dataset.load_with_word2vec(input_path, generate_binary_sequences=True)
# else:
#     raise Exception("Missing parameter")

# dataset = Dataset()
# dataset.load(input_path)
''' Metodo di valutazione cinese
model.compile()
score, loss = model.minevaluate(dataset)
print("accuracy: ", score)
Пример #4
0
# Load the metadata stored next to the trained weights.
# allow_pickle=True is passed directly: the earlier approach patched np.load
# and then restored it *before* this call, so the patch never took effect.
# NOTE(review): unpickling can execute arbitrary code -- only load
# meta_dataset.npy from a trusted model folder.
meta_dataset = np.load("{}/meta_dataset.npy".format(trained_weights_path),
                       allow_pickle=True)
input_shape = meta_dataset[0]
output_size = meta_dataset[1]

model = pix2code(input_shape, output_size, trained_weights_path)
model.load(trained_model_name)

sampler = Sampler(trained_weights_path, input_shape, output_size,
                  CONTEXT_LENGTH)

# Base name of the tablet image up to its first ".". partition() keeps the
# full name when there is no ".", where find() == -1 used to cut off the
# last character.
file_name = basename(input_path_tablet).partition(".")[0]

# Adjusted in order to deal with two input images (tablet and desktop).
evaluation_img_tablet = Utils.get_preprocessed_img(input_path_tablet,
                                                   IMAGE_SIZE)
evaluation_img_desktop = Utils.get_preprocessed_img(input_path_desktop,
                                                    IMAGE_SIZE)

if search_method == "greedy":
    result, _ = sampler.predict_greedy(model,
                                       np.array([evaluation_img_tablet]),
                                       np.array([evaluation_img_desktop]))
    print("Result greedy: {}".format(result))
else:
    # Any value other than "greedy" is interpreted as the beam width.
    beam_width = int(search_method)
    print("Search with beam width: {}".format(beam_width))
    result, _ = sampler.predict_beam_search(model,
                                            np.array([evaluation_img_tablet]),
                                            np.array([evaluation_img_desktop]),
                                            beam_width=beam_width)
    print("Result beam: {}".format(result))
Пример #5
0
def _gui_tokens(gui_text):
    """Split GUI markup into tokens, with ',', '{' and '}' as own tokens."""
    spaced = gui_text.replace(",", " , ").replace("}", " } ").replace("{", " { ")
    # split() without arguments already collapses any run of whitespace.
    return spaced.split()


def evaluate_model(trained_weights_path, trained_model_name,
                   trained_weights_name, input_path, output_path,
                   search_method="greedy"):
    """Compute the corpus BLEU score of the model over a folder of samples.

    Every ``<name>.gui`` entry in ``input_path`` is a reference. The
    prediction for it is read from ``<output_path>/<name>.gui`` when that
    file already exists. Otherwise it is generated from
    ``<input_path>/<name>.png`` and saved to ``<output_path>/<name>.gui``.
    Entries with neither a stored prediction nor an image are skipped.

    NOTE(review): if ``output_path`` and ``input_path`` are the same
    directory, each reference file is itself picked up as the "stored
    prediction" -- confirm callers keep the two apart.

    Parameters
    ----------
    trained_weights_path : str
        Directory containing ``meta_dataset.npy``; also handed to the
        vocabulary, the model and the Sampler.
    trained_model_name : str
        Passed to ``model.load`` as ``name``.
    trained_weights_name : str
        Passed to ``model.load`` as ``weights_name``; includes the file
        extension, e.g. ``xx.hdf5``.
    input_path : str
        Directory with the reference ``.gui`` files and ``.png`` images.
    output_path : str
        Directory predictions are read from and written to.
    search_method : str, optional
        ``"greedy"`` (default) for greedy decoding; any other value is
        converted with ``int()`` and used as the beam width.

    Returns
    -------
    tuple
        ``(bleu, actual, predicted)``: the ``corpus_bleu`` score, the
        references (one single-element list of token lists per sample) and
        the predicted token lists.
    """
    # NOTE(review): the file holds a shape next to a scalar, so it is
    # presumably a pickled object array; newer NumPy refuses to load those
    # unless allow_pickle=True. Only use with trusted model folders.
    meta_dataset = np.load("{}/meta_dataset.npy".format(trained_weights_path),
                           allow_pickle=True)
    input_shape = meta_dataset[0]
    output_size = meta_dataset[1]

    # Get the vocabulary size. The vocabulary is retrieved from the weights
    # directory passed in; the bare name ``weights_path`` used previously is
    # not a parameter of this function.
    voc = Vocabulary()
    voc.retrieve(trained_weights_path)
    vocabulary_size = voc.size

    model = UItoPage(input_shape, output_size, trained_weights_path,
                     vocabulary_size)
    # trained_weights_name carries its file extension, e.g. "xx.hdf5".
    model.load(name=trained_model_name, weights_name=trained_weights_name)

    sampler = Sampler(trained_weights_path, input_shape, output_size,
                      CONTEXT_LENGTH)

    actual, predicted = [], []
    current_num = 0
    for f in os.listdir(input_path):
        gui_pos = f.find(".gui")
        if gui_pos == -1:
            continue
        file_name = f[:gui_pos]
        image_path = "{}/{}.png".format(input_path, file_name)
        prediction_path = "{}/{}.gui".format(output_path, file_name)

        if os.path.isfile(prediction_path):
            # output_path already holds a prediction for this sample
            # (e.g. from an earlier run over the test set): reuse it and
            # skip image preprocessing altogether.
            predict_result = load_doc(prediction_path)
        elif os.path.isfile(image_path):
            evaluation_img = Utils.get_preprocessed_img(image_path, IMAGE_SIZE)
            if search_method == "greedy":
                result, _ = sampler.predict_greedy(
                    model, np.array([evaluation_img]),
                    require_sparse_label=False)
            else:
                beam_width = int(search_method)
                print("Search with beam width: {}".format(beam_width))
                result, _ = sampler.predict_beam_search(
                    model, np.array([evaluation_img]),
                    beam_width=beam_width, require_sparse_label=False)
            predict_result = result.replace(START_TOKEN, "").replace(END_TOKEN, "")
            current_num += 1
            print("Num:{} Evaluate image: {}\n".format(current_num, file_name))

            # Store the prediction so later runs can reuse it.
            with open(prediction_path, 'w') as out_f:
                out_f.write(predict_result)
        else:
            print("No image exists in input path.")
            continue

        predicted.append(_gui_tokens(predict_result))

        actual_gui = load_doc("{}/{}.gui".format(input_path, file_name))
        # One reference per sample, hence the extra list level.
        actual.append([_gui_tokens(actual_gui)])

    assert len(actual) == len(predicted)
    bleu = corpus_bleu(actual, predicted)
    return bleu, actual, predicted
Пример #6
0
# Load the metadata stored next to the trained weights.
# allow_pickle=True is passed directly: the earlier approach patched np.load
# and then restored it *before* this call, so the patch never took effect.
# NOTE(review): unpickling can execute arbitrary code -- only load
# meta_dataset.npy from a trusted model folder.
meta_dataset = np.load("{}/meta_dataset.npy".format(trained_weights_path),
                       allow_pickle=True)
input_shape = meta_dataset[0]
output_size = meta_dataset[1]

model = pix2code(input_shape, output_size, trained_weights_path)
model.load(trained_model_name)

sampler = Sampler(trained_weights_path, input_shape, output_size,
                  CONTEXT_LENGTH)

# Base name of the tablet image up to its first ".". partition() keeps the
# full name when there is no ".", where find() == -1 used to cut off the
# last character.
file_name = basename(input_path_tablet).partition(".")[0]

# Adjusted in order to deal with two input images (tablet and desktop).
evaluation_img_tablet = Utils.get_preprocessed_img(input_path_tablet,
                                                   IMAGE_SIZE)
evaluation_img_desktop = Utils.get_preprocessed_img(input_path_desktop,
                                                    IMAGE_SIZE)

result = sampler.complete_sequence(model, np.array([evaluation_img_tablet]),
                                   np.array([evaluation_img_desktop]),
                                   input_path_gui)
print("Result greedy: {}".format(result))

# Write the result without the start/end marker tokens.
with open("{}/{}.gui".format(output_path, file_name), 'w') as out_f:
    out_f.write(result.replace(START_TOKEN, "").replace(END_TOKEN, ""))