예제 #1
0
def run_search():
    """Run one decentralized search per article pair and save the outcomes.

    The first 100 pairs from ``load_article_pairs()`` are processed. Each
    title is mapped to an integer id through ``title_to_linenum`` and
    ``util.run_decentralized_search`` is called once for the pair over the
    adjacency list returned by ``load_30k_adj_list()``.

    For every pair a tuple ``(article1_name, article2_name, successes,
    failures, successful_path_lengths)`` is recorded. Because each pair is
    searched exactly once, the two counts are 0 or 1 and the list holds at
    most one path length. The collected tuples are written to
    ``bin/results/feat3.pk1`` with ``load_data.save_object``.

    No exceptions are caught here: a KeyError from a title lookup or from
    the search itself propagates to the caller.
    """
    pairs = load_article_pairs()
    adjacency = load_30k_adj_list()

    total_successes = 0
    total_failures = 0
    results = []

    for pair_index, (article1_name, article2_name) in enumerate(pairs[:100]):
        # Running pair index, printed as a progress indicator.
        print(pair_index)

        source_id = int(title_to_linenum[article1_name])
        target_id = int(title_to_linenum[article2_name])

        outcome, path_length = util.run_decentralized_search(
            source_id, target_id, adjacency, linenum_to_title,
            util.get_article_distance)

        # Single trial per pair, so the per-pair tallies are either
        # (0 successes, 1 failure) or (1 success, 0 failures).
        if outcome == "FAILURE":
            total_failures += 1
            record = (article1_name, article2_name, 0, 1, [])
        else:
            total_successes += 1
            record = (article1_name, article2_name, 1, 0, [path_length])

        results.append(record)

    print("%d successes, %d failures" % (total_successes, total_failures))

    # Persist the per-pair results.
    load_data.save_object(results, "bin/results/feat3.pk1")
예제 #2
0
def run_random_search():
    """Run 1000 decentralized searches per article pair and save the tallies.

    The first 100 pairs from ``load_article_pairs()`` are processed. For
    each pair, ``util.run_decentralized_search`` is called 1000 times over
    the adjacency list from ``load_30k_adj_list()``; failures are counted
    and the path length of every successful trial is kept.

    A pair whose trials raise KeyError is skipped entirely: nothing is
    recorded or printed for it, even if earlier trials had finished. The
    title-to-id lookups sit outside the ``try`` block, so a KeyError raised
    there propagates to the caller.

    Results are tuples ``(article1_name, article2_name, successes,
    failures, successful_path_lengths)`` and are written to
    ``bin/results/random_dec_search_1ktrials.pk1`` with
    ``load_data.save_object``.
    """
    pairs = load_article_pairs()
    adjacency = load_30k_adj_list()

    # One entry per completed pair:
    # (a1_name, a2_name, # success, # fail, successful path lengths)
    results = []

    for article1_name, article2_name in pairs[:100]:
        source_id = int(title_to_linenum[article1_name])
        target_id = int(title_to_linenum[article2_name])

        path_lengths = []
        failures = 0

        try:
            for _ in range(1000):
                outcome, path_length = util.run_decentralized_search(
                    source_id, target_id, adjacency, linenum_to_title,
                    util.get_article_distance)

                if outcome == "FAILURE":
                    failures += 1
                else:
                    path_lengths.append(path_length)
        except KeyError:
            # Drop this pair altogether, including any partial tallies.
            continue

        # Every successful trial appended exactly one length, so the list
        # length is the success count.
        successes = len(path_lengths)
        results.append(
            (article1_name, article2_name, successes, failures, path_lengths))

        print("%d successes, %d failures" % (successes, failures))

    print("Number of pairs actually completed: %d" % len(results))

    # Persist the per-pair results.
    load_data.save_object(results, "bin/results/random_dec_search_1ktrials.pk1")
예제 #3
0
def run_experiment():
    """Compare decentralized-search distance with ontology distance.

    Repeatedly draws two different titles from ``articles``, runs
    ``util.run_decentralized_search`` between them over ``adj_list``, and
    prints the outcome. For a successful search the ontology distance from
    ``get_ontology_distance`` is printed next to the search distance. The
    loop stops after 5 successful searches; failed or errored attempts are
    reported and do not count.

    A search status of ``None`` is reported as ``"KeyError..."`` and a
    status of ``"FAILURE"`` is reported with the article names.
    """
    required_successes = 5
    num_success = 0

    while num_success < required_successes:
        article1_name = random.choice(articles)
        article2_name = random.choice(articles)
        # Redraw until the two titles differ.
        # NOTE(review): this never terminates if ``articles`` holds fewer
        # than two distinct titles -- confirm callers guarantee that.
        while article1_name == article2_name:
            article2_name = random.choice(articles)

        print("Article 1: %s, article 2: %s" % (article1_name, article2_name))

        src_id = int(title_to_linenum[article1_name])
        dst_id = int(title_to_linenum[article2_name])

        # Predicted distance from decentralized search.
        success_or_fail, predicted_distance = util.run_decentralized_search(
            src_id, dst_id, adj_list, linenum_to_title,
            util.get_article_distance)

        if success_or_fail is None:
            # Error case: the search returned no status at all.
            print("KeyError...")
        elif success_or_fail == "FAILURE":
            print("%s. Article 1: %s, Article 2: %s" %
                  (success_or_fail, article1_name, article2_name))
        else:
            num_success += 1

            # NOTE(review): formatted with %d below, so this assumes
            # get_ontology_distance returns a single number -- TODO confirm.
            ontology_distance = get_ontology_distance(article1_name, article2_name)

            print("%s. Article 1: %s, Article 2: %s, Predicted distance = %d, Ontology distance = %d" %
                  (success_or_fail, article1_name, article2_name, predicted_distance, ontology_distance))