Exemplo n.º 1
0
def test_session():
    """Smoke-test a Session with one advanced and one plain search.

    Loads the three index files from ``<cwd>/Data``, runs each query once,
    and prints the results together with the wall-clock time each search
    took.

    Returns:
        True when both searches ran; False when ``Session.load`` raised
        ``FileNotFoundError`` for one of the data files.
    """
    s = Session()
    data_dir = os.path.join(os.getcwd(), 'Data')
    path_bir = os.path.join(data_dir, 'bir.p')
    pscore_path = os.path.join(data_dir, 'PageRank_score.npy')
    tf_idf = os.path.join(data_dir, 'tf_idx.p')

    try:
        s.load(path_bir, pscore_path, tf_idf)
    except FileNotFoundError:
        print("Cannot find necessary files!!! Try running setup file??!")
        return False

    q1 = "Biologists AND long OR Grinnell"
    q2 = "biologists exciting education sports Grinnell"

    start = time.time()
    res1 = s.advance_search(q1)
    elapsed = time.time() - start
    print("Results for advanced search: ")
    print(res1)
    print("Search takes {}".format(elapsed))

    # Restart the clock: the second measurement must not include the first
    # search or the time spent printing its results.
    start = time.time()
    res2 = s.search(q2)
    elapsed = time.time() - start
    print("Results for intelligent search: ")
    print(res2)
    print("Search takes {}".format(elapsed))
    return True
Exemplo n.º 2
0
def main():
    """Run the interactive Smoogle search prompt.

    Loads the index files and the crawl-id -> URL table from ``<cwd>/Data``,
    then repeatedly asks for a command (``s`` search, ``a`` advanced search,
    ``q`` quit). For a search command it asks for the query and the number
    of pages to show, runs the search, and prints the URL mapped to each id
    in the first element of the returned results.

    Returns:
        False when a required data file is missing; None after the user
        quits with ``q``.
    """
    se = Session()
    data_dir = os.path.join(os.getcwd(), 'Data')
    path_bir = os.path.join(data_dir, 'bir.p')
    pscore_path = os.path.join(data_dir, 'PageRank_score.npy')
    tf_idf = os.path.join(data_dir, 'tf_idx.p')

    try:
        print("Setting up Smoogle...")
        se.load(path_bir, pscore_path, tf_idf)
        # The id -> URL table is as necessary as the index files, so a
        # missing CSV is reported the same way instead of crashing.
        # ``with`` guarantees the handle is closed; ``newline=''`` is what
        # the csv module expects for files it reads.
        with open(os.path.join(data_dir, 'c_id_url.csv'), newline='') as ciu:
            crawl_id_url = {
                int(row[0]): row[1]
                for row in csv.reader(ciu)
                if row  # csv.reader yields [] for blank lines
            }
    except FileNotFoundError:
        print("Cannot find necessary files!!! Try running setup file??!")
        return False

    print("Welcome to Smoogle Search: ")
    while True:
        print("Please enter options (s/a/q): ")
        print("s - search a - advanced search q - quit")
        # input() always returns a str, so no falsy-value check is needed.
        arg = input()
        if arg == 'q':
            break
        if arg not in ('s', 'a'):
            print("This is wrong command!")
            continue

        query = input("Please enter search query: ")
        num = input("Please enter the number of web pages to show ")
        try:
            count = int(num)
        except ValueError:
            # Keep the prompt alive on a typo instead of crashing.
            print("Please enter a whole number!")
            continue

        run_search = se.search if arg == 's' else se.advance_search
        results = run_search(query, count)
        for result in results[0]:
            print(crawl_id_url.get(result))