# Build an undirected graph of subreddits and the links between them, then save it as GML.
# DBModel is this project's database wrapper.
from graph_tool.all import Graph


def main():
    g = Graph()
    g.set_directed(False)
    
    db = DBModel.get()
    g.vertex_properties["subscribers"] = g.new_vertex_property("int")
    g.vertex_properties["name"] = g.new_vertex_property("string")
    vertices = dict()

    # add all vertices to the graph
    for subreddit in db.get_subreddits():
        v = g.add_vertex()
        g.vertex_properties["name"][v] = subreddit[0]
        g.vertex_properties["subscribers"][v] = subreddit[1]
        vertices[subreddit[0]] = v

    # add all the edges
    for link in db.get_all_links():
        v1, v2 = link
        source = vertices[v1]
        target = vertices[v2]
        g.add_edge(source, target)

    db.close()
    g.save("data/reddit.gml")
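
Once saved, the GML file can be loaded back for analysis. A minimal sketch using graph-tool's load_graph; the property names match those set above:

from graph_tool.all import load_graph

g = load_graph("data/reddit.gml")
print(g.num_vertices(), g.num_edges())
print(g.vertex_properties["name"][g.vertex(0)])  # name of an arbitrary vertex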
Example #2
import json

from bottle import HTTPResponse  # assumed: this HTTPResponse matches Bottle's API


def retry():
    global choice_bodies, candidates, top

    # Advance to the next-ranked candidate; answer 500 once the list is exhausted.
    top += 1
    if top >= len(candidates):
        r = HTTPResponse(status=500)
        r.set_header('Content-Type', 'application/json')
        r.set_header('Access-Control-Allow-Origin', '*')
        return r

    restaurant_db = DBModel('gourmet', 'localhost', 'foo', 'bar')
    # Look up the restaurant's name and address. String-formatted SQL is
    # injection-prone; parameterized queries would be safer if DBModel supports them.
    name_result = restaurant_db.select('SELECT name FROM restaurants WHERE id={}'.format(
        choice_bodies[top]['_source']['restaurant_id']
    ))
    address_result = restaurant_db.select(
        'SELECT address FROM restaurants WHERE id={}'.format(
            choice_bodies[top]['_source']['restaurant_id']
        )
    )

    # "How about this restaurant?" / "Name: ..."; the address line is appended when known.
    restaurant = 'こちらのお店はいかがでしょうか?\n\n店名 : {}'.format(name_result[0])
    if address_result[0] != 'nan':
        restaurant += '\n住所 : {}'.format(address_result[0])

    # Check for None before calling len() on the candidate list.
    if candidates[top] is None or len(candidates[top]) == 0:
        # "Unfortunately, no one who has visited this restaurant was found."
        recommend = 'このお店に行ったことがある人は,残念ながら見つかりませんでした.'
    else:
        # "People who have visited this restaurant say the following:"
        recommend = 'このお店に行ったことがある人は次のような感想を述べています :'
        for candidate in candidates[top]:
            recommend += '\n\n・{}'.format(candidate)

    dicts = [
        {'restaurant': restaurant, 'recommend': recommend}
    ]

    r = HTTPResponse(status=200, body=json.dumps(dicts, ensure_ascii=False))
    r.set_header('Content-Type', 'application/json')
    r.set_header('Access-Control-Allow-Origin', '*')
    return r
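
A minimal sketch of how retry() might be exposed over HTTP, assuming the Bottle framework (suggested by HTTPResponse); the route path and port are illustrative:

from bottle import route, run

@route('/retry')
def retry_endpoint():
    # Hypothetical endpoint that simply forwards to retry().
    return retry()

run(host='localhost', port=8080)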
Example #3
import praw  # written against the legacy praw 3 API (r.login / r.get_comments)


def main():
    model = DBModel.get()
    print("[ ] Connecting..")
    r = praw.Reddit(user_agent="/u/benediktkr/")
    print("[ ] Logging in..")
    r.login(username="******", password="******")  # credentials redacted in the source
    print("[ ] Starting..")
    try:
        while True:
            # praw follows the guidelines on ratelimits
            print("  [ ] Fetching comments..")
            comments = list(r.get_comments("all", limit=None))
            print("    [+] Fetched: {0} comments.".format(len(comments)))
            print("    [ ] Processing")
            for comment in comments:
                username = str(comment.author)
                subreddit = "/r/" + str(comment.subreddit).lower() + "/"
                model.save_comment(username, subreddit)
            print("    [+] Done")
    except KeyboardInterrupt:
        model.close()
        print("\n[!] Exiting")
Example #4
# Breadth-first crawl over subreddit sidebar links, persisting nodes and edges via DBModel.
from collections import deque


def bfs(start_node):
    f = open('data/reddit.txt', 'w')
    db = DBModel.get()
    todo = deque()
    visited = set()
    todo.append(start_node)
    while todo:
        here = todo.popleft()
        if here in visited:
            continue
        visited.add(here)
        this_subreddit = parse_sidebar(here)
        db.save_subreddit(this_subreddit)
        # Queue unvisited neighbors and record each sidebar link as an edge.
        for subreddit in this_subreddit['links']:
            if subreddit not in visited:
                todo.append(subreddit)
            link = (here, subreddit)
            print(link)
            db.save_link(link)
    db.close()
    f.close()
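
A minimal usage sketch for the crawler above; the seed subreddit is illustrative and follows the "/r/name/" form used elsewhere in this project:

if __name__ == '__main__':
    bfs("/r/python/")  # hypothetical seed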
Example #5
# parser, word2vec_model, elastic_model, Analyst and DBModel are project-local
# objects assumed to be defined at module scope.
import json

import numpy as np


def get_reply(query):
    global candidates, choice_bodies, top

    candidates = []
    choice_bodies = []
    top = 0

    parser.drop_morph(query)

    # Expand the parsed query words into search terms via word2vec similarity.
    word_pairs = word2vec_model.similar_words(parser.words)
    word_pairs = word2vec_model.most_significant_word_pairs(word_pairs)

    terms = [word for word, _ in word_pairs]

    elastic_results = elastic_model.search_terms(terms)

    bodies = elastic_results[0]

    # specific_parts are Japanese POS tags: common noun, place name, proper noun, organization name.
    analyst = Analyst(word2vec_model, specific_parts=['普通名詞', '地名', '固有名詞', '組織名'])

    all_sum_scores = []
    all_scores = []
    all_candidates = []

    # Score every review body returned by Elasticsearch; averaging keeps long
    # reviews from dominating.
    for body in bodies:
        analyst.parse(body['_source']['body'])
        candidate_scores = np.array(analyst.calc_candidate_score())
        query_base_scores = np.array(analyst.calc_query_base_score(parser.words))
        scores = list(candidate_scores + query_base_scores)

        all_sum_scores.append(sum(scores) / (len(scores) if len(scores) != 0 else 1))
        all_scores.append(scores)
        all_candidates.append(analyst.candidates)

    # Rank review bodies by descending mean score (argsort is ascending, so reverse).
    indices = np.argsort(all_sum_scores)[::-1]

    for index in indices:
        choice_bodies.append(bodies[index])
        candidates_ = analyst.most_significant_candidates(
            all_scores[index],
            all_candidates[index]
        )
        candidates.append(candidates_)

    restaurant_db = DBModel('gourmet', 'localhost', 'foo', 'bar')
    # Same name/address lookup as in retry(); string-formatted SQL is injection-prone.
    name_result = restaurant_db.select('SELECT name FROM restaurants WHERE id={}'.format(
        choice_bodies[top]['_source']['restaurant_id']
    ))
    address_result = restaurant_db.select(
        'SELECT address FROM restaurants WHERE id={}'.format(
            choice_bodies[top]['_source']['restaurant_id']
        )
    )

    # "How about this restaurant?" / "Name: ..."; the address line is appended when known.
    restaurant = 'こちらのお店はいかがでしょうか?\n\n店名 : {}'.format(name_result[0])
    if address_result[0] != 'nan':
        restaurant += '\n住所 : {}'.format(address_result[0])

    # Check for None before calling len() on the candidate list.
    if candidates[top] is None or len(candidates[top]) == 0:
        # "Unfortunately, no one who has visited this restaurant was found."
        recommend = 'このお店に行ったことがある人は,残念ながら見つかりませんでした.'
    else:
        # "People who have visited this restaurant say the following:"
        recommend = 'このお店に行った人は次のような感想を述べています :'
        for candidate in candidates[top]:
            recommend += '\n\n・{}'.format(candidate)

    dicts = [
        {'restaurant': restaurant, 'recommend': recommend}
    ]

    return json.dumps(dicts, ensure_ascii=False)
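
For reference, a small sketch of the descending-mean ranking step used above; the scores are illustrative:

import numpy as np

all_sum_scores = [0.2, 0.9, 0.5]            # mean score per review body
indices = np.argsort(all_sum_scores)[::-1]  # argsort is ascending; reverse it
print(indices.tolist())                     # [1, 2, 0] -> best-scoring body first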