def main():
    """Export the subreddit graph stored in the database to a GML file.

    Each row returned by ``get_subreddits()`` becomes one vertex whose
    ``name`` property is the row's first field and whose ``subscribers``
    property is its second field. Each pair returned by ``get_all_links()``
    becomes one edge between the two named vertices. The graph is
    undirected and is written to ``data/reddit.gml``.
    """
    graph = Graph()
    graph.set_directed(False)
    database = DBModel.get()

    graph.vertex_properties["subscribers"] = graph.new_vertex_property("int")
    graph.vertex_properties["name"] = graph.new_vertex_property("string")

    # Subreddit name -> vertex, so that links (which refer to subreddits by
    # name) can be resolved to vertices in the second pass.
    vertex_by_name = {}

    # First pass: one vertex per subreddit.
    for row in database.get_subreddits():
        vertex = graph.add_vertex()
        graph.vertex_properties["name"][vertex] = row[0]
        graph.vertex_properties["subscribers"][vertex] = row[1]
        vertex_by_name[row[0]] = vertex

    # Second pass: one edge per stored link.
    for from_name, to_name in database.get_all_links():
        graph.add_edge(vertex_by_name[from_name], vertex_by_name[to_name])

    database.close()
    graph.save("data/reddit.gml")
def retry():
    """Advance to the next ranked restaurant and return it as an HTTP response.

    Increments the module-level ``top`` cursor and reads the module-level
    ``choice_bodies`` and ``candidates`` lists at that position.

    Returns:
        An ``HTTPResponse``. When the cursor has run past the allowed range
        the response has status 500 and no body. Otherwise it has status 200
        and a JSON body holding a one-element list with a dict of two keys,
        ``restaurant`` (name and, if known, address) and ``recommend``
        (visitor comments, or a "none found" message). Both variants carry
        the JSON content type and a wildcard CORS header.
    """
    global choice_bodies, candidates, top

    top += 1
    # NOTE(review): after the increment this bound also rejects the last
    # valid index (top == len(candidates) - 1). Kept unchanged because
    # callers may rely on it -- confirm whether that is intended.
    if top + 1 >= len(candidates):
        r = HTTPResponse(status=500)
        r.set_header('Content-Type', 'application/json')
        r.set_header('Access-Control-Allow-Origin', '*')
        return r

    restaurant_id = choice_bodies[top]['_source']['restaurant_id']

    # NOTE(review): the id is interpolated straight into the SQL text. It
    # comes from the search index rather than from the request, but a
    # parameterized query would be safer if DBModel.select supports one.
    restaurant_db = DBModel('gourmet', 'localhost', 'foo', 'bar')
    name_result = restaurant_db.select(
        'SELECT name FROM restaurants WHERE id={}'.format(restaurant_id)
    )
    address_result = restaurant_db.select(
        'SELECT address FROM restaurants WHERE id={}'.format(restaurant_id)
    )

    restaurant = 'こちらのお店はいかがでしょうか?\n\n店名 : {}'.format(name_result[0])
    # The literal string 'nan' marks a missing address; skip the line then.
    if address_result[0] != 'nan':
        restaurant += '\n住所 : {}'.format(address_result[0])

    comments = candidates[top]
    # Test for None before calling len(); the reverse order raised TypeError
    # on a None entry and made the None check unreachable.
    if comments is None or len(comments) == 0:
        recommend = 'このお店に行ったことがある人は,残念ながら見つかりませんでした.'
    else:
        recommend = 'このお店に行ったことがある人は次のような感想を述べています :'
        recommend += ''.join('\n\n・{}'.format(comment) for comment in comments)

    dicts = [
        {'restaurant': restaurant, 'recommend': recommend}
    ]
    r = HTTPResponse(status=200, body=json.dumps(dicts, ensure_ascii=False))
    r.set_header('Content-Type', 'application/json')
    r.set_header('Access-Control-Allow-Origin', '*')
    return r
def main():
    """Poll Reddit for comments and store each (author, subreddit) pair.

    Logs in through praw, then loops forever: fetches comments from "all",
    and for every comment saves the author name together with the subreddit
    formatted as ``/r/<lowercased name>/``. The loop ends when the user
    interrupts it (Ctrl-C), after which an exit message is printed.

    The database handle is closed on every exit path, including failures
    while connecting, logging in or fetching.
    """
    model = DBModel.get()
    try:
        # Single-argument parenthesised print produces the same output under
        # the Python 2 print statement and the Python 3 print function.
        print("[ ] Connecting..")
        r = praw.Reddit(user_agent="/u/benediktkr/")
        print("[ ] Logging in..")
        r.login(username="******", password="******")
        print("[ ] Starting..")
        try:
            while True:
                # praw follows the guidelines on ratelimits
                print(" [ ] Fetching comments..")
                comments = list(r.get_comments("all", limit=None))
                print(" [+] Fetched: {0} comments.".format(len(comments)))
                print(" [ ] Processing")
                for comment in comments:
                    username = str(comment.author)
                    subreddit = "/r/" + str(comment.subreddit).lower() + "/"
                    model.save_comment(username, subreddit)
                print(" [+] Done")
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop; fall through to the cleanup.
            pass
    finally:
        # Previously the handle was closed only on KeyboardInterrupt, so any
        # other error leaked it.
        model.close()
    # Reached only after a KeyboardInterrupt: the loop has no other exit, and
    # any other exception propagates past this line.
    print("\n[!]Exiting")
def bfs(start_node):
    """Crawl subreddits breadth-first from ``start_node`` and store the result.

    Each subreddit taken from the queue is parsed with ``parse_sidebar`` and
    saved with ``db.save_subreddit``. Every entry in its ``'links'`` is
    saved as a ``(source, target)`` link, and targets not yet visited are
    queued for a later visit.

    Args:
        start_node: Identifier of the subreddit to start from, in whatever
            form ``parse_sidebar`` accepts.
    """
    # Nothing is written to this file here, but opening it in 'w' mode
    # creates/truncates it; that side effect is kept.
    with open('data/reddit.txt', 'w'):
        db = DBModel.get()
        try:
            # The queue was previously seeded with the undefined name
            # ``start`` instead of the ``start_node`` parameter.
            todo = deque([start_node])
            visited = set()
            while todo:
                here = todo.popleft()
                if here in visited:
                    # The same subreddit can be queued several times before
                    # its first visit; process it only once.
                    continue
                visited.add(here)

                this_subreddit = parse_sidebar(here)
                db.save_subreddit(this_subreddit)

                for subreddit in this_subreddit['links']:
                    if subreddit not in visited:
                        todo.append(subreddit)
                    # Record every edge, including those that point back at
                    # subreddits that were already visited.
                    link = (here, subreddit)
                    # Parenthesised form prints the tuple the same way under
                    # Python 2 and Python 3.
                    print(link)
                    db.save_link(link)
        finally:
            # Close the database handle even if parsing or saving fails.
            db.close()
def get_reply(query):
    """Search for restaurants matching ``query`` and describe the best one.

    Resets the module-level ``candidates``, ``choice_bodies`` and ``top``,
    then refills the two lists in descending order of mean score so that a
    later call can step through the remaining results.

    Args:
        query: The user's query text, passed to ``parser.drop_morph``.

    Returns:
        A JSON string holding a one-element list with a dict of two keys,
        ``restaurant`` (name and, if known, address of the top-ranked
        result) and ``recommend`` (visitor comments, or a "none found"
        message).
    """
    global candidates, choice_bodies, top
    candidates = []
    choice_bodies = []
    top = 0

    # Expand the query into search terms through the word2vec model.
    parser.drop_morph(query)
    word_pairs = word2vec_model.similar_words(parser.words)
    word_pairs = word2vec_model.most_significant_word_pairs(word_pairs)
    terms = [word for word, _ in word_pairs]

    elastic_results = elastic_model.search_terms(terms)
    bodies = elastic_results[0]

    analyst = Analyst(word2vec_model, specific_parts=['普通名詞', '地名', '固有名詞', '組織名'])

    all_sum_scores = []
    all_scores = []
    all_candidates = []
    for body in bodies:
        analyst.parse(body['_source']['body'])
        candidate_scores = np.array(analyst.calc_candidate_score())
        query_base_scores = np.array(analyst.calc_query_base_score(parser.words))
        scores = list(candidate_scores + query_base_scores)
        # Mean score per body; dividing by 1 for an empty list avoids a
        # division by zero and yields 0.
        all_sum_scores.append(sum(scores) / (len(scores) if len(scores) != 0 else 1))
        all_scores.append(scores)
        all_candidates.append(analyst.candidates)

    # Highest mean score first.
    indices = np.argsort(all_sum_scores)[::-1]
    for index in indices:
        choice_bodies.append(bodies[index])
        candidates_ = analyst.most_significant_candidates(
            all_scores[index], all_candidates[index]
        )
        candidates.append(candidates_)

    # NOTE(review): if the search returned no bodies, choice_bodies is empty
    # and the lookup below raises IndexError -- confirm how callers expect
    # an empty result to be reported.
    restaurant_id = choice_bodies[top]['_source']['restaurant_id']

    # NOTE(review): the id is interpolated straight into the SQL text. It
    # comes from the search index rather than from the query, but a
    # parameterized query would be safer if DBModel.select supports one.
    restaurant_db = DBModel('gourmet', 'localhost', 'foo', 'bar')
    name_result = restaurant_db.select(
        'SELECT name FROM restaurants WHERE id={};'.format(restaurant_id)
    )
    address_result = restaurant_db.select(
        'SELECT address FROM restaurants WHERE id={}'.format(restaurant_id)
    )

    restaurant = 'こちらのお店はいかがでしょうか?\n\n店名 : {}'.format(name_result[0])
    # The literal string 'nan' marks a missing address; skip the line then.
    if address_result[0] != 'nan':
        restaurant += '\n住所 : {}'.format(address_result[0])

    comments = candidates[top]
    # Test for None before calling len(); the reverse order raised TypeError
    # on a None entry and made the None check unreachable.
    if comments is None or len(comments) == 0:
        recommend = 'このお店に行ったことがある人は,残念ながら見つかりませんでした.'
    else:
        recommend = 'このお店に行った人は次のような感想を述べています :'
        recommend += ''.join('\n\n・{}'.format(comment) for comment in comments)

    dicts = [
        {'restaurant': restaurant, 'recommend': recommend}
    ]
    return json.dumps(dicts, ensure_ascii=False)