def lexrank(sentences, build_matrix, target_wc_perc, tokenize_fn, min_score,
            include_quot, damping_factor=0.85, error=0.001, title=None):
    assert isinstance(min_score, (int, float)), \
        'min_score needs to be float, not %s' % type(min_score)
    assert isinstance(include_quot, bool)
    include_sent = [(include_quot or not is_quotation(s.text)) and
                    not extractive_summary.exclude_sentence(s.text)
                    for s in sentences]
    tokenized_sents = [s.map(text=lambda t: list(tokenize_fn(t)),
                             length=lambda t: len(space_split(s.text)))
                       for s in sentences]
    tokenized_title = tokenize_fn(title) if title is not None else None
    M = build_matrix(sentences, tokenized_sents, include_sent,
                     tokenized_title=tokenized_title)
    scores = pageRank(M, s=damping_factor, maxerr=error)
    assert len(scores) == len(sentences)
    ## normalize scores: 1 is equal to average page rank
    ## round to avoid float precision issues
    sentence_scores = [round(s * len(tokenized_sents), 5) for s in scores]
    sentence_wc = extractive_summary.count_words(tokenized_sents)
    summary = extractive_summary.build_summary(sentences, sentence_scores,
                                               sentence_wc, target_wc_perc,
                                               min_score)
    return summary, sentence_wc
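The pageRank function called here is not shown; a later snippet cites the widely copied gist at https://gist.github.com/diogojc/1338222, whose interface matches the call above (s is the damping factor, maxerr the convergence threshold, scores sum to 1, which is why lexrank multiplies by the sentence count). A minimal power-iteration sketch under those assumptions; the real module's handling of dangling rows and normalization may differ:

import numpy as np

def pageRank(G, s=0.85, maxerr=0.001):
    """Power iteration over a square affinity matrix G."""
    G = np.asarray(G, dtype=float)
    n = G.shape[0]
    sums = G.sum(axis=1)
    # row-normalize to transition probabilities;
    # give all-zero (dangling) rows a uniform distribution
    safe = np.where(sums > 0, sums, 1.0)
    M = np.where(sums[:, None] > 0, G / safe[:, None], 1.0 / n)
    r, last = np.full(n, 1.0 / n), np.zeros(n)
    while np.abs(r - last).sum() > maxerr:
        last = r
        r = s * (r @ M) + (1 - s) / n  # damped random-walk step
    return r  # a probability distribution over nodes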
def testDocMatrix(self):
    links = [
        [0, 2, 2, 3],
        [0],
        [3, 2],
        [0],
    ]
    result = pagerank.pageRank(links)
    self.assertEqual(str(result), '[ 0.36723503 0.0375 0.33665007 0.25861487]')
def testOneCircle(self):
    links = [
        [1, 1, 1, 1],
        [2],
        [3],
        [4],
        [0]
    ]
    result = pagerank.pageRank(links, alpha=1.0)
    self.assertEqual(str(result), '[ 0.2 0.2 0.2 0.2 0.2]')
def testTwoCircles(self):
    links = [
        [1, 2],
        [2],
        [3],
        [4],
        [0]
    ]
    result = pagerank.pageRank(links)
    self.assertEqual(str(result), '[ 0.2116109 0.12411822 0.2296187 0.22099231 0.21365988]')
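These tests pass adjacency lists rather than a matrix (links[i] holds the targets of node i, and a repeated target adds edge weight), and testOneCircle names the damping parameter alpha instead of s, so this is evidently a different pageRank wrapper from the one in the first snippet. A sketch of the list-to-matrix conversion such a wrapper presumably performs; the helper name links_to_matrix is mine:

import numpy as np

def links_to_matrix(links):
    # links[i] lists the targets of node i; duplicates add weight
    n = len(links)
    M = np.zeros((n, n))
    for src, targets in enumerate(links):
        for dst in targets:
            M[src, dst] += 1
    return M

# the testDocMatrix input: node 0 points at itself, at 2 twice, and at 3
print(links_to_matrix([[0, 2, 2, 3], [0], [3, 2], [0]]))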
def format_data(self, dependencygroup):
    header = 'Filename', 'PageRank', 'PageID', 'Outgoing Links'
    converter = pagerank.DependenciesToLinkMatrix(dependencygroup.dependencies)
    matrix = converter.create_matrix()
    ranking = pagerank.pageRank(matrix)
    ids = list(range(len(matrix)))
    filenames = [utils.prettify_path(converter.id_to_node_map[nid])
                 for nid in ids]
    # zip is lazy in Python 3, so materialize before sorting
    rowinfos = list(zip(filenames, ranking, ids, matrix))
    rowinfos.sort(key=lambda item: item[1], reverse=True)  # highest rank first
    rows = []
    for rowi in rowinfos:
        row = (rowi[0], self._fmt_rank(rowi[1]), str(rowi[2]), str(rowi[3]))
        rows.append(row)
    tbl = tableprint.Table(header, rows)
    tbl.write(self._outstream)
def group_info():
    algo_type = request.args.get('algo_type')
    cur = get_db().cursor()
    now = time.localtime()
    sec_since_start_of_day = now.tm_sec + now.tm_min * 60 + now.tm_hour * 3600
    start_of_day_epoch = time.time() - sec_since_start_of_day
    week_ago_epoch = start_of_day_epoch - 7 * 24 * 3600
    ids = get_db_identifications(cur)

    # bucket each member's interactions by weekday over the past week
    interact_dict = {}
    for (full_name, uniqname, id) in ids:
        interact_dict[id] = {}
        start_time = week_ago_epoch
        while start_time < time.time():
            end_time = start_time + 24 * 3600
            interactions = get_db_interactions(cur, id, start_time, end_time)
            interact_dict[id][time.localtime(start_time).tm_wday] = interactions
            start_time = end_time

    data = []
    # pagerank points allocation
    if algo_type == 'pagerank':
        # calculate pagerank on a day-by-day basis
        results = {}
        for day in range(7):
            # create pagerank graph: one weighted row per member
            pr_graph = []
            for (_, _, my_id) in ids:
                # find interactions with all other ids
                pr_row = []
                for (_, _, their_id) in ids:
                    # sum interactions with this id
                    interactions = 0
                    if their_id in interact_dict[my_id][day]:
                        interactions += interact_dict[my_id][day][their_id]
                    pr_row.append(interactions)
                pr_graph.append(pr_row)
            # run pagerank algorithm
            results[day] = pagerank.pageRank(np.array(pr_graph), maxerr=0.1)
            # a day with no interactions yields NaNs; fall back to a uniform split
            for value in results[day]:
                if math.isnan(value):
                    results[day] = [1 / len(ids)] * len(ids)
                    break
        # create data for graphing
        id_index = 0
        for (full_name, uniqname, id) in ids:
            total_points = 0
            point_dict = {'full_name': full_name, 'uniqname': uniqname}
            point_counts = []
            for day_num in range(7):
                day_points = {}
                day_points['day'] = days[day_num]
                day_points['points'] = round(results[day_num][id_index] * 100000)
                total_points += day_points['points']
                point_counts.append(day_points)
            # point_counts needs to be rotated so that the current day is last
            curr_day_num = time.localtime().tm_wday
            point_counts = point_counts[curr_day_num + 1:] + point_counts[:curr_day_num + 1]
            point_dict['point_counts'] = point_counts
            point_dict['total_points'] = total_points
            data.append(point_dict)
            id_index += 1
    # default is linear points
    else:
        for (full_name, uniqname, id) in ids:
            total_points = 0
            point_dict = {'full_name': full_name, 'uniqname': uniqname}
            point_counts = []
            for day_num in range(7):
                day_points = {}
                day_points['day'] = days[day_num]
                points = 0
                for pings in interact_dict[id][day_num].values():
                    points += pings
                    total_points += pings
                day_points['points'] = points
                point_counts.append(day_points)
            # point_counts needs to be rotated so that the current day is last
            curr_day_num = time.localtime().tm_wday
            point_counts = point_counts[curr_day_num + 1:] + point_counts[:curr_day_num + 1]
            point_dict['point_counts'] = point_counts
            point_dict['total_points'] = total_points
            data.append(point_dict)

    # sort data by total points
    data = sorted(data, key=lambda record: record['total_points'], reverse=True)
    return json.dumps(data)
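The rotation trick used twice above is easy to check in isolation: with tm_wday numbering Monday as 0, slicing at curr_day_num + 1 moves today to the end of the week list. For example:

days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
curr_day_num = 2  # a Wednesday
print(days[curr_day_num + 1:] + days[:curr_day_num + 1])
# ['Thu', 'Fri', 'Sat', 'Sun', 'Mon', 'Tue', 'Wed']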
intrain = []
print(fileN)
intrain = extractFailureIntrain(fileintrain, outtrain)
LOC = len(intrain[0])
numTest = len(intrain)

# transpose the coverage data: one row per line of code
cover = []
for i in range(0, LOC):
    temp = []
    for j in range(0, numTest):
        temp.append(intrain[j][i])
    cover.append(temp)

print("start pagerank")
pr = pagerank.pageRank(cover, MM)
pr.calCompute()
pr.calNormalized()

save = 'C:/Users/Jeongsu Jang/Desktop/2018-1/paper/RBF+pageRank/experiment/tcas/Testv' + str(version) + '.txt'
f = open(save, 'w+')
for i in range(0, numTest):
    f.write(str(pr.norTest[i][0]) + "\n")
f.close()

save = 'C:/Users/Jeongsu Jang/Desktop/2018-1/paper/RBF+pageRank/experiment/tcas/Xv' + str(version) + '.txt'
f = open(save, 'w+')
for i in range(0, LOC):
print "---------------------" print "## computing histgrams" all_word_histgrams = {} for imagefname in all_features.keys(): word_histgram = computeHistograms(fcodebook, all_features[imagefname]) all_word_histgrams[imagefname] = word_histgram print "---------------------" print "## build matrix" matrix, matrixOrder = buildMatrix(all_word_histgrams) # randomWorkVector = [] : consider tfidf score of word in each photo randomWorkVector = buildRWVector(matrixOrder, photoTfIdfScore) print "---------------------" print "## computing pagerank" # refer https://gist.github.com/diogojc/1338222/download # also refer https://github.com/timothyasp/PageRank rank = pagerank.pageRank(matrix, s=.86, rwVector=randomWorkVector) rankIndex = rank.argsort()[::-1] for i in range(0,30): print( str(i) + ": " + str(rank[rankIndex[i]]) + ' - ' + matrixOrder[rankIndex[i]]) print(photoTfIdfScore[matrixOrder[rankIndex[i]]]) for title, tfidf in (photoTfIdfScore[matrixOrder[rankIndex[i]]]).iteritems() : print( title + ' - ' + str(tfidf)) # + ' - ' + all_files[rankIndex[i]])