def add_collection_novelty(): #based on title all_paper_titles = {} #average novelty is for user in any_user: print user stat = download_and_save_users.citulike_user_object_to_stats( json.load(open('citeulike_collection/users/' + user + '.json'))) for paper in stat['papers']: title = paper['title'] if not title in all_paper_titles: all_paper_titles[title] = 1 else: all_paper_titles[title] += 1 for user in any_user: print user stat = download_and_save_users.citulike_user_object_to_stats( json.load(open('citeulike_collection/users/' + user + '.json'))) paper_novelties = [] for paper in stat['papers']: title = paper['title'] paper_novelties += [1.0 / float(all_paper_titles[paper['title']])] nov = 0 if len(paper_novelties) > 0: nov = numpy.mean(paper_novelties) by_user[user]['collection_novelty'] = nov print nov, paper_novelties
def add_collection_novelty():#based on title all_paper_titles = {}#average novelty is for user in any_user: print user stat = download_and_save_users.citulike_user_object_to_stats(json.load(open('citeulike_collection/users/'+user+'.json'))) for paper in stat['papers']: title = paper['title'],paper
def add_collection_novelty(): #based on title all_paper_titles = {} #average novelty is for user in any_user: print user stat = download_and_save_users.citulike_user_object_to_stats( json.load(open('citeulike_collection/users/' + user + '.json'))) for paper in stat['papers']: title = paper['title'], paper
def generate_collected_csv():
    """Write one row per user to csv_out:

    [user-key minus last char, matching group names..., collected count].
    """
    for user in any_user:
        # `with` closes the JSON file (the original leaked the handle)
        with open('citeulike_collection/users/' + user + '.json') as f:
            stat = download_and_save_users.citulike_user_object_to_stats(
                json.load(f))
        # user[:-1] drops the key's last character -- presumably a
        # group-marker suffix; verify against how any_user keys are built.
        out_row = [user[:-1]]
        # `group` renamed from `set`, which shadowed the builtin
        for group in ['rank', 'log', 'web', 'ice']:
            if any_user[user][group]:
                out_row.append(group)
        out_row.append(stat['collected'])
        csv_out.writerow(out_row)
def generate_collected_csv():
    """Emit a CSV row per user via csv_out:

    user key without its final character, then every group name whose flag
    is truthy for the user, then the user's collected-paper count.
    """
    for user in any_user:
        # close the file deterministically (original left it open)
        with open('citeulike_collection/users/' + user + '.json') as f:
            stat = download_and_save_users.citulike_user_object_to_stats(
                json.load(f))
        # NOTE(review): user[:-1] assumes a one-char suffix on the key --
        # confirm against the key format used when any_user was built.
        out_row = [user[:-1]]
        # loop variable renamed from `set` to avoid shadowing the builtin
        for group in ['rank', 'log', 'web', 'ice']:
            if any_user[user][group]:
                out_row.append(group)
        out_row.append(stat['collected'])
        csv_out.writerow(out_row)
def add_collection_novelty():#based on title all_paper_titles = {}#average novelty is for user in any_user: print user stat = download_and_save_users.citulike_user_object_to_stats(json.load(open('citeulike_collection/users/'+user+'.json'))) for paper in stat['papers']: title = paper['title'] if not title in all_paper_titles: all_paper_titles[title] = 1 else: all_paper_titles[title] += 1 for user in any_user: print user stat = download_and_save_users.citulike_user_object_to_stats(json.load(open('citeulike_collection/users/'+user+'.json'))) paper_novelties = [] for paper in stat['papers']: title = paper['title'] paper_novelties += [1.0/ float(all_paper_titles[paper['title']])] nov = 0; if len(paper_novelties) > 0: nov = numpy.mean(paper_novelties) by_user[user]['collection_novelty'] = nov print nov,paper_novelties
def compute_collection_metrics(): for user in any_user: print "Computing collected for...", user stat = download_and_save_users.citulike_user_object_to_stats( json.load(open('citeulike_collection/users/' + user + '.json'))) out_row = [user[:-1]] #[user]#[user[:-1]] for set in ['rank', 'log', 'web', 'ice']: if any_user[user][set]: out_row.append(set) out_row.append(stat['collected']) out_row.append(user[len(user) - 1]) #out_row.append() keywords = {} for paper in stat['papers']: if "keywords" in paper: print "paper keywords", paper['keywords'] for keyword in paper['keywords']: keywords[keyword] = 1 by_user[user]['keyword_variety'] = len(keywords.keys()) print "out row", out_row for i in range(len(collection_metrics)): by_user[user][collection_metrics[i]] = out_row[i] collected_depths = [] for paper in stat['papers']: print paper['title'] #gbgbgbgbgb if paper['title'] in paper_title_to_depth: print paper_title_to_depth[paper['title']] collected_depths += [paper_title_to_depth[paper['title']]] else: title = "nonenone" if ":" in paper['title']: title = paper['title'].split(":")[0] if title in paper_title_to_depth: print paper_title_to_depth[title] collected_depths += [paper_title_to_depth[title]] else: print "???" if len(collected_depths) > 0: by_user[user]['collected_depth'] = numpy.mean(collected_depths) else: by_user[user]['collected_depth'] = 0
def compute_collection_metrics(): for user in any_user: print "Computing collected for...",user stat = download_and_save_users.citulike_user_object_to_stats(json.load(open('citeulike_collection/users/'+user+'.json'))) out_row = [user[:-1]]#[user]#[user[:-1]] for set in ['rank','log','web','ice']: if any_user[user][set]: out_row.append(set) out_row.append(stat['collected']) out_row.append(user[len(user)-1]) #out_row.append() keywords = {} for paper in stat['papers']: if "keywords" in paper: print "paper keywords",paper['keywords'] for keyword in paper['keywords']: keywords[keyword] = 1 by_user[user]['keyword_variety'] = len(keywords.keys()) print "out row",out_row for i in range(len(collection_metrics)): by_user[user][collection_metrics[i]] = out_row[i] collected_depths = [] for paper in stat['papers']: print paper['title']#gbgbgbgbgb if paper['title'] in paper_title_to_depth: print paper_title_to_depth[paper['title']] collected_depths += [paper_title_to_depth[paper['title']]] else: title = "nonenone" if ":" in paper['title']: title = paper['title'].split(":")[0] if title in paper_title_to_depth: print paper_title_to_depth[title] collected_depths += [paper_title_to_depth[title]] else: print "???" if len(collected_depths) > 0: by_user[user]['collected_depth'] = numpy.mean( collected_depths ) else: by_user[user]['collected_depth'] = 0