def generate_output(folder_name):
    """Write text/CSV summaries of the visit sets stored for ``folder_name``.

    Reads ``visit_sets.json`` from
    ``data/<my.DATA_FOLDER>metrics/<folder_name>/``. Each entry is keyed by
    a gang id and holds a ``'rival'`` and a ``'nonrival'`` sequence of
    numbers. Gangs for which both sequences are empty are skipped.

    Four files are written to the same directory:

    * ``visitation_sum.csv`` -- name, sum of rival values, sum of nonrival
      values, one gang per line.
    * ``visitation_avg.csv`` -- name, mean of rival values, mean of nonrival
      values (rounded to 5 places, 0 for an empty sequence).
    * ``each_gang.txt``      -- a ``name`` / ``rival`` / ``nonrival`` block
      per gang, sequences rendered with ``arr_to_str``.
    * ``visit_series.txt``   -- the rival and nonrival values of all kept
      gangs concatenated, rendered with ``arr_to_str``.
    """
    def _mean(values):
        # Mean rounded to 5 places; an empty sequence reports 0.
        if len(values) == 0:
            return 0
        return round(sum(values) / float(len(values)), 5)

    tty_names = load.loadLocNames()
    out_dir = 'data/' + my.DATA_FOLDER + 'metrics/' + folder_name + '/'

    with open(out_dir + 'visit_sets.json', 'rb') as src:
        visit_sets = anyjson.deserialize(src.read())

    sum_lines = []
    avg_lines = []
    gang_blocks = []
    all_rival = []
    all_nonrival = []

    for gang_id in visit_sets:
        rival = visit_sets[gang_id]['rival']
        nonrival = visit_sets[gang_id]['nonrival']
        if len(rival) == 0 and len(nonrival) == 0:
            continue

        name = tty_names[int(gang_id)]
        sum_lines.append(
            name + ',' + str(sum(rival)) + ', ' + str(sum(nonrival)) + '\n')
        avg_lines.append(
            name + ',' + str(_mean(rival)) + ', ' + str(_mean(nonrival)) + '\n')

        gang_blocks.append("name = '" + name + "'\n")
        gang_blocks.append('rival = ' + arr_to_str(rival) + '\n')
        gang_blocks.append('nonrival = ' + arr_to_str(nonrival) + '\n\n')

        all_rival.extend(rival)
        all_nonrival.extend(nonrival)

    visit_series = ('rival = ' + arr_to_str(all_rival) + '\n'
                    + 'nonrival = ' + arr_to_str(all_nonrival))

    outputs = (
        ('visitation_sum.csv', ''.join(sum_lines)),
        ('visitation_avg.csv', ''.join(avg_lines)),
        ('each_gang.txt', ''.join(gang_blocks)),
        ('visit_series.txt', visit_series),
    )
    for file_name, text in outputs:
        with open(out_dir + file_name, 'wb') as dst:
            dst.write(text)
def generate_charts(folder_name):
    """Plot per-gang sum and mean charts from the stored visit sets.

    Loads ``visit_sets.json`` from
    ``data/<my.DATA_FOLDER>metrics/<folder_name>/`` and, for every gang that
    has at least one ``'rival'`` or ``'nonrival'`` value, collects:

    * its display name (underscores replaced by spaces),
    * the sum of each sequence, rounded to 5 places,
    * the mean of each sequence, rounded to 5 places (0 when empty).

    The collected series are handed to ``plot_visits_chart`` twice, producing
    ``visits_sum.png`` and ``visits_avg.png``.
    """
    def _mean(values):
        # Mean rounded to 5 places; an empty sequence reports 0.
        if len(values) == 0:
            return 0
        return round(sum(values) / float(len(values)), 5)

    tty_names = load.loadLocNames()
    json_path = ('data/' + my.DATA_FOLDER + 'metrics/' + folder_name + '/'
                 + 'visit_sets.json')
    with open(json_path, 'rb') as src:
        visit_sets = anyjson.deserialize(src.read())

    names = []
    sum_rival, sum_nonrival = [], []
    avg_rival, avg_nonrival = [], []

    for gang_id in visit_sets:
        rival = visit_sets[gang_id]['rival']
        nonrival = visit_sets[gang_id]['nonrival']
        if len(rival) == 0 and len(nonrival) == 0:
            continue  # nothing to chart for this gang

        names.append(str(tty_names[int(gang_id)].replace('_', ' ')))
        sum_rival.append(round(sum(rival), 5))
        sum_nonrival.append(round(sum(nonrival), 5))
        avg_rival.append(_mean(rival))
        avg_nonrival.append(_mean(nonrival))

    plot_visits_chart(names, sum_rival, sum_nonrival, folder_name,
                      'visits_sum.png')
    plot_visits_chart(names, avg_rival, avg_nonrival, folder_name,
                      'visits_avg.png')
def see_rivalry_mat():
    """Print the rivalry matrix for gang ids 23 through 54.

    The first printed line is a header of column ids. Each following line
    shows one gang's name and id, then one cell per column id: ``X`` when
    the column id appears in ``my.HBK_GANG_AND_RIVAL_IDS[gang_id]`` and
    ``.`` otherwise.
    """
    tty_names = load.loadLocNames()
    gang_ids = list(range(23, 55))

    # Name column, id column, then one 3-wide cell per gang id.
    row_format = '%20s - %2s: ' + ' '.join(['%3s'] * len(gang_ids))

    print(row_format % tuple(['GANG NAME', '#'] + gang_ids))

    for gang_id in gang_ids:
        cells = [tty_names[gang_id], gang_id]
        for other_id in gang_ids:
            if other_id in my.HBK_GANG_AND_RIVAL_IDS[gang_id]:
                cells.append('X')
            else:
                cells.append('.')
        print(row_format % tuple(cells))
def see_visit_mat():
    """Print the visitation matrix returned by ``calc.calcVisitationMat``.

    The header line lists column ids 23 through 54. Each following line
    shows one gang's name and id, then one cell per destination id found in
    ``visit_mat[gang_id]``:

    * ``.``  -- the destination is the gang itself, or the value is 0;
    * ``<value>r`` -- non-zero value and the destination id is listed in
      ``my.HBK_GANG_AND_RIVAL_IDS[gang_id]``;
    * ``<value>`` -- any other non-zero value.

    Rows and cells are emitted in ascending id order so that cells line up
    with the header. This assumes every row of the matrix is keyed by
    exactly the ids 23..54 -- TODO confirm against ``calc``; a row with a
    different number of keys makes the ``%`` formatting raise ``TypeError``.
    """
    tty_polys, hbk_poly = load.loadLocPoly()
    hbk_all_tweets = load.loadAllTweets()
    hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
    hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)
    tty_names = load.loadLocNames()
    visit_mat = calc.calcVisitationMat(hbk_all_tweets, tty_polys,
                                       hbk_users_in_gang_t)

    column_ids = list(range(23, 55))
    # Name column, id column, then one 4-wide cell per header id.
    row_format = '%20s - %2s: ' + ' '.join(['%4s'] * len(column_ids))

    print(row_format % tuple(['GANG NAME', '#'] + column_ids))

    # sorted(): plain dict iteration order is unspecified in Python 2, which
    # could shuffle cells relative to the ascending header.
    for gang_id in sorted(visit_mat):
        visits = visit_mat[gang_id]
        cells = [tty_names[gang_id], gang_id]
        for to_id in sorted(visits):
            # A gang's visits to its own territory are never reported.
            value = 0 if to_id == gang_id else visits[to_id]
            cell = value if value != 0 else '.'
            if to_id in my.HBK_GANG_AND_RIVAL_IDS[gang_id] and cell != '.':
                cell = str(cell) + 'r'
            cells.append(cell)
        print(row_format % tuple(cells))
def see_gang_tweet_counts():
    """Print a table of tweet and user counts per gang.

    For each gang id in the mapping returned by ``load.loadUsersInGangTty``
    the tweet count is the length of
    ``prep.keepUserIds(all_tweets, users_of_that_gang)``. The raw mapping of
    counts is printed first, then a table row for every gang with a
    non-zero count (id, name, tweets, users, length of its
    ``my.HBK_GANG_AND_RIVAL_IDS`` entry, whole tweets per user), and
    finally two totals: users of the gangs shown in the table, and tweets
    over all gangs.
    """
    tty_polys, hbk_poly = load.loadLocPoly()
    tty_names = load.loadLocNames()
    hbk_all_tweets = load.loadAllTweets()
    hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
    hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)

    tweet_counts = {}
    print('Finding tweet count by each gang...')
    for gang_id, user_ids in hbk_users_in_gang_t.items():
        tweet_counts[gang_id] = len(prep.keepUserIds(hbk_all_tweets, user_ids))
    print("Each gang's tweet count: %s" % tweet_counts)

    row_format = '%2s %15s %5s %5s %8s %6s'
    print(row_format % ('ID', 'NAME', '#TWs', '#USERs', '#RIVALs', 'TW/USR'))
    for gang_id in tweet_counts:
        n_tweets = tweet_counts[gang_id]
        if n_tweets == 0:
            continue  # gangs without tweets are left out of the table
        name = tty_names[gang_id]
        n_users = len(hbk_users_in_gang_t[gang_id])
        n_rivals = len(my.HBK_GANG_AND_RIVAL_IDS[gang_id])
        tweets_per_user = int(n_tweets / float(n_users))
        print(row_format % (gang_id, name, n_tweets, n_users, n_rivals,
                            tweets_per_user))

    # Only gangs that appeared in the table count towards the user total.
    total_users = sum(len(hbk_users_in_gang_t[gang_id])
                      for gang_id, n_tweets in tweet_counts.items()
                      if n_tweets != 0)
    print('Total number of users: %s' % total_users)
    print('Total tweets from all users: %s' % sum(tweet_counts.values()))
def see_rivalry_list():
    """Print one row per gang pair whose plain visitation value is >= 5.

    Two matrices are computed with ``calc.calcVisitationMat``: a plain one,
    used only for the ``>= 5`` threshold, and one that additionally receives
    ``calc.calcDistNormCDF()`` and the users' home locations, used for every
    value that is printed.

    Each unordered pair is reported once, oriented the way it was first
    encountered (A = the gang being iterated, B = the other gang). Columns:

    * ``A#``, ``GANG A``, ``B#``, ``GANG B`` -- ids and names;
    * ``A>B``, ``B>A`` -- the two directed matrix values, truncated to int;
    * ``RnR`` -- ``rival`` if B is listed in
      ``my.HBK_GANG_AND_RIVAL_IDS[A]``, else ``nonrival``;
    * ``Affinity`` -- ``1 / |A>B - B>A|`` rounded to 3 places, or 0 when the
      two values are equal;
    * ``AvgTw`` -- mean of the two directed values, truncated to int.

    Rival rows are printed first, then nonrival rows, each group in the
    order in which the pairs were found.
    """
    tty_polys, hbk_poly = load.loadLocPoly()
    hbk_all_tweets = load.loadAllTweets()
    hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
    hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)
    tty_names = load.loadLocNames()

    plain_visits = calc.calcVisitationMat(hbk_all_tweets, tty_polys,
                                          hbk_users_in_gang_t)
    dist_norm = calc.calcDistNormCDF()
    visit_mat = calc.calcVisitationMat(hbk_all_tweets, tty_polys,
                                       hbk_users_in_gang_t, dist_norm,
                                       hbk_user_home_loc)

    # Unordered pairs are tracked as frozensets. Concatenating the two ids
    # as strings would make e.g. (1, 23) and (12, 3) indistinguishable and
    # silently drop one of them.
    seen_pairs = set()
    rival_rows = []
    nonrival_rows = []

    for gang_id in my.HBK_GANG_AND_RIVAL_IDS:
        for other_id in my.HBK_GANG_ID_LIST:
            if other_id == gang_id:
                continue
            pair = frozenset((gang_id, other_id))
            if plain_visits[gang_id][other_id] >= 5 and pair not in seen_pairs:
                seen_pairs.add(pair)

                a_to_b = visit_mat[gang_id][other_id]
                b_to_a = visit_mat[other_id][gang_id]
                is_rival = other_id in my.HBK_GANG_AND_RIVAL_IDS[gang_id]

                # Equal values would divide by zero; report 0 instead.
                if a_to_b != b_to_a:
                    affinity = round(1.0 / abs(a_to_b - b_to_a), 3)
                else:
                    affinity = 0

                row = [
                    gang_id, tty_names[gang_id],
                    other_id, tty_names[other_id],
                    int(a_to_b),
                    int(b_to_a),
                    'rival' if is_rival else 'nonrival',
                    affinity,
                    int((a_to_b + b_to_a) / 2),
                ]
                if is_rival:
                    rival_rows.append(row)
                else:
                    nonrival_rows.append(row)

    row_format = '%2s %20s => %2s %20s \t %4s \t %4s \t %8s \t %8s \t %5s'
    header = ['A#', 'GANG A', 'B#', 'GANG B', 'A>B', 'B>A', 'RnR',
              'Affinity', 'AvgTw']
    print(row_format % tuple(header))
    for row in rival_rows + nonrival_rows:
        print(row_format % tuple(row))