示例#1
0
def trim_near_lines():
	"""Drop tweets that lie near a border line and rewrite the tweet file.

	Reads the border lines from ``json/border_lines.json`` and tests every
	tweet returned by ``load.loadAllTweets()`` against them with ``isNear``.
	Tweets near no line overwrite ``my.HBK_TWEET_LOC_FILE``; the
	``[tweet[1], tweet[2]]`` pairs of the dropped tweets are written to
	``json/border_points.json``.
	"""
	data_dir = 'data/' + my.DATA_FOLDER
	with open(data_dir + 'json/' + 'border_lines.json', 'rb') as lines_file:
		border_lines = anyjson.loads(lines_file.read())

	kept_tweets = []
	border_points = []
	for tweet in load.loadAllTweets():
		# tweet[1], tweet[2] are presumably the tweet's coordinates -- confirm against the loader
		if any(isNear([tweet[1], tweet[2]], line) for line in border_lines):
			border_points.append([tweet[1], tweet[2]])
		else:
			kept_tweets.append(tweet)

	print('After trimming: %s. %s removed.' % (len(kept_tweets), len(border_points)))

	# The trimmed list takes the place of the previous tweet file.
	print('Replacing old set of tweets...')
	with open(data_dir + my.HBK_TWEET_LOC_FILE, 'wb') as tweets_file:
		csv.writer(tweets_file, delimiter=',').writerows(kept_tweets)
	print(str(len(kept_tweets)) + ' total instances written.')

	# Keep the removed points for later inspection.
	with open(data_dir + 'json/' + 'border_points.json', 'wb') as points_file:
		points_file.write(anyjson.dumps(border_points))
示例#2
0
def trim_low_tweet_gangs():
	"""Remove the tweets of gangs with too few tweets outside their own polygon.

	For each gang in the user mapping, counts the tweets of its users that
	fall inside the ``tty_polys`` polygon of every *other* id in
	``my.HBK_GANG_ID_LIST``.  Users of gangs whose count is below
	``my.MIN_NON_HOME_TWEETS`` are removed from the tweet list, which then
	overwrites ``my.HBK_TWEET_LOC_FILE``.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)

	print('Trimming tweets by low tweeting gangs...')
	# Per gang: number of tweets located in some other gang's polygon.
	foreign_counts = {}
	for gang_id in hbk_users_in_gang_t:
		gang_tweets = keepUserIds(hbk_all_tweets, hbk_users_in_gang_t[gang_id])
		foreign_counts[gang_id] = sum(
			len(keepPolygon(gang_tweets, tty_polys[other_id]))
			for other_id in my.HBK_GANG_ID_LIST
			if other_id != gang_id
		)
	print('Each gang\'s tweet count: %s' % foreign_counts)

	low_gangs = [
		gang_id for gang_id in foreign_counts
		if foreign_counts[gang_id] < my.MIN_NON_HOME_TWEETS
	]
	print('Removing users from gangs: %s' % low_gangs)
	remove_user_list = []
	for gang_id in low_gangs:
		remove_user_list.extend(hbk_users_in_gang_t[gang_id])
	remaining_tweets = removeUserIds(hbk_all_tweets, remove_user_list)

	# The trimmed list takes the place of the previous tweet file.
	print('Replacing old set of tweets...')
	with open('data/' + my.DATA_FOLDER + my.HBK_TWEET_LOC_FILE, 'wb') as tweets_file:
		csv.writer(tweets_file, delimiter=',').writerows(remaining_tweets)
	print(str(len(remaining_tweets)) + ' total instances written.')
示例#3
0
def generate_gang_tweet_counts():
	"""Write each gang's count of tweets in other gangs' polygons to JSON.

	For each gang in the user mapping, counts the tweets of its users that
	fall inside the ``tty_polys`` polygon of every other id in
	``my.HBK_GANG_ID_LIST`` and stores the mapping in
	``json/gang_tweet_counts.json`` (the directory is created when missing).
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)

	foreign_counts = {}
	print('Finding tweet count by each gang...')
	for gang_id in hbk_users_in_gang_t:
		gang_tweets = prep.keepUserIds(hbk_all_tweets, hbk_users_in_gang_t[gang_id])
		# Only tweets inside another gang's polygon are counted.
		foreign_counts[gang_id] = sum(
			len(prep.keepPolygon(gang_tweets, tty_polys[other_id]))
			for other_id in my.HBK_GANG_ID_LIST
			if other_id != gang_id
		)
	print('Each gang\'s tweet count: %s' % foreign_counts)

	json_dir = 'data/' + my.DATA_FOLDER + 'json/'
	if not os.path.exists(json_dir):
		os.makedirs(json_dir)
	with open(json_dir + 'gang_tweet_counts.json', 'wb') as out_file:
		out_file.write(anyjson.dumps(foreign_counts))
示例#4
0
def trim_non_gang_users():
	"""Keep only the tweets of users assigned to some gang and rewrite the tweet file.

	The users of every gang in the mapping returned by
	``load.loadUsersInGangTty`` are pooled into one list; ``keepUserIds``
	filters the tweets with it and the result overwrites
	``my.HBK_TWEET_LOC_FILE``.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)

	print('Removing non gang user tweets...')

	# Pool the members of all gangs into a single list.
	gang_user_list = []
	for members in hbk_users_in_gang_t.values():
		gang_user_list.extend(members)

	gang_tweets = keepUserIds(hbk_all_tweets, gang_user_list)
	print(str(len(gang_tweets)) + ' instances after non gang users removed.')

	# The trimmed list takes the place of the previous tweet file.
	print('Replacing old set of tweets...')
	with open('data/' + my.DATA_FOLDER + my.HBK_TWEET_LOC_FILE, 'wb') as tweets_file:
		csv.writer(tweets_file, delimiter=',').writerows(gang_tweets)
	print(str(len(gang_tweets)) + ' total instances written.')
示例#5
0
def calc_visit_sets():
	"""Compute and store the visit sets for every normalisation variant.

	Builds the plain visit matrix and a distance-normalised one, applies the
	non-home, tweet-frequency and rivals normalisations (alone and combined)
	and passes each resulting matrix through
	``calc_visit_sets_from_visit_mat`` to ``store_visit_set_output`` under a
	label naming the variant.  The calls are made in a fixed order.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)

	def store(matrix, label):
		# Derive the visit sets of one matrix and store them under `label`.
		store_visit_set_output(calc_visit_sets_from_visit_mat(matrix), label)

	# Plain visit matrix and the norm vectors derived from it.
	visit_mat = calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t)
	non_home_norm = calcNonHomeNorm(visit_mat)
	tw_freq_norm = calcTwFreqNorm(visit_mat)
	# calcDistNorm() is the alternative distance norm function.
	dist_norm = calcDistNormCDF()

	# No normalization (absolute fractions)
	store(visit_mat, 'no_norm')
	# Non-home normalized (TO-tty normalized)
	store(apply_non_home_norm(visit_mat, non_home_norm), 'non_home_norm')
	# Tweet freq normalized (FROM-tty normalized)
	store(apply_tw_freq_norm(visit_mat, tw_freq_norm), 'tw_freq_norm')

	# Distance normalized
	dist_visit_mat = calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t, dist_norm, hbk_user_home_loc)
	store(dist_visit_mat, 'dist_norm')

	# Rival count normalized
	store(apply_rivals_norm(visit_mat), 'rivals_norm')

	# Distance combined with one further normalization
	store(apply_non_home_norm(dist_visit_mat, non_home_norm), 'dist__non_home')
	store(apply_tw_freq_norm(dist_visit_mat, tw_freq_norm), 'dist__tw_freq')
	store(apply_rivals_norm(dist_visit_mat), 'dist__rivals')

	# Distance + Tweet freq + Non-home, then additionally Rivals
	combined = apply_tw_freq_norm(dist_visit_mat, tw_freq_norm)
	combined = apply_non_home_norm(combined, non_home_norm)
	store(combined, 'dist__tw_freq__non_home')
	store(apply_rivals_norm(combined), 'dist__tw_freq__non_home__rivals')
示例#6
0
def generate_visit_mat():
	"""Compute the visit matrix and dump it to ``json/visit_matrix.json``.

	The ``json/`` directory under the data folder is created when missing.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)
	visit_mat = calc.calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t)

	json_dir = 'data/' + my.DATA_FOLDER + 'json/'
	if not os.path.exists(json_dir):
		os.makedirs(json_dir)
	with open(json_dir + 'visit_matrix.json', 'wb') as out_file:
		out_file.write(anyjson.dumps(visit_mat))
示例#7
0
def test():
	"""Write every gang user to ``hbk_final_users.csv``, one user per row.

	Flattens the per-gang user lists of the mapping returned by
	``load.loadUsersInGangTty`` and writes each user as a single-column CSV
	row.  An empty mapping produces an empty file.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	# NOTE(review): the result is not used below; the call is kept in case the
	# loader's side effects are relied on.
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)

	# Flatten with extend(): reduce() without an initial value raises
	# TypeError on an empty mapping and re-copies the list on every step.
	user_list = []
	for gang_users in hbk_users_in_gang_t.values():
		user_list.extend(gang_users)

	with open('data/' + my.DATA_FOLDER  + 'hbk_final_users.csv', 'wb') as fp1:
		csv_writer = csv.writer(fp1, delimiter=',')
		for user in user_list:
			csv_writer.writerow([user])
示例#8
0
def calc_rival_nonrival_matrics_dist_norm():
	"""Compute rival / non-rival visit metrics on the distance-normalised matrix.

	For every gang in ``my.HBK_GANG_AND_RIVAL_IDS`` two lists per measure are
	collected, one for its listed rivals and one for all other ids in
	``my.HBK_GANG_ID_LIST``:

	* ``measure1`` -- the visit count towards the other gang divided by the
	  gang's total non-home visits, rounded to 5 places;
	* ``measure2`` -- that fraction divided by ``norm`` of the other gang,
	  rounded to 5 places.

	Pairs with a zero visit count or a zero norm are skipped.  The results are
	written to ``metrics_dist-norm/measure1.json`` and ``measure2.json``.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)
	# calcDistNorm() is the alternative distance norm function.
	dist_norm = calcDistNormCDF()

	visit_mat = calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t, dist_norm, hbk_user_home_loc)
	# The norm is derived from the plain matrix, not the distance-normalised one.
	norm = calcNorm(calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t))

	measure1 = {}
	measure2 = {}

	for gang_id in my.HBK_GANG_AND_RIVAL_IDS:
		measure1[gang_id] = {
			'rival' : [],
			'nonrival' : []
			}
		measure2[gang_id] = {
			'rival' : [],
			'nonrival' : []
			}

		non_home_sum = sum(visit_mat[gang_id].values()) - visit_mat[gang_id][gang_id]

		for rival_id in my.HBK_GANG_AND_RIVAL_IDS[gang_id]:
			if gang_id != rival_id and visit_mat[gang_id][rival_id] != 0:
				# Same zero-norm guard as the non-rival branch below; without
				# it the division would raise ZeroDivisionError.
				if norm[rival_id] != 0:
					frac = visit_mat[gang_id][rival_id]/float(non_home_sum)
					measure1[gang_id]['rival'].append(round(frac, 5))
					measure2[gang_id]['rival'].append(round(frac/norm[rival_id], 5))

		for non_rival_id in my.HBK_GANG_ID_LIST:
			if gang_id != non_rival_id and non_rival_id not in my.HBK_GANG_AND_RIVAL_IDS[gang_id]:
				if visit_mat[gang_id][non_rival_id] != 0 and norm[non_rival_id] != 0:
					frac = visit_mat[gang_id][non_rival_id]/float(non_home_sum)
					measure1[gang_id]['nonrival'].append(round(frac, 5))
					measure2[gang_id]['nonrival'].append(round(frac/norm[non_rival_id], 5))

	# Store metrics
	metrics_dir = 'data/' + my.DATA_FOLDER + 'metrics_dist-norm/'
	if not os.path.exists(metrics_dir):
		os.makedirs(metrics_dir)
	with open(metrics_dir + 'measure1.json', 'wb') as fp2:
		fp2.write(anyjson.serialize(measure1))
	with open(metrics_dir + 'measure2.json', 'wb') as fp2:
		fp2.write(anyjson.serialize(measure2))
示例#9
0
def trim_inside_pols():
	"""Remove tweets inside the public/social location polygons and rewrite the tweet file.

	The polygons are read from ``json/public_pols.json``; ``removePolygon`` is
	applied once per polygon and the remaining tweets overwrite
	``my.HBK_TWEET_LOC_FILE``.
	"""
	data_dir = 'data/' + my.DATA_FOLDER
	with open(data_dir + 'json/' + 'public_pols.json', 'rb') as pols_file:
		public_pols = anyjson.loads(pols_file.read())

	remaining_tweets = load.loadAllTweets()
	for polygon in public_pols:
		remaining_tweets = removePolygon(remaining_tweets, polygon)

	# The trimmed list takes the place of the previous tweet file.
	print('Replacing old set of tweets...')
	with open(data_dir + my.HBK_TWEET_LOC_FILE, 'wb') as tweets_file:
		csv.writer(tweets_file, delimiter=',').writerows(remaining_tweets)
	print(str(len(remaining_tweets)) + ' total instances written.')
示例#10
0
def see_visit_mat():
	"""Print the visit matrix as a fixed-width table, one row per gang.

	Self-visits and zero counts are shown as ``.``; a non-zero count towards
	an id listed for the row's gang in ``my.HBK_GANG_AND_RIVAL_IDS`` gets an
	``r`` suffix.  The header hard-codes the column ids 23..54, so the format
	expects exactly 32 entries per matrix row.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)
	tty_names = load.loadLocNames()

	visit_mat = calc.calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t)

	# Name, id, then one 4-wide cell for each of the 32 columns.
	row_fmt = '%20s - %2s:' + ' %4s' * 32

	header = ['GANG NAME', '#'] + list(range(23, 55))
	print(row_fmt % tuple(header))

	for gang_id in visit_mat:
		counts = visit_mat[gang_id]
		# Pass 1: blank out the gang's visits to itself.
		without_self = dict((to_id, 0 if to_id == gang_id else counts[to_id]) for to_id in counts)
		# Pass 2: show zero counts as a dot.
		dotted = dict((to_id, '.' if without_self[to_id] == 0 else without_self[to_id]) for to_id in without_self)
		# Pass 3: tag the non-empty cells that point at a listed rival.
		cells = []
		for to_id in dotted:
			cell = dotted[to_id]
			if to_id in my.HBK_GANG_AND_RIVAL_IDS[gang_id] and cell != '.':
				cell = str(cell) + 'r'
			cells.append(cell)
		print(row_fmt % tuple([tty_names[gang_id], gang_id] + cells))
示例#11
0
def trim_home_clusters():
	"""Remove the tweets near each user's home location and rewrite the tweet file.

	Builds a mapping from the first field of every home-location record to
	the pair of its second and third fields, passes it with
	``my.HOME_RADIUS`` to ``removeNearPoints`` and overwrites
	``my.HBK_TWEET_LOC_FILE`` with the result.
	"""
	_, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)

	print('Removing home clusters...')
	# record[0] is presumably the user id and record[1:3] the home coordinates -- confirm
	home_by_user = {}
	for record in hbk_user_home_loc:
		home_by_user[record[0]] = [record[1], record[2]]
	remaining_tweets = removeNearPoints(hbk_all_tweets, home_by_user, my.HOME_RADIUS)
	print(str(len(remaining_tweets)) + ' instances after home clusters removed.')

	# The trimmed list takes the place of the previous tweet file.
	print('Replacing old set of tweets...')
	with open('data/' + my.DATA_FOLDER + my.HBK_TWEET_LOC_FILE, 'wb') as tweets_file:
		csv.writer(tweets_file, delimiter=',').writerows(remaining_tweets)
	print(str(len(remaining_tweets)) + ' total instances written.')
示例#12
0
def see_gang_tweet_counts():
	"""Print a per-gang table of tweet, user and rival counts plus totals.

	Gangs with a zero tweet count are left out of the table and of the user
	total; the tweet total sums over all gangs.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	tty_names = load.loadLocNames()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)

	tweet_counts = {}
	print('Finding tweet count by each gang...')
	for gang_id in hbk_users_in_gang_t:
		gang_tweets = prep.keepUserIds(hbk_all_tweets, hbk_users_in_gang_t[gang_id])
		tweet_counts[gang_id] = len(gang_tweets)
	print('Each gang\'s tweet count: %s' % tweet_counts)

	row_fmt = '%2s %15s %5s %5s %8s %6s'
	print(row_fmt % ('ID', 'NAME', '#TWs', '#USERs', '#RIVALs', 'TW/USR'))
	for gang_id in tweet_counts:
		n_tweets = tweet_counts[gang_id]
		if n_tweets == 0:
			continue
		name = tty_names[gang_id]
		n_users = len(hbk_users_in_gang_t[gang_id])
		n_rivals = len(my.HBK_GANG_AND_RIVAL_IDS[gang_id])
		# Last column: tweets per user, truncated to an integer.
		print(row_fmt % (gang_id, name, n_tweets, n_users, n_rivals, int(n_tweets/float(n_users))))

	active_users = sum(
		len(hbk_users_in_gang_t[gang_id])
		for gang_id in tweet_counts
		if tweet_counts[gang_id] != 0
	)
	print('Total number of users: %s' % active_users)
	print('Total tweets from all users: %s' % sum(tweet_counts.values()))
示例#13
0
def generate_gang_locs_json():
	"""Write the ``[tweet[1], tweet[2]]`` pairs of each gang's tweets to JSON.

	Every id in ``my.HBK_GANG_ID_LIST`` gets an entry; ids without users in
	the gang mapping get an empty list.  The result is stored in
	``json/gang_tweet_locs.json`` (the directory is created when missing).
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)

	locs_by_gang = {}
	print('Finding tweets by each gang...')
	for gang_id in my.HBK_GANG_ID_LIST:
		if gang_id in hbk_users_in_gang_t:
			gang_tweets = prep.keepUserIds(hbk_all_tweets, hbk_users_in_gang_t[gang_id])
		else:
			gang_tweets = []
		# tweet[1], tweet[2] are presumably the tweet's coordinates -- confirm against the loader
		locs_by_gang[gang_id] = [[tweet[1], tweet[2]] for tweet in gang_tweets]

	count_by_gang = dict((gang_id, len(locs)) for gang_id, locs in locs_by_gang.items())
	print('Each gang\'s tweet count: %s' % count_by_gang)
	print('Total tweets = %s' % sum(len(locs) for locs in locs_by_gang.values()))

	json_dir = 'data/' + my.DATA_FOLDER + 'json/'
	if not os.path.exists(json_dir):
		os.makedirs(json_dir)
	with open(json_dir + 'gang_tweet_locs.json', 'wb') as out_file:
		out_file.write(anyjson.dumps(locs_by_gang))
示例#14
0
def see_rivalry_list():
	"""Print one table row per gang pair with at least 5 plain visits.

	A pair is listed once, for the first direction met whose count in the
	plain visit matrix is >= 5.  Each row shows both ids and names, the
	distance-normalised visit values in both directions, whether the second
	gang is a listed rival of the first, an affinity value (the rounded
	inverse of the absolute difference between the two directions, or 0 when
	they are equal) and the integer mean of the two directions.  Rival rows
	are printed before non-rival rows.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)
	tty_names = load.loadLocNames()
	plain_visits = calc.calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t)
	dist_norm = calc.calcDistNormCDF()
	visit_mat = calc.calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t, dist_norm, hbk_user_home_loc)

	# Rows keyed by the concatenated ids of the pair.
	rows_by_pair = {}
	for gang_id in my.HBK_GANG_AND_RIVAL_IDS:
		for other_id in my.HBK_GANG_ID_LIST:
			if other_id == gang_id:
				continue
			if plain_visits[gang_id][other_id] < 5:
				continue
			forward_key = str(gang_id)+str(other_id)
			backward_key = str(other_id)+str(gang_id)
			if forward_key in rows_by_pair or backward_key in rows_by_pair:
				# the pair is already listed in one direction
				continue

			gang_name = tty_names[gang_id]
			other_name = tty_names[other_id]
			a_to_b = visit_mat[gang_id][other_id]
			b_to_a = visit_mat[other_id][gang_id]

			if other_id in my.HBK_GANG_AND_RIVAL_IDS[gang_id]:
				relation = 'rival'
			else:
				relation = 'nonrival'

			if a_to_b != b_to_a:
				affinity = round(1.0/abs(a_to_b-b_to_a), 3)
			else:
				affinity = 0

			rows_by_pair[forward_key] = [
				gang_id, gang_name, other_id, other_name,
				int(a_to_b), int(b_to_a),
				relation, affinity,
				int((a_to_b+b_to_a)/2),
			]

	# Rivals first, then non-rivals.
	rival_rows = []
	nonrival_rows = []
	for row in rows_by_pair.values():
		if row[6] == 'rival':
			rival_rows.append(row)
		elif row[6] == 'nonrival':
			nonrival_rows.append(row)

	row_fmt = '%2s %20s => %2s %20s \t %4s \t %4s \t %8s \t %8s \t %5s'
	header = ['A#', 'GANG A', 'B#', 'GANG B', 'A>B', 'B>A', 'RnR', 'Affinity', 'AvgTw']
	print(row_fmt % tuple(header))
	for row in rival_rows + nonrival_rows:
		print(row_fmt % tuple(row))
示例#15
0
def trim_low_user_gangs():
	"""Remove the tweets of gangs that have too few users and rewrite the tweet file.

	Users of every gang with fewer than ``my.MIN_GANG_USERS`` members are
	removed from the tweet list, which then overwrites
	``my.HBK_TWEET_LOC_FILE``.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)
	member_counts = dict((gang_id, len(members)) for gang_id, members in hbk_users_in_gang_t.items())

	print('Trimming tweets by low user gangs...')

	small_gangs = [
		gang_id for gang_id in member_counts
		if member_counts[gang_id] < my.MIN_GANG_USERS
	]
	print('Removing users from gangs: %s' % small_gangs)
	remove_user_list = []
	for gang_id in small_gangs:
		remove_user_list.extend(hbk_users_in_gang_t[gang_id])
	remaining_tweets = removeUserIds(hbk_all_tweets, remove_user_list)

	# The trimmed list takes the place of the previous tweet file.
	print('Replacing old set of tweets...')
	with open('data/' + my.DATA_FOLDER + my.HBK_TWEET_LOC_FILE, 'wb') as tweets_file:
		csv.writer(tweets_file, delimiter=',').writerows(remaining_tweets)
	print(str(len(remaining_tweets)) + ' total instances written.')
示例#16
0
def calc_rival_nonrival_matrics():
	"""Compute rival / non-rival visit metrics on the plain visit matrix.

	Prints the non-zero entries of the visit matrix and the norm of every
	gang, then collects for every gang in ``my.HBK_GANG_AND_RIVAL_IDS`` -- once
	for its listed rivals and once for all other ids in
	``my.HBK_GANG_ID_LIST``:

	* ``measure1`` -- the visit count towards the other gang divided by the
	  gang's total non-home visits, rounded to 5 places;
	* ``measure2`` -- that fraction divided by ``norm`` of the other gang,
	  rounded to 5 places;
	* ``measure3`` -- the unrounded ``[fraction, norm]`` pair.

	Pairs with a zero visit count or a zero norm are skipped.  The results are
	written to ``metrics/measure1.json``, ``measure2.json`` and
	``measure3.json``.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)
	visit_mat = calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t)
	for gang_id in visit_mat:
		for to_id in visit_mat[gang_id]:
			if visit_mat[gang_id][to_id] != 0:
				print(str(gang_id) + ' => ' + str(to_id) + ' : ' + str(visit_mat[gang_id][to_id]))
	norm = calcNorm(visit_mat)
	for gang_id in norm:
		print(str(gang_id) + ' : ' + str(norm[gang_id]))

	measure1 = {}
	measure2 = {}
	#-CH 	measure3 stores [frac, norm] instead of absolute values
	measure3 = {}

	for gang_id in my.HBK_GANG_AND_RIVAL_IDS:
		measure1[gang_id] = {
			'rival' : [],
			'nonrival' : []
			}
		measure2[gang_id] = {
			'rival' : [],
			'nonrival' : []
			}
		#-CH
		measure3[gang_id] = {
			'rival' : [],
			'nonrival' : []
			}

		non_home_sum = sum(visit_mat[gang_id].values()) - visit_mat[gang_id][gang_id]

		for rival_id in my.HBK_GANG_AND_RIVAL_IDS[gang_id]:
			if gang_id != rival_id and visit_mat[gang_id][rival_id] != 0:
				# Same zero-norm guard as the non-rival branch below; without
				# it the division would raise ZeroDivisionError.
				if norm[rival_id] != 0:
					frac = visit_mat[gang_id][rival_id]/float(non_home_sum)
					measure1[gang_id]['rival'].append(round(frac, 5))
					measure2[gang_id]['rival'].append(round(frac/norm[rival_id], 5))
					#-CH
					measure3[gang_id]['rival'].append([frac, norm[rival_id]])

		for non_rival_id in my.HBK_GANG_ID_LIST:
			if gang_id != non_rival_id and non_rival_id not in my.HBK_GANG_AND_RIVAL_IDS[gang_id]:
				if visit_mat[gang_id][non_rival_id] != 0 and norm[non_rival_id] != 0:
					frac = visit_mat[gang_id][non_rival_id]/float(non_home_sum)
					measure1[gang_id]['nonrival'].append(round(frac, 5))
					measure2[gang_id]['nonrival'].append(round(frac/norm[non_rival_id], 5))
					#-CH
					measure3[gang_id]['nonrival'].append([frac, norm[non_rival_id]])

	# Store metrics
	metrics_dir = 'data/' + my.DATA_FOLDER + 'metrics/'
	if not os.path.exists(metrics_dir):
		os.makedirs(metrics_dir)
	with open(metrics_dir + 'measure1.json', 'wb') as fp2:
		fp2.write(anyjson.serialize(measure1))
	with open(metrics_dir + 'measure2.json', 'wb') as fp2:
		fp2.write(anyjson.serialize(measure2))
	#-CH
	with open(metrics_dir + 'measure3.json', 'wb') as fp2:
		fp2.write(anyjson.serialize(measure3))
示例#17
0
def calcTweetDistances():
	"""Load the tweets and home locations and print the first field of each home record.

	NOTE(review): despite the name, no distance is computed in the lines
	visible here -- the function may continue beyond this excerpt.
	"""
	print 'Calculating tweeting distances...'
	_, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	# i[0] is presumably the user id of the home-location record -- confirm against the loader
	print [i[0] for i in hbk_user_home_loc]