def significant_nei_utm_ids(): output_folder = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'/%s.png' for i, data in enumerate(FileIO.iterateJsonFromFile(f_significant_nei_utm_ids, remove_params_dict=True)): utm_lat_long = UTMConverter.getLatLongUTMIdInLatLongForm(data['utm_id']) nei_utm_lat_longs = map( lambda nei_utm_id: UTMConverter.getLatLongUTMIdInLatLongForm(nei_utm_id), data['nei_utm_ids'] ) if nei_utm_lat_longs: output_file = output_folder%('%s_%s'%(utm_lat_long)) plotPointsOnWorldMap(nei_utm_lat_longs, blueMarble=False, bkcolor='#CFCFCF', lw = 0, color = '#EA00FF', alpha=1.) _, m = plotPointsOnWorldMap([utm_lat_long], blueMarble=False, bkcolor='#CFCFCF', lw = 0, color = '#2BFF00', s = 40, returnBaseMapObject=True, alpha=1.) for nei_utm_lat_long in nei_utm_lat_longs: m.drawgreatcircle(utm_lat_long[1], utm_lat_long[0], nei_utm_lat_long[1], nei_utm_lat_long[0], color='#FFA600', lw=1.5, alpha=1.0) print 'Saving %s'%(i+1) savefig(output_file)
def temporalLocalityTemporalDistanceExample(lattice=NEW_YORK):
    '''Color every neighbor of the given lattice by the mean temporal distance
    (hours) of hashtags shared with it, and save the resulting world map.
    '''
    neighbor_measures = defaultdict(dict)
    for lattice_object in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%('training_world','%s_%s'%(2,11))):
        if lattice_object['id']!=lattice: continue
        lattice_hashtags = set(lattice_object['hashtags'])
        for neighbor_id, neighbor_hashtags in lattice_object['links'].iteritems():
            neighbor_measures[neighbor_id] = {}
            # Drop observations whose temporal distance lies outside 1.5*IQR.
            neighbor_hashtags = filterOutNeighborHashtagsOutside1_5IQROfTemporalDistance(lattice_object['hashtags'], neighbor_hashtags, findLag=False)
            neighbor_hashtag_set = set(neighbor_hashtags)
            # Jaccard similarity of the two hashtag sets.
            neighbor_measures[neighbor_id]['similarity'] = len(lattice_hashtags.intersection(neighbor_hashtag_set))/float(len(lattice_hashtags.union(neighbor_hashtag_set)))
            # Mean absolute difference of occurrence times, in hours.
            neighbor_measures[neighbor_id]['temporalDistance'] = np.mean([abs(lattice_object['hashtags'][k][0]-neighbor_hashtags[k][0]) for k in neighbor_hashtags if k in lattice_object['hashtags']])/(60.*60.)
            neighbor_measures[neighbor_id]['geoDistance'] = getHaversineDistanceForLids(lattice_object['id'].replace('_', ' '), neighbor_id.replace('_', ' '))
        break
    ax = plt.subplot(111)
    data_points = [(getLocationFromLid(neighbor_id.replace('_', ' ')), measures['temporalDistance'])
                       for neighbor_id, measures in neighbor_measures.iteritems()]
    points, colors = zip(*sorted(data_points, key=itemgetter(1)))
    sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', cmap='RdPu', c=colors, lw = 0, alpha=1.0)
    plotPointsOnWorldMap([getLocationFromLid(lattice.replace('_', ' '))], blueMarble=False, bkcolor='#CFCFCF', c='#64FF1C', lw = 0)
    divider = make_axes_locatable(ax)
    plt.title('Average time difference from New York')
    cax = divider.append_axes("right", size="5%", pad=0.05)
    plt.colorbar(sc, cax=cax)
    plt.savefig('../images/temporalDistanceExample.png')
def plotLatticeTemporalDistanceInHoursOnMap(latticeGraphType, latticeObject):
    '''Plot each neighbor of latticeObject colored by its link weight
    (largest weight drawn first) and mark the lattice itself in green.
    Returns the scatter handle so callers can reuse its colorbar.
    '''
    latticeObject = latticeGraphType['method'](latticeObject)
    ranked_links = sorted([(getLocationFromLid(neighbor_id.replace('_', ' ')), weight)
                               for neighbor_id, weight in latticeObject['links'].iteritems()],
                          key=itemgetter(1), reverse=True)
    points, colors = zip(*ranked_links)
    color_map = matplotlib.cm.get_cmap('autumn')
    sc = plotPointsOnWorldMap(points, c=colors, cmap=color_map, lw = 0, vmin=0)
    plotPointsOnWorldMap([getLocationFromLid(latticeObject['id'].replace('_', ' '))], c='#00FF00', lw = 0)
    plt.xlabel(latticeGraphType['title'])
    plt.colorbar(sc)
    return sc
def plotLatticeTemporalClosenessScoresOnMap(latticeGraphType, latticeObject):
    '''Plot each neighbor of latticeObject colored by its normalized closeness
    score (smallest drawn first) and mark the lattice itself in green.
    Returns the scatter handle so callers can reuse its colorbar.
    '''
    latticeObject = latticeGraphType['method'](latticeObject)
    # Normalize link scores in place before ranking.
    LatticeGraph.normalizeNode(latticeObject)
    ranked_links = sorted([(getLocationFromLid(neighbor_id.replace('_', ' ')), score)
                               for neighbor_id, score in latticeObject['links'].iteritems()],
                          key=itemgetter(1))
    points, colors = zip(*ranked_links)
    color_map = matplotlib.cm.get_cmap('YlOrRd')
    sc = plotPointsOnWorldMap(points, c=colors, cmap=color_map, lw = 0, vmin=0)
    plotPointsOnWorldMap([getLocationFromLid(latticeObject['id'].replace('_', ' '))], c='#00FF00', lw = 0)
    plt.xlabel(latticeGraphType['title'])
    plt.colorbar(sc)
    return sc
def probabilisticCoverageModelExample(hashtag, type):
    '''Plot coverage-model lattice scores for one hashtag on a world map.

    `type`=='5m' scores lattices from only the first 5-minute epoch of the
    hashtag's highest-activity region; any other value uses all occurrences.
    NOTE(review): `type` shadows the builtin; kept as-is for interface
    compatibility with existing callers.
    '''
    MINUTES, timeUnit = 5, 1
    print len(CoverageBasedLatticeSelectionModel.lattices)
    for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
        if hashtagObject['h']==hashtag:
            # Bucket occurrences of the highest-activity region into 5-minute epochs.
            occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
            # Occurrence lists ordered by epoch.
            occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1])
            # Flatten the first `timeUnit` epochs / all epochs, keeping only locations.
            occsInTimeunit = zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0]
            allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0]
            if type=='5m': probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(occsInTimeunit)
            else:
                print getRadius(allOccurances)
                probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(allOccurances)
            # Spread observed probability mass over all candidate lattices.
            latticeScores = CoverageBasedLatticeSelectionModel.spreadProbability(CoverageBasedLatticeSelectionModel.lattices, probabilityDistributionForObservedLattices)
            # Sort ascending by score so high-score lattices are drawn last.
            points, colors = zip(*map(lambda t: (getLocationFromLid(t[0].replace('_', ' ')), t[1]), sorted(latticeScores.iteritems(), key=itemgetter(1))))
#            print points[0], colors[0]
            ax = plt.subplot(111)
            sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0)
            divider = make_axes_locatable(ax)
#            plt.title('Jaccard similarity with New York')
            cax = divider.append_axes("right", size="5%", pad=0.05)
            plt.colorbar(sc, cax=cax)
            plt.show()
#            plt.savefig('../images/coverage_examples/%s_%s.png'%(hashtag, type))
            plt.clf()
            break
def top_k_locations_on_world_map():
    '''Plot the 200 locations with the highest hashtag occurrence counts.'''
    output_file = fld_data_analysis_results%GeneralMethods.get_method_id() + '.png'
    ltuo_location_and_occurrence_count = [
        [location_object['location'], location_object['occurrences_count']]
        for location_object in FileIO.iterateJsonFromFile(f_dense_hashtag_distribution_in_locations,
                                                          remove_params_dict=True)
    ]
    # Rank locations by occurrence count, highest first, and keep the top 200.
    ltuo_lid_and_r_occurrence_count = sorted(ltuo_location_and_occurrence_count,
                                             key=itemgetter(1), reverse=True)
    lids = zip(*ltuo_lid_and_r_occurrence_count)[0][:200]
    points = [UTMConverter.getLatLongUTMIdInLatLongForm(lid) for lid in lids]
    plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c='m', lw = 0, alpha=1.)
    savefig(output_file)
def plot_global_influencers(ltuo_model_id_and_hashtag_tag): tuples_of_boundary_and_boundary_label = [ ([[-90,-180], [90, 180]], 'World', 'm'), ] for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag: print model_id, hashtag_tag tuples_of_location_and_color = [] for boundary, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label: tuo_location_and_influence_scores = Experiments.load_tuo_location_and_boundary_influence_score(model_id, hashtag_tag, boundary) tuo_location_and_influence_scores = sorted(tuo_location_and_influence_scores, key=itemgetter(1))[:10] locations = zip(*tuo_location_and_influence_scores)[0] for location in locations: tuples_of_location_and_color.append([getLocationFromLid(location.replace('_', ' ')), boundary_color]) locations, colors = zip(*tuples_of_location_and_color) plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0, alpha=1.) for _, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label: plt.scatter([0], [0], label=boundary_label, c=boundary_color, lw = 0) # plt.legend(loc=3, ncol=4, mode="expand",) # plt.show() savefig(fld_results%(GeneralMethods.get_method_id()) +'%s_%s.png'%(model_id, hashtag_tag))
def plotDistributionGraphs(occurences, validTimeUnits, title, startingEpoch=None):
    '''Two-panel map of a hashtag's occurrences grouped by lattice.

    Top panel: lattices colored by number of mentions. Bottom panel: lattices
    colored by arrival time relative to `startingEpoch` (defaults to the
    earliest observed occurrence, which is then returned for reuse across
    calls so multiple plots share a common time origin).
    '''
    occurences = getOccurencesFilteredByDistributionInTimeUnits(occurences, validTimeUnits)
    # Group occurrences by lattice id; each entry is (location, sorted occurrence times).
    occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), sorted(zip(*occs)[1])) for lid, occs in groupby(sorted([(getLatticeLid(l, ACCURACY), t) for l, t in occurences], key=itemgetter(0)), key=itemgetter(0))]
    plt.subplot(211)
    # Sort ascending by mention count so busier lattices are drawn on top.
    pointsForNumberOfOccurances, numberOfOccurancesList = zip(*sorted(occurancesGroupedByLattice, key=lambda t: len(t[1])))
    numberOfOccurancesList = [len(ocs) for ocs in numberOfOccurancesList]
    cm = matplotlib.cm.get_cmap('cool')
    sc = plotPointsOnWorldMap(pointsForNumberOfOccurances, c=numberOfOccurancesList, cmap=cm, lw = 0, alpha=1.0)
    plt.colorbar(sc), plt.title(title), plt.xlabel('Number of mentions')
    plt.subplot(212)
    # Sort descending by earliest occurrence so early lattices are drawn on top.
    pointsForNumberOfOccurances, occuranceTime = zip(*sorted(occurancesGroupedByLattice, key=lambda t: min(t[1]), reverse=True))
    occuranceTime=[min(t) for t in occuranceTime]
    if not startingEpoch: startingEpoch = occuranceTime[-1]
    # Convert absolute epochs to elapsed time units since startingEpoch.
    occuranceTime=[(t-startingEpoch)/TIME_UNIT_IN_SECONDS for t in occuranceTime]
    cm = matplotlib.cm.get_cmap('autumn')
    sc = plotPointsOnWorldMap(pointsForNumberOfOccurances, c=occuranceTime, cmap=cm, lw = 0, alpha=1.0)
    plt.colorbar(sc), plt.xlabel('Speed of hashtag arrival')
    return startingEpoch
def utm_ids_on_map():
    '''
    Plots utm ids on world map. The color indicates the log(total_hashtag_count)
    '''
    output_file = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'.png'
    ltuo_point_and_total_hashtag_count = [
        (UTMConverter.getLatLongUTMIdInLatLongForm(utm_object['utm_id']),
         log(utm_object['total_hashtag_count']))
        for utm_object in FileIO.iterateJsonFromFile(f_hashtags_by_utm_id, remove_params_dict=True)
    ]
    # Plot in increasing count order so high-count ids are drawn last.
    points, total_hashtag_counts = zip(*sorted(ltuo_point_and_total_hashtag_count, key=itemgetter(1)))
    plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=total_hashtag_counts,
                         cmap=matplotlib.cm.cool, lw = 0, alpha=1.)
    savefig(output_file)
def temp():
    '''Debug helper: for the hashtag 'americanhorrorstory', save one world map
    per hourly interval showing the lattices active in that interval, then
    exit the process.
    NOTE(review): the `hashtags` list is initialized but never used.
    '''
    hashtags, MINUTES = [], 60
    for hashtagObject in FileIO.iterateJsonFromFile('americanhorrorstory'):
        if hashtagObject['h']=='americanhorrorstory':
            print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc'])
            # Bucket occurrences of the highest-activity region into hourly epochs.
            occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject, timeUnit=60*60), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
            totalOccurances = []
            for interval, t in enumerate(sorted(occsDistributionInTimeUnits)):
                occs = occsDistributionInTimeUnits[t]
                if occs:
                    fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName)
#                    print interval, t, len(occs)
                    print fileName
                    # One point per active lattice, all colored magenta.
                    # NOTE: the comprehension's `t` rebinds the loop variable in
                    # py2, but `t` is reassigned by the for-loop each iteration.
                    occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occ in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in occs], key=itemgetter(0)), key=itemgetter(0))]
                    occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1))
                    points, colors = zip(*occurancesGroupedByLattice)
                    plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0)
#                    plt.show()
                    plt.savefig(fileName)
                    plt.clf()
            # Stop after the first (only) matching hashtag object.
            exit()
def plot_local_influencers(ltuo_model_id_and_hashtag_tag): tuples_of_boundary_and_boundary_label = [ ([[24.527135,-127.792969], [49.61071,-59.765625]], 'USA', GeneralMethods.getRandomColor()), ([[10.107706,-118.660469], [26.40009,-93.699531]], 'Mexico', GeneralMethods.getRandomColor()), ([[-16.6695,88.409841], [30.115057,119.698904]], 'SE-Asia', GeneralMethods.getRandomColor()), ([[-29.565473,-58.191719], [7.327985,-30.418282]], 'Brazil', GeneralMethods.getRandomColor()), ] for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag: print model_id, hashtag_tag tuples_of_location_and_color = [] for boundary, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label: tuo_location_and_influence_scores = Experiments.load_tuo_location_and_boundary_influence_score(model_id, hashtag_tag, boundary) tuo_location_and_influence_scores = sorted(tuo_location_and_influence_scores, key=itemgetter(1))[:10] locations = zip(*tuo_location_and_influence_scores)[0] for location in locations: tuples_of_location_and_color.append([getLocationFromLid(location.replace('_', ' ')), boundary_color]) locations, colors = zip(*tuples_of_location_and_color) plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0, alpha=1.) for _, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label: plt.scatter([0], [0], label=boundary_label, c=boundary_color, lw = 0) plt.legend(loc=3, ncol=4, mode="expand",) # plt.show() savefig(fld_results%(GeneralMethods.get_method_id()) +'%s_%s.png'%(model_id, hashtag_tag))
def plot_locations_influence_on_world_map(ltuo_model_id_and_hashtag_tag, noOfInfluencers=10, percentage_of_locations=0.15):
    '''For a fixed set of named cities, plot the top `percentage_of_locations`
    of locations that influence each city (incoming influence), with
    great-circle links from each influencer to the city; one image per
    (model_id, hashtag_tag, city).
    NOTE(review): the `noOfInfluencers` parameter is not used in the body —
    the loader is called with noOfInfluencers=None; verify against callers.
    '''
    input_locations = [
                       ('40.6000_-73.9500', 'new_york'),
                       ('33.3500_-118.1750', 'los_angeles'),
                       ('29.7250_-97.1500', 'austin'),
                       ('30.4500_-95.7000', 'college_station'),
                       ('-22.4750_-42.7750', 'rio'),
                       ('51.4750_0.0000', 'london'),
                       ('-23.2000_-46.4000', 'sao_paulo')
                       ]
    for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
        tuo_location_and_tuo_neighbor_location_and_locations_influence_score = \
            Experiments.load_tuo_location_and_tuo_neighbor_location_and_locations_influence_score(model_id, hashtag_tag, noOfInfluencers=None, influence_type=InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE)
        for input_location, label in input_locations:
            for location, tuo_neighbor_location_and_locations_influence_score in \
                    tuo_location_and_tuo_neighbor_location_and_locations_influence_score:
                if input_location==location:
                    input_location = getLocationFromLid(input_location.replace('_', ' '))
                    output_file = fld_results%GeneralMethods.get_method_id() + '/%s_%s/%s.png'%(model_id, hashtag_tag, label)
                    # Keep the top percentage of influencing locations (fall back to all).
                    number_of_outgoing_influences = int(len(tuo_neighbor_location_and_locations_influence_score)*percentage_of_locations)
                    if number_of_outgoing_influences==0: number_of_outgoing_influences=len(tuo_neighbor_location_and_locations_influence_score)
                    locations = zip(*tuo_neighbor_location_and_locations_influence_score)[0][:number_of_outgoing_influences]
                    locations = [getLocationFromLid(location.replace('_', ' ')) for location in locations]
#                    locations = filter(lambda location: isWithinBoundingBox(location, PARTIAL_WORLD_BOUNDARY), locations)
                    if locations:
                        _, m = plotPointsOnWorldMap(locations, resolution='c', blueMarble=False, bkcolor='#000000', c='#FF00FF', returnBaseMapObject=True, lw = 0)
#                        _, m = plotPointsOnWorldMap(locations, resolution='c', blueMarble=False, bkcolor='#CFCFCF', c='#FF00FF', returnBaseMapObject=True, lw = 0)
                        for location in locations:
#                            if isWithinBoundingBox(location, PARTIAL_WORLD_BOUNDARY):
                            # drawgreatcircle takes (lon, lat) pairs.
                            m.drawgreatcircle(location[1], location[0], input_location[1], input_location[0], color='#FAA31B', lw=1., alpha=0.5)
#                        plotPointsOnWorldMap([input_location], blueMarble=False, bkcolor='#CFCFCF', c='#003CFF', s=40, lw = 0)
                        plotPointsOnWorldMap([input_location], resolution='c', blueMarble=False, bkcolor='#000000', c='#003CFF', s=40, lw = 0)
#                        plotPointsOnWorldMap([input_location], resolution='c', blueMarble=False, bkcolor='#CFCFCF', c='#003CFF', s=40, lw = 0)
                        FileIO.createDirectoryForFile(output_file)
                        print output_file
                        savefig(output_file)
                        plt.clf()
                    else:
                        # No influencers to draw: remove any stale image.
                        GeneralMethods.runCommand('rm -rf %s'%output_file)
                    break
def plotGraphsForHashtag(hashtag):
    '''For the given hashtag, show one world map per 5-minute interval with
    all lattices active up to and including that interval (cumulative), and
    stop after the first two hours of the highest-activity region.
    '''
    for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
        MINUTES = 5
        if hashtagObject['h']==hashtag:
            print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc'])
            # Bucket occurrences of the highest-activity region into 5-minute epochs.
            occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
            totalOccurances = []
            for interval, t in enumerate(sorted(occsDistributionInTimeUnits)):
                occs = occsDistributionInTimeUnits[t]
                # Accumulate so each map shows every lattice active so far.
                totalOccurances+=occs
                if occs:
                    fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName)
                    print fileName
                    occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occs in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in totalOccurances], key=itemgetter(0)), key=itemgetter(0))]
                    occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1))
                    points, colors = zip(*occurancesGroupedByLattice)
                    plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0)
                    plt.show()
#                    plt.savefig(fileName)
                    plt.clf()
                # Stop after the first 120 minutes of intervals.
                if (interval+1)*MINUTES>=120: break
            break
def plot_maps_for_every_hour():
    '''For two example hashtags, build one two-panel world map per 15-minute
    lag step over the first 24 hours: each panel shows all lattices seen up to
    that lag, colored by how recently they first appeared.
    '''
    MINUTES = 15
    hashtags = ['ripstevejobs', 'cnbcdebate']
    map_from_hashtag_to_subplot = dict([('ripstevejobs', 211), ('cnbcdebate', 212)])
    map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag = defaultdict(dict)
    for hashtag in hashtags:
        for hashtag_object in FileIO.iterateJsonFromFile('./data/%s.json'%hashtag):
            # Bucket occurrences of the highest-activity region into 15-minute epochs.
            map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtag_object), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
            tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time = sorted(map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time.iteritems(), key=itemgetter(0))
            # Only keep the first 24 hours after the earliest epoch.
            epoch_starting_time_unit = tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time[0][0]
            epoch_ending_time_unit = epoch_starting_time_unit+24*60*60
            for epoch_time_unit, tuples_of_location_and_epoch_occurrence_time in tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time:
                if epoch_time_unit<=epoch_ending_time_unit:
                    if tuples_of_location_and_epoch_occurrence_time:
                        epoch_lag = epoch_time_unit - epoch_starting_time_unit
                        tuples_of_location_and_epoch_occurrence_time = sorted(tuples_of_location_and_epoch_occurrence_time, key=itemgetter(1))
                        # Store (lattice id, lag of occurrence) per hashtag for this lag step.
                        map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag][hashtag] = [(getLatticeLid(location, 0.145), epoch_occurrence_time-epoch_starting_time_unit)for location, epoch_occurrence_time in tuples_of_location_and_epoch_occurrence_time]
    map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag = defaultdict(list)
    GeneralMethods.runCommand('rm -rf ./images/plot_maps_for_every_hour/')
    for epoch_lag in sorted(map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag):
        file_world_map_plot = './images/plot_maps_for_every_hour/%s.png'%(epoch_lag)
        print file_world_map_plot
        map_from_hashtag_to_tuples_of_location_and_epoch_lag = map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag]
        # Accumulate observations so every map shows all lattices seen so far.
        for hashtag, tuples_of_location_and_epoch_lag in map_from_hashtag_to_tuples_of_location_and_epoch_lag.iteritems():
            map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag[hashtag]+=tuples_of_location_and_epoch_lag
        for hashtag, accumulated_tuples_of_location_and_epoch_lag in map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag.iteritems():
            plt.subplot(map_from_hashtag_to_subplot[hashtag])
            # Latest observed lag per lattice.
            tuples_of_location_and_epoch_max_lag= [(location, max(zip(*iterator_of_tuples_of_location_and_epoch_lag)[1])) for location, iterator_of_tuples_of_location_and_epoch_lag in groupby(sorted(accumulated_tuples_of_location_and_epoch_lag, key=itemgetter(0)), key=itemgetter(0)) ]
            # Color encodes recency: smaller value = more recent appearance.
            locations, colors = zip(*[(getLocationFromLid(location.replace('_', ' ')), (epoch_lag+MINUTES*60)-epoch_max_lag) for location, epoch_max_lag in sorted(tuples_of_location_and_epoch_max_lag, key=itemgetter(1), reverse=True)])
            plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap=matplotlib.cm.cool, lw = 0, vmax=epoch_lag+MINUTES*60)
            plt.title('%s (%s hours)'%(hashtag, (epoch_lag+MINUTES*60)/(60.*60)))
#        plt.show()
        FileIO.createDirectoryForFile(file_world_map_plot)
        plt.savefig(file_world_map_plot)
        plt.clf()
def plotHashtagSourcesOnMap(timeRange, outputFolder): i = 1 distribution = defaultdict(int) for hashtagObject in FileIO.iterateJsonFromFile(hashtagsFile%(outputFolder,'%s_%s'%timeRange)): occuranesInHighestActiveRegion, isFirstActiveRegion = getOccuranesInHighestActiveRegion(hashtagObject, True) if occuranesInHighestActiveRegion: source, count = getSourceLattice(occuranesInHighestActiveRegion) print i, source;i+=1 distribution[getLidFromLocation(source)]+=1 # if i==10: break points, colors = zip(*[(getLocationFromLid(k),v) for k, v in sorted(distribution.iteritems(), key=itemgetter(1))]) cm = matplotlib.cm.get_cmap('Paired') sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw = 0) plt.colorbar(sc) plt.show()
def plot_geo_distribution_in_social_networks(): total_checkins = 0.0 for social_network in [FOURSQUARE_ID, BRIGHTKITE_ID, GOWALLA_ID]: print social_network ax = plt.subplot(111) tuples_of_location_and_location_occurences_count = [(getLocationFromLid(data['key'].replace('_', ' ')), data['distribution'][social_network]) for i, data in enumerate(iterateJsonFromFile(lidsToDistributionInSocialNetworksMapFile%BOUNDARY_ID))\ if social_network in data['distribution'] and data['distribution'][social_network]>25] tuples_of_location_and_location_occurences_count = sorted(tuples_of_location_and_location_occurences_count, key=itemgetter(1)) locations, colors = zip(*tuples_of_location_and_location_occurences_count) sc = plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0) divider = make_axes_locatable(ax) # plt.title('Jaccard similarity with New York') cax = divider.append_axes("right", size="5%", pad=0.05) plt.colorbar(sc, cax=cax) # for k, v in tuples_of_location_and_location_occurences_count: # print social_network, k, v # print len(tuples_of_location_and_location_occurences_count) plt.show()
def influence_clusters(model_ids, min_cluster_size=15):
    '''Cluster locations by incoming-influence similarity (affinity
    propagation) and draw the 10 largest clusters, with intra-cluster edges,
    on a world map — one figure per model id.
    NOTE(review): `min_cluster_size` is currently unused (the size filter is
    commented out below); verify whether callers rely on it.
    '''
    influence_type = InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE
    for model_id in model_ids:
        digraph_of_location_and_location_similarity = nx.DiGraph()
        for line_count, (location, tuo_neighbor_location_and_mf_influence_type_and_similarity) in \
                enumerate(FileIO.iterateJsonFromFile(tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id)):
#            print line_count
            for neighbor_location, mf_influence_type_to_similarity in tuo_neighbor_location_and_mf_influence_type_and_similarity:
                # Only keep edges whose endpoints both fall inside the partial-world boundary.
                if isWithinBoundingBox(getLocationFromLid(location.replace('_', ' ')), PARTIAL_WORLD_BOUNDARY) and \
                        isWithinBoundingBox(getLocationFromLid(neighbor_location.replace('_', ' ')), PARTIAL_WORLD_BOUNDARY):
                    digraph_of_location_and_location_similarity.add_edge(location, neighbor_location, {'w': mf_influence_type_to_similarity[influence_type]})
        no_of_clusters, tuo_location_and_cluster_id = clusterUsingAffinityPropagation(digraph_of_location_and_location_similarity)
        # Group member locations by cluster id.
        tuo_cluster_id_to_locations = [
                                       (cluster_id, zip(*ito_tuo_location_and_cluster_id)[0])
                                       for cluster_id, ito_tuo_location_and_cluster_id in groupby(
                                                                                                  sorted(tuo_location_and_cluster_id, key=itemgetter(1)),
                                                                                                  key=itemgetter(1)
                                                                                                  )
                                       ]
        mf_location_to_cluster_id = dict(tuo_location_and_cluster_id)
        # One random color per cluster.
        mf_cluster_id_to_cluster_color = dict([(i, GeneralMethods.getRandomColor()) for i in range(no_of_clusters)])
        mf_valid_locations_to_color = {}
        # Keep only the 10 largest clusters.
        for cluster_id, locations in \
                sorted(tuo_cluster_id_to_locations, key=lambda (cluster_id, locations): len(locations))[-10:]:
#            if len(locations)>min_cluster_size:
            print cluster_id, len(locations)
            for location in locations:
                mf_valid_locations_to_color[location] \
                    = mf_cluster_id_to_cluster_color[mf_location_to_cluster_id[location]]
        locations, colors = zip(*mf_valid_locations_to_color.iteritems())
        locations = [getLocationFromLid(location.replace('_', ' ')) for location in locations]
        # s=0 plots invisible points; we only need the basemap object back.
        _, m = plotPointsOnWorldMap(locations,
                                    blueMarble=False, bkcolor='#CFCFCF', c=colors, s=0, returnBaseMapObject=True, lw = 0)
        for u, v, data in digraph_of_location_and_location_similarity.edges(data=True):
            # Draw only intra-cluster edges between retained locations.
            if u in mf_valid_locations_to_color and v in mf_valid_locations_to_color \
                    and mf_location_to_cluster_id[u]==mf_location_to_cluster_id[v]:
                # NOTE: u and v are rebound from lattice ids to (lat, long) here.
                color, u, v, w = mf_cluster_id_to_cluster_color[mf_location_to_cluster_id[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w']
                m.drawgreatcircle(u[1], u[0], v[1], v[0], color=color, alpha=0.6)
        plt.show()
def load_checkins_graph(checkins_graph_file): graph = nx.Graph() for data in iterateJsonFromFile(checkins_graph_file): (u, v) = data['e'].split('__') graph.add_edge(u , v, {'w': data['w']}) noOfClusters, clusters = clusterUsingAffinityPropagation(graph) # for cluster in clusters: # print len(cluster), cluster nodeToClusterIdMap = dict(clusters) colorMap = dict([(i, GeneralMethods.getRandomColor()) for i in range(noOfClusters)]) clusters = [(c, list(l)) for c, l in groupby(sorted(clusters, key=itemgetter(1)), key=itemgetter(1))] points, colors = zip(*map(lambda l: (getLocationFromLid(l.replace('_', ' ')), colorMap[nodeToClusterIdMap[l]]), graph.nodes())) _, m =plotPointsOnWorldMap(points[:1], s=0, lw=0, c=colors[:1], returnBaseMapObject=True) for u, v, data in graph.edges(data=True): if nodeToClusterIdMap[u]==nodeToClusterIdMap[v]: color, u, v, w = colorMap[nodeToClusterIdMap[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w'] m.drawgreatcircle(u[1],u[0],v[1],v[0],color=color, alpha=1.5) # plt.title(title) plt.show() print noOfClusters print graph.number_of_edges() print graph.number_of_nodes()
def plotLatticesOnMap(timeRange, outputFolder):
    '''Show the location of every lattice in the lattice-graph file.'''
    lattice_points = []
    for latticeObject in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%(outputFolder,'%s_%s'%timeRange)):
        lattice_points.append(getLocationFromLid(latticeObject['id'].replace('_', ' ')))
    plotPointsOnWorldMap(lattice_points, c='m', lw=0)
    plt.show()
def plotPoints(links, xlabel):
    '''Plot neighbor locations colored by their link value (ascending, so
    higher values are drawn on top) plus the focus location as a black dot.

    NOTE(review): `locationObject` is not defined in this function — it is
    resolved from an enclosing/global scope; verify the caller sets it before
    invoking this helper.
    '''
    cm = matplotlib.cm.get_cmap('cool')
    points, colors = zip(*sorted([(getLocationFromLid(k.replace('_', ' ')), v)for k, v in links.iteritems()], key=itemgetter(1)))
    sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw=0, vmin=0, vmax=1)
    plotPointsOnWorldMap([getLocationFromLid(locationObject['id'].replace('_', ' '))], c='k', s=20, lw=0)
    plt.xlabel(xlabel), plt.colorbar(sc)
def plotHastagClasses(timeRange, folderType):
    '''For every classified hashtag, save a 3-panel figure: (311) occurrences
    per lattice on a world map, (312) time series of the highest-activity
    region, (313) overall time series with the active region overlaid.
    NOTE(review): classId is overwritten with 1 right after classification, so
    all images land in class folder "1" — looks intentional for debugging but
    verify.
    '''
    def getFileName():
        # Endless supply of two-letter image names (unused in the active path).
        for i in combinations('abcedfghijklmnopqrstuvwxyz',2): yield ''.join(i)+'.png'
    count=1
#    for hashtagObject in FileIO.iterateJsonFromFile(hashtagsWithoutEndingWindowFile%(folderType,'%s_%s'%timeRange)):
    for hashtagObject in FileIO.iterateJsonFromFile(hashtagsFile%('testing_world','%s_%s'%(2,11))):
#        HashtagsClassifier.classify(hashtagObject)
        print count; count+=1
#        if hashtagObject['h']=='ripamy':
        classId = HashtagsClassifier.classify(hashtagObject)
        if classId!=None:
            classId = 1
            outputFile = hashtagsImagesHashtagsClassFolder%folderType+'%s/%s.png'%(classId, hashtagObject['h']); FileIO.createDirectoryForFile(outputFile)
            fileNameIterator = getFileName()
            timeUnits, timeSeries = getTimeUnitsAndTimeSeries(hashtagObject['oc'], timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
            # Single activity region (magenta); the multi-region variant is commented out.
            occurancesInActivityRegions = [[getOccuranesInHighestActiveRegion(hashtagObject), 'm']]
#            for hashtagPropagatingRegion in HashtagsClassifier._getActivityRegionsWithActivityAboveThreshold(hashtagObject):
#                validTimeUnits = [timeUnits[i] for i in range(hashtagPropagatingRegion[0], hashtagPropagatingRegion[1]+1)]
#                occurancesInActiveRegion = [(p,t) for p,t in hashtagObject['oc'] if GeneralMethods.approximateEpoch(t, TIME_UNIT_IN_SECONDS) in validTimeUnits]
#                occurancesInActivityRegions.append([occurancesInActiveRegion, GeneralMethods.getRandomColor()])
            currentMainRangeId = 0
            for occurances1, color1 in occurancesInActivityRegions:
#                outputFile=outputFolder+fileNameIterator.next();FileIO.createDirectoryForFile(outputFile)
                print outputFile
                # Panel 312: time series per activity region (current region in magenta).
                ax = plt.subplot(312)
                subRangeId = 0
                for occurances, color in occurancesInActivityRegions:
                    if subRangeId==currentMainRangeId: color='m'
                    timeUnits, timeSeries = getTimeUnitsAndTimeSeries(occurances, timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
#                    if len(timeUnits)<24:
#                        difference = 24-len(timeUnits)
#                        timeUnits=list(timeUnits)+[timeUnits[-1]+(i+1)*HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS for i in range(difference)]
#                        timeSeries=list(timeSeries)+[0 for i in range(difference)]
#                    print len(timeUnits[:24]), len(timeSeries[:24])
                    plt.plot_date([datetime.datetime.fromtimestamp(t) for t in timeUnits], timeSeries, '-o', c=color)
                    subRangeId+=1
#                plt.ylim(ymax=1)
                plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)
                # Panel 313: overall time series with the active region overlaid.
                ax=plt.subplot(313)
                subRangeId = 0
                timeUnits, timeSeries = getTimeUnitsAndTimeSeries(hashtagObject['oc'], timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
                plt.plot_date([datetime.datetime.fromtimestamp(t) for t in timeUnits], timeSeries, '-')
                for occurances, color in occurancesInActivityRegions:
                    if subRangeId==currentMainRangeId: color='m'
                    timeUnits, timeSeries = getTimeUnitsAndTimeSeries(occurances, timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
                    plt.plot_date([datetime.datetime.fromtimestamp(t) for t in timeUnits], timeSeries, '-o', c=color)
                    subRangeId+=1
                plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)
                # Panel 311: occurrences per lattice on a world map.
                plt.subplot(311)
                occurancesGroupedByLattice = sorted(
                                                    [(getLocationFromLid(lid.replace('_', ' ')), len(list(occs)))
                                                         for lid, occs in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in occurances1], key=itemgetter(0)), key=itemgetter(0))],
                                                    key=itemgetter(1)
                                                    )
                points, colors = zip(*occurancesGroupedByLattice)
                cm = matplotlib.cm.get_cmap('cool')
                # A colorbar needs more than one point; fall back to plain magenta.
                if len(points)>1:
                    sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw=0, alpha=1.0)
                    plt.colorbar(sc)
                else: sc = plotPointsOnWorldMap(points, c='m', lw=0)
                plt.title(hashtagObject['h']+ '(%d)'%len(occurancesGroupedByLattice))
#                plt.show()
                try:
                    plt.savefig(outputFile); plt.clf()
                except: pass
                currentMainRangeId+=1