def mapper(self, key, hashtag_object):
     hashtag = hashtag_object['hashtag']
     ltuo_occ_time_and_occ_location = hashtag_object['ltuo_occ_time_and_occ_location']
     if ltuo_occ_time_and_occ_location:
         ltuo_intvl_time_and_occ_location = [(
                                            GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS),
                                            occ_location
                                             ) 
                                           for occ_time, occ_location in ltuo_occ_time_and_occ_location]
         points = [UTMConverter.getLatLongUTMIdInLatLongForm(loc) for _, loc in ltuo_occ_time_and_occ_location]
         ltuo_intvl_time_and_items =\
                                 GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(0))
         ltuo_intvl_time_and_items.sort(key=itemgetter(0))
         first_time = ltuo_intvl_time_and_items[0][0]
         ltuo_iid_and_occ_count = map(lambda (t, it): ((t-first_time)/TIME_UNIT_IN_SECONDS, len(it)), ltuo_intvl_time_and_items)
         ltuo_location_and_items =\
                                 GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(1))
         mf_location_to_occ_count = dict(map(lambda (l, it): (l, len(it)), ltuo_location_and_items))
         spatial_metrics = {
                              'hashtag': hashtag,
                               'num_of_occurrences': len(ltuo_occ_time_and_occ_location),
                              'peak_iid': max(ltuo_iid_and_occ_count, key=itemgetter(1))[0],
                              'focus': focus(mf_location_to_occ_count),
                              'entropy': entropy(mf_location_to_occ_count, as_bits=False),
                              'spread': getRadiusOfGyration(points)
                          }
         yield hashtag, spatial_metrics
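Several of these snippets lean on GeneralMethods.group_items_by without showing it. A minimal stand-in consistent with how it is called here, returning (key, [items]) pairs, is sketched below; the real helper's exact contract is an assumption.

from itertools import groupby

def group_items_by(items, key):
    # Hypothetical stand-in: sort on the grouping key first, since
    # itertools.groupby only merges runs of adjacent equal keys.
    return [(k, list(grouped_items))
                for k, grouped_items in groupby(sorted(items, key=key), key=key)]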
 def reducer(self, location, it_performance_values):
     performance_values = list(chain(*it_performance_values))
     performance_summary = defaultdict(list)
     for prediction_method, pvs_for_prediction_method in \
                             GeneralMethods.group_items_by(performance_values, key=itemgetter('prediction_method')):
         for metric, pvs_for_prediction_method_and_metric in \
                         GeneralMethods.group_items_by(pvs_for_prediction_method, key=itemgetter('metric')):
             performance_summary[metric].append([
                                                 prediction_method,
                                                 pvs_for_prediction_method_and_metric[0]['metric_value']
                                         ])
     yield '', dict(location=location, performance_summary=performance_summary)
 def mapper(self, key, hashtag_object):
     ltuo_occ_time_and_occ_location = hashtag_object['ltuo_occ_time_and_occ_location']
     if ltuo_occ_time_and_occ_location:
         ltuo_intvl_time_and_occ_location = [(
                                            GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS),
                                            occ_location
                                             ) 
                                           for occ_time, occ_location in ltuo_occ_time_and_occ_location]
         ltuo_intvl_time_and_items =\
                                 GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(0))
         ltuo_intvl_time_and_items.sort(key=itemgetter(0))
         first_time = ltuo_intvl_time_and_items[0][0]
         intvl_method = lambda (t, it): ((t-first_time)/TIME_UNIT_IN_SECONDS, (t, len(it)))
         ltuo_iid_and_tuo_interval_and_occurrence_count = map(intvl_method, ltuo_intvl_time_and_items)
         peak_tuo_iid_and_tuo_interval_and_occurrence_count = \
                                                         max(
                                                             ltuo_iid_and_tuo_interval_and_occurrence_count,
                                                             key=lambda (_, (__, occurrence_count)): occurrence_count
                                                         )
         peak_iid = peak_tuo_iid_and_tuo_interval_and_occurrence_count[0]
         current_val = 0.0
         # +0.0 makes this a float so the ratios yielded below avoid integer division
         total_occurrences = sum(data[1][1] for data in ltuo_iid_and_tuo_interval_and_occurrence_count)+0.0
         for iid, (_, occurrence_count) in ltuo_iid_and_tuo_interval_and_occurrence_count:
             is_peak = 0.0
             if iid==peak_iid: is_peak=1.0
             current_val+=occurrence_count
             yield iid, [is_peak, occurrence_count/total_occurrences, current_val/total_occurrences]
 def mapper(self, key, hashtag_object):
     if False: yield  # yields nothing; written this way so the mapper is still a generator
     hashtag = hashtag_object['hashtag']
     ltuo_occ_time_and_occ_location = hashtag_object['ltuo_occ_time_and_occ_location']
     ltuo_location_and_items = GeneralMethods.group_items_by(ltuo_occ_time_and_occ_location, key=itemgetter(1))
     for location, items in ltuo_location_and_items:
         self.mf_location_to_unique_hashtags[location].add(hashtag)
         self.mf_location_to_occurrences_count[location]+=len(items)
 def reducer(self, location, it_ltuo_occ_time_and_count):
     ltuo_occ_time_and_count = [(t, sum(zip(*l)[1]))
                                 for t, l in GeneralMethods.group_items_by(
                                                                           list(chain(*it_ltuo_occ_time_and_count)),
                                                                           key=itemgetter(0)
                                                                           )
                                ]
     yield location, {'location': location, 'ltuo_occ_time_and_count': ltuo_occ_time_and_count}
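A tiny worked example of the merge this reducer performs, with invented values: two mappers report counts for the same location, and counts for equal occurrence times are summed.

from itertools import chain, groupby
from operator import itemgetter

it_ltuo_occ_time_and_count = [[(100, 2), (200, 1)], [(100, 3)]]  # invented mapper outputs
merged = sorted(chain(*it_ltuo_occ_time_and_count), key=itemgetter(0))
ltuo_occ_time_and_count = [(t, sum(count for _, count in items))
                               for t, items in groupby(merged, key=itemgetter(0))]
assert ltuo_occ_time_and_count == [(100, 5), (200, 1)]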
Example #6
 def get_components_by_clustering(self, graph):
     _, ltuo_node_and_cluster_id = clusterUsingAffinityPropagation(graph)
     ltuo_cluster_id_and_ltuo_node_id_and_cluster_id = GeneralMethods.group_items_by(
         ltuo_node_and_cluster_id, itemgetter(1)
     )
     ltuo_cluster_id_and_nodes = map(
         lambda (c_i, l_n_c): (c_i, zip(*l_n_c)[0]), ltuo_cluster_id_and_ltuo_node_id_and_cluster_id
     )
     return zip(*ltuo_cluster_id_and_nodes)[1]
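clusterUsingAffinityPropagation is external to this excerpt. A sketch of a compatible implementation on top of scikit-learn, assuming the graph can be summarized as a precomputed similarity matrix over its nodes (the function name and signature here are hypothetical):

from sklearn.cluster import AffinityPropagation

def cluster_using_affinity_propagation(node_ids, similarity_matrix):
    # Fit affinity propagation on a precomputed similarity matrix and
    # return (exemplar_indices, [(node_id, cluster_id), ...]).
    model = AffinityPropagation(affinity='precomputed').fit(similarity_matrix)
    return model.cluster_centers_indices_, list(zip(node_ids, model.labels_))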
 def mapper(self, key, hashtag_object):
     if 'hashtag' in hashtag_object:
         # hashtag_object is still a raw JSON string at this point; the
         # substring test skips records without a hashtag before decoding
         hashtag_object = cjson.decode(hashtag_object)
         ltuo_occ_time_and_occ_location = hashtag_object.get('ltuo_occ_time_and_occ_location', [])
         ltuo_location_and_items = GeneralMethods.group_items_by(ltuo_occ_time_and_occ_location, key=itemgetter(1))
         ltuo_location_and_items = filter(
                                          lambda (location, items): len(items)>=MIN_HASHTAG_OCCURRENCES_PER_LOCATION,
                                          ltuo_location_and_items
                                          )
         hashtag_object['ltuo_occ_time_and_occ_location'] =\
                                                 list(chain(*map(lambda (_, items): items, ltuo_location_and_items)))
         yield hashtag_object['hashtag'], hashtag_object
 def mapper(self, hashtag, hashtag_object):
     def distance_from_overall_locality_stat(overall_stat, current_stat): return overall_stat-current_stat
     ltuo_occ_time_and_occ_location = hashtag_object['ltuo_occ_time_and_occ_location']
     if ltuo_occ_time_and_occ_location:
         ltuo_intvl_time_and_occ_location = [(
                                            GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS),
                                            occ_location
                                             ) 
                                           for occ_time, occ_location in ltuo_occ_time_and_occ_location]
         ltuo_intvl_time_and_items =\
                                 GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(0))
         ltuo_intvl_time_and_items.sort(key=itemgetter(0))
         first_time = ltuo_intvl_time_and_items[0][0]
         intvl_method = lambda (t, it): ((t-first_time)/TIME_UNIT_IN_SECONDS, (t, map(itemgetter(1), it)))
         ltuo_iid_and_tuo_interval_and_lids = map(intvl_method, ltuo_intvl_time_and_items)
         peak_tuo_iid_and_tuo_interval_and_lids = \
             max(ltuo_iid_and_tuo_interval_and_lids, key=lambda (_, (__, lids)): len(lids))
         peak_iid = peak_tuo_iid_and_tuo_interval_and_lids[0]
         ltuo_location_and_items =\
                                 GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(1))
         overall_mf_lid_to_occurrence_count = dict(map(lambda (l, it): (l, len(it)), ltuo_location_and_items))
         overall_points =\
                     [UTMConverter.getLatLongUTMIdInLatLongForm(loc) for _, loc in ltuo_occ_time_and_occ_location]
         overall_entropy = entropy(overall_mf_lid_to_occurrence_count, False)
         overall_focus = focus(overall_mf_lid_to_occurrence_count)[1]
         overall_coverage = getRadiusOfGyration(overall_points)
         # +0.0 makes this a float so the per-interval fractions avoid integer division
         total_occurrences = sum(len(lids) for (iid, (interval, lids)) in ltuo_iid_and_tuo_interval_and_lids)+0.0
         for iid, (_, lids) in ltuo_iid_and_tuo_interval_and_lids:
             mf_lid_to_occurrence_count = defaultdict(float)
             for lid in lids: mf_lid_to_occurrence_count[lid]+=1
             points = [UTMConverter.getLatLongUTMIdInLatLongForm(lid) for lid in lids]
             current_entropy = entropy(mf_lid_to_occurrence_count, False)
             current_focus = focus(mf_lid_to_occurrence_count)[1]
             current_coverage = getRadiusOfGyration(points)
             
             yield iid-peak_iid, [len(lids)/total_occurrences, current_entropy, current_focus, current_coverage, 
                                     distance_from_overall_locality_stat(overall_entropy, current_entropy),
                                     distance_from_overall_locality_stat(overall_focus, current_focus),
                                     distance_from_overall_locality_stat(overall_coverage, current_coverage),]
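focus and entropy act here as locality metrics over a location-to-count map. Below are minimal sketches consistent with their call sites (focus returns a (location, fraction) pair, entropy takes an as_bits flag); the real implementations may differ.

import math
from operator import itemgetter

def entropy(mf_location_to_count, as_bits=True):
    # Shannon entropy of the occurrence distribution across locations.
    total = sum(mf_location_to_count.values())+0.0
    log = (lambda p: math.log(p, 2)) if as_bits else math.log
    return -sum((count/total)*log(count/total)
                    for count in mf_location_to_count.values() if count > 0)

def focus(mf_location_to_count):
    # The location holding the largest share of occurrences, with its share.
    total = sum(mf_location_to_count.values())+0.0
    location, count = max(mf_location_to_count.items(), key=itemgetter(1))
    return location, count/total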
Example #9
 def entropy_examples():
     output_file_format = fld_data_analysis_results%GeneralMethods.get_method_id()+'/%s.png'
     data = [d for d in FileIO.iterateJsonFromFile(f_hashtag_spatial_metrics, remove_params_dict=True)]
     ltuo_hashtag_and_num_of_occurrences_and_entropy =\
                                                 map(
                                                     itemgetter('hashtag', 'num_of_occurrences', 'entropy'),
                                                     data
                                                     )
     ltuo_hashtag_and_num_of_occurrences_and_entropy =\
                                                 map(
                                                     lambda (h, n, e): (h, n, round(e,0)),
                                                     ltuo_hashtag_and_num_of_occurrences_and_entropy
                                                     )
     for entropy_value, entropy_data in \
             GeneralMethods.group_items_by(ltuo_hashtag_and_num_of_occurrences_and_entropy, itemgetter(2)):
         entropy_data.sort(key=itemgetter(1))
         hashtags = map(itemgetter(0), entropy_data)
         print entropy_value, len(entropy_data), hashtags[:25]
 def mapper1(self, key, hashtag_object):
     if False: yield  # yields nothing; written this way so the mapper is still a generator
     hashtag = hashtag_object['hashtag']
     ltuo_occ_time_and_occ_location = hashtag_object['ltuo_occ_time_and_occ_location']
     ltuo_location_and_items = GeneralMethods.group_items_by(ltuo_occ_time_and_occ_location, key=itemgetter(1))
     ltuo_location_and_occurrence_time =\
                         [(location, min(items, key=itemgetter(0))[0]) for location, items in ltuo_location_and_items]
     ltuo_location_and_occurrence_time = [(
                                           location, 
                                           GeneralMethods.approximateEpoch(occurrence_time, TIME_UNIT_IN_SECONDS)
                                           ) 
                                          for location, occurrence_time in ltuo_location_and_occurrence_time]
     if ltuo_location_and_occurrence_time:
         occurrence_times = filter_outliers(zip(*ltuo_location_and_occurrence_time)[1])
         ltuo_location_and_occurrence_time =\
                                         filter(lambda (l, o): o in occurrence_times, ltuo_location_and_occurrence_time)
         for location, occurrence_time in ltuo_location_and_occurrence_time:
             self.mf_location_to_ltuo_hashtag_and_min_occ_time[location].append([hashtag, occurrence_time])
             for neighbor_location, _ in ltuo_location_and_occurrence_time:
                 if location!=neighbor_location:
                     self.mf_location_to_neighbor_locations[location].add(neighbor_location)
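filter_outliers is undefined in this excerpt. One plausible reading, used here only as a sketch, keeps occurrence times within a few standard deviations of the mean; the original's exact rule is unknown.

def filter_outliers(values, max_deviations=2.0):
    # Hedged sketch: drop values far from the mean of the sample.
    mean = sum(values)/float(len(values))
    std = (sum((v-mean)**2 for v in values)/float(len(values)))**0.5
    if std == 0: return list(values)
    return [v for v in values if abs(v-mean) <= max_deviations*std]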
Example #11
    def example_for_caverlee():
#        valid_locations = ['18T_585E_4512N', '18T_587E_4514N']
        mf_lid_to_location = dict([
                                   ('18T_585E_4512N', 'Times Square'),
                                   ('18T_587E_4514N', 'Central Park'),
                                   ('18T_584E_4511N', 'Penn Station'),
                                   ('18T_585E_4511N', 'Empire State Building'),
                                   ])
        output_file_format = fld_data_analysis_results%GeneralMethods.get_method_id()+'/%s.png'
        subplot_num = 221
#        plt.figure(num=None, figsize=(6,3))
        for data in FileIO.iterateJsonFromFile(f_example_for_caverlee, remove_params_dict=True):
            location = data['location']
            if location in mf_lid_to_location:
                td = timedelta(hours=-5)  # shift timestamps from UTC to US Eastern
                ltuo_occ_time_and_count = data['ltuo_occ_time_and_count']
                ltuo_occ_time_and_count.sort(key=itemgetter(0))
                occ_times, counts = zip(*ltuo_occ_time_and_count)
                occ_times = map(datetime.fromtimestamp, occ_times)
                occ_times = map(lambda d: d+td, occ_times)
                occ_hours = map(lambda d: d.hour, occ_times)
                ltuo_occ_hour_and_count = zip(occ_hours, counts)
                ltuo_occ_hour_and_count = [(h, sum(zip(*h_c)[1])) for h, h_c in
                                            GeneralMethods.group_items_by(ltuo_occ_hour_and_count, key=itemgetter(0))]
                occ_hours, counts = zip(*ltuo_occ_hour_and_count)
                total_counts = sum(counts)+0.0
                counts = map(lambda c: c/total_counts, counts)
                plt.subplot(subplot_num)
#                plt.subplots_adjust(bottom=0.2, top=0.9)
                subplot_num+=1
                plt.plot(occ_hours, counts, color='#EA00FF', lw=1)
                plt.fill_between(occ_hours, counts, color='#EA00FF', alpha=0.25)
#                plt.ylabel('% of tweets')
                plt.xlabel('Time of day')
                plt.xlim(xmax=23)
                plt.ylim(ymax=0.09)
                plot_anchored_text(mf_lid_to_location[location], loc=2)
                plt.grid(True)
#                savefig(output_file_format%mf_lid_to_location[location].replace(' ', '_'))
        savefig(output_file_format%'ny_locations')
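plot_anchored_text is also external; a minimal equivalent built on matplotlib's AnchoredText (loc=2 is 'upper left') might look like this.

import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnchoredText

def plot_anchored_text(text, loc=2):
    # Place a small text label in a corner of the current axes.
    plt.gca().add_artist(AnchoredText(text, loc=loc))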
 def mapper_final(self):
     for location, occ_times in self.mf_location_to_occ_times.iteritems():
         ltuo_occ_time_and_count = [(t, len(l))
                                             for t, l in GeneralMethods.group_items_by(occ_times, lambda item: item)]
         yield location, ltuo_occ_time_and_count
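mapper_final pairs with an in-mapper-combining mapper that only accumulates per-location state. The original mapper is not shown; a sketch of what it presumably looks like, reusing the 'if False: yield' generator idiom from the other mappers:

def mapper(self, key, hashtag_object):
    if False: yield  # emits nothing itself; mapper_final flushes the accumulated state
    for occ_time, occ_location in hashtag_object['ltuo_occ_time_and_occ_location']:
        self.mf_location_to_occ_times[occ_location].append(
                GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS))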