Пример #1
0
    def map_hashtag_object_to_tuo_norm_iid_and_interval_stats(self, hashtag, hashtag_object):
        """Yield per-interval locality statistics keyed by offset from the peak interval.

        The intervals come from
        ``get_ltuo_iid_and_tuo_interval_and_lids(hashtag_object)`` as
        ``(iid, (interval, lids))`` entries. The peak is the entry with the
        most ``lids``; every entry is emitted under the key ``iid - peak_iid``.

        Args:
            hashtag: Not used by this method.
            hashtag_object: Mapping that provides ``'ltuo_lid_and_s_interval'``
                and whatever the helper functions called here read.

        Yields:
            ``(iid - peak_iid, stats)`` in the order of the helper's output,
            where ``stats`` is the list::

                [len(lids) / total_occurrences,
                 entropy, focus, coverage,
                 overall_entropy - entropy,
                 overall_focus - focus,
                 overall_coverage - coverage]

            ``focus`` values are element ``[1]`` of ``focus(...)`` and
            ``coverage`` values are ``getRadiusOfGyration(...)`` results.
            Nothing is yielded when there are no intervals.
        """
        def distance_from_overall_locality_stat(overall_stat, current_stat):
            return overall_stat - current_stat

        def number_of_lids(tuo_iid_and_tuo_interval_and_lids):
            # Entry shape is (iid, (interval, lids)); a plain function replaces
            # the tuple-parameter lambda, which is Python-2-only syntax.
            return len(tuo_iid_and_tuo_interval_and_lids[1][1])

        ltuo_iid_and_tuo_interval_and_lids = \
            get_ltuo_iid_and_tuo_interval_and_lids(hashtag_object)
        # Without intervals there is no peak; skip instead of letting max()
        # raise ValueError on an empty sequence.
        if not ltuo_iid_and_tuo_interval_and_lids:
            return
        peak_iid = max(ltuo_iid_and_tuo_interval_and_lids, key=number_of_lids)[0]

        # Overall locality stats
        overall_mf_lid_to_occurrence_count = get_mf_lid_to_occurrence_count(hashtag_object)
        overall_points = [getLocationFromLid(lid.replace('_', ' '))
                          for lid, _ in hashtag_object['ltuo_lid_and_s_interval']]
        overall_entropy = entropy(overall_mf_lid_to_occurrence_count, False)
        overall_focus = focus(overall_mf_lid_to_occurrence_count)[1]
        overall_coverage = getRadiusOfGyration(overall_points)

        # Float total so the per-interval share below is a true ratio; with an
        # int total, Python 2 integer division truncates the share to 0.
        total_occurrences = float(sum(
            len(lids) for _, (_, lids) in ltuo_iid_and_tuo_interval_and_lids))

        for iid, (_, lids) in ltuo_iid_and_tuo_interval_and_lids:
            # Counts are accumulated as floats, exactly as before.
            mf_lid_to_occurrence_count = defaultdict(float)
            for lid in lids:
                mf_lid_to_occurrence_count[lid] += 1
            points = [getLocationFromLid(lid.replace('_', ' ')) for lid in lids]

            current_entropy = entropy(mf_lid_to_occurrence_count, False)
            current_focus = focus(mf_lid_to_occurrence_count)[1]
            current_coverage = getRadiusOfGyration(points)

            yield iid - peak_iid, [
                len(lids) / total_occurrences,
                current_entropy,
                current_focus,
                current_coverage,
                distance_from_overall_locality_stat(overall_entropy, current_entropy),
                distance_from_overall_locality_stat(overall_focus, current_focus),
                distance_from_overall_locality_stat(overall_coverage, current_coverage),
            ]
 def mapper(self, key, hashtag_object):
     """Yield ``(hashtag, spatial_metrics)`` for a hashtag that has occurrences.

     Args:
         key: Not used by this method.
         hashtag_object: Mapping with ``'hashtag'`` and
             ``'ltuo_occ_time_and_occ_location'``, the latter an iterable of
             ``(occ_time, occ_location)`` pairs.

     Yields:
         A single ``(hashtag, dict)`` pair. The dict holds the hashtag, the
         number of occurrences, the id of the time interval with the most
         occurrences (``'peak_iid'``), and the results of ``focus``,
         ``entropy`` and ``getRadiusOfGyration``. Nothing is yielded when the
         occurrence list is empty.
     """
     hashtag = hashtag_object['hashtag']
     ltuo_occ_time_and_occ_location = hashtag_object['ltuo_occ_time_and_occ_location']
     if not ltuo_occ_time_and_occ_location:
         return

     # Replace each occurrence time with its approximated interval time.
     ltuo_intvl_time_and_occ_location = [
         (GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS), occ_location)
         for occ_time, occ_location in ltuo_occ_time_and_occ_location
     ]
     points = [UTMConverter.getLatLongUTMIdInLatLongForm(loc)
               for _, loc in ltuo_occ_time_and_occ_location]

     # Group by interval time and order chronologically so the first group
     # defines interval id 0.
     ltuo_intvl_time_and_items = \
         GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(0))
     ltuo_intvl_time_and_items.sort(key=itemgetter(0))
     first_time = ltuo_intvl_time_and_items[0][0]
     # Comprehensions replace the tuple-parameter lambdas, which are a
     # SyntaxError on Python 3.
     # NOTE(review): `/` is kept as in the original; confirm whether `//` is
     # wanted if this ever runs with true division.
     ltuo_iid_and_occ_count = [
         ((intvl_time - first_time) / TIME_UNIT_IN_SECONDS, len(items))
         for intvl_time, items in ltuo_intvl_time_and_items
     ]

     # Occurrence count per location over the whole hashtag lifetime.
     ltuo_location_and_items = \
         GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(1))
     mf_location_to_occ_count = dict(
         (location, len(items)) for location, items in ltuo_location_and_items)

     spatial_metrics = {
         'hashtag': hashtag,
         # Key spelling is left untouched: it is part of the emitted record.
         'num_of_occurrenes': len(ltuo_occ_time_and_occ_location),
         'peak_iid': max(ltuo_iid_and_occ_count, key=itemgetter(1))[0],
         'focus': focus(mf_location_to_occ_count),
         'entropy': entropy(mf_location_to_occ_count, as_bits=False),
         'spread': getRadiusOfGyration(points),
     }
     yield hashtag, spatial_metrics
 def mapper(self, hashtag, hashtag_object):
     """Yield per-interval locality statistics keyed by offset from the peak interval.

     Occurrences are bucketed by approximated interval time; the peak is the
     interval with the most occurrences, and every interval is emitted under
     the key ``iid - peak_iid``.

     Args:
         hashtag: Not used by this method.
         hashtag_object: Mapping with ``'ltuo_occ_time_and_occ_location'``, an
             iterable of ``(occ_time, occ_location)`` pairs.

     Yields:
         ``(iid - peak_iid, stats)`` in chronological interval order, where
         ``stats`` is the list::

             [len(lids) / total_occurrences,
              entropy, focus, coverage,
              overall_entropy - entropy,
              overall_focus - focus,
              overall_coverage - coverage]

         ``focus`` values are element ``[1]`` of ``focus(...)`` and
         ``coverage`` values are ``getRadiusOfGyration(...)`` results.
         Nothing is yielded when the occurrence list is empty.
     """
     def distance_from_overall_locality_stat(overall_stat, current_stat):
         return overall_stat - current_stat

     def number_of_lids(tuo_iid_and_tuo_interval_and_lids):
         # Entry shape is (iid, (interval, lids)); a plain function replaces
         # the tuple-parameter lambda, which is Python-2-only syntax.
         return len(tuo_iid_and_tuo_interval_and_lids[1][1])

     ltuo_occ_time_and_occ_location = hashtag_object['ltuo_occ_time_and_occ_location']
     if not ltuo_occ_time_and_occ_location:
         return

     # Replace each occurrence time with its approximated interval time.
     ltuo_intvl_time_and_occ_location = [
         (GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS), occ_location)
         for occ_time, occ_location in ltuo_occ_time_and_occ_location
     ]

     # Group by interval time and order chronologically so the first group
     # defines interval id 0.
     ltuo_intvl_time_and_items = \
         GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(0))
     ltuo_intvl_time_and_items.sort(key=itemgetter(0))
     first_time = ltuo_intvl_time_and_items[0][0]
     # Built as real lists: the result is traversed three times and each
     # `lids` is passed to len(), neither of which works on a lazy map object.
     # NOTE(review): `/` is kept as in the original; confirm whether `//` is
     # wanted if this ever runs with true division.
     ltuo_iid_and_tuo_interval_and_lids = [
         ((intvl_time - first_time) / TIME_UNIT_IN_SECONDS,
          (intvl_time, [item[1] for item in items]))
         for intvl_time, items in ltuo_intvl_time_and_items
     ]
     peak_iid = max(ltuo_iid_and_tuo_interval_and_lids, key=number_of_lids)[0]

     # Overall locality stats across every occurrence of the hashtag.
     ltuo_location_and_items = \
         GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(1))
     overall_mf_lid_to_occurrence_count = dict(
         (location, len(items)) for location, items in ltuo_location_and_items)
     overall_points = [UTMConverter.getLatLongUTMIdInLatLongForm(loc)
                       for _, loc in ltuo_occ_time_and_occ_location]
     overall_entropy = entropy(overall_mf_lid_to_occurrence_count, False)
     overall_focus = focus(overall_mf_lid_to_occurrence_count)[1]
     overall_coverage = getRadiusOfGyration(overall_points)

     # Float total so the per-interval share below is a true ratio; with an
     # int total, Python 2 integer division truncates the share to 0.
     total_occurrences = float(sum(
         len(lids) for _, (_, lids) in ltuo_iid_and_tuo_interval_and_lids))

     for iid, (_, lids) in ltuo_iid_and_tuo_interval_and_lids:
         # Counts are accumulated as floats, exactly as before.
         mf_lid_to_occurrence_count = defaultdict(float)
         for lid in lids:
             mf_lid_to_occurrence_count[lid] += 1
         points = [UTMConverter.getLatLongUTMIdInLatLongForm(lid) for lid in lids]
         current_entropy = entropy(mf_lid_to_occurrence_count, False)
         current_focus = focus(mf_lid_to_occurrence_count)[1]
         current_coverage = getRadiusOfGyration(points)

         yield iid - peak_iid, [
             len(lids) / total_occurrences,
             current_entropy,
             current_focus,
             current_coverage,
             distance_from_overall_locality_stat(overall_entropy, current_entropy),
             distance_from_overall_locality_stat(overall_focus, current_focus),
             distance_from_overall_locality_stat(overall_coverage, current_coverage),
         ]
Пример #4
0
 def map_hashtag_object_to_tuo_hashtag_and_occurrence_count_and_entropy_and_focus_and_coverage_and_peak(self, hashtag, hashtag_object):
     """Yield one summary record of locality statistics for a hashtag.

     Args:
         hashtag: Not used by this method; the emitted key is read from
             ``hashtag_object['hashtag']``.
         hashtag_object: Mapping with ``'hashtag'`` and
             ``'ltuo_lid_and_s_interval'`` (an iterable of pairs whose first
             element is a location id string), plus whatever the helper
             functions called here read.

     Yields:
         ``(hashtag, [hashtag, occurrence_count, entropy, focus, coverage,
         peak_iid])``, where ``peak_iid`` is the id of the interval with the
         largest occurrence count. Nothing is yielded when the helper returns
         an empty interval list.
     """
     mf_lid_to_occurrence_count = get_mf_lid_to_occurrence_count(hashtag_object)
     points = [getLocationFromLid(lid.replace('_', ' '))
               for lid, _ in hashtag_object['ltuo_lid_and_s_interval']]

     # Determine peak
     ltuo_iid_and_tuo_interval_and_occurrence_count = \
         get_ltuo_iid_and_tuo_interval_and_occurrence_count(hashtag_object)
     # Without intervals there is no peak; skip instead of letting max()
     # raise ValueError on an empty sequence.
     if not ltuo_iid_and_tuo_interval_and_occurrence_count:
         return
     # Entry shape is (iid, (interval, occurrence_count)); indexing replaces
     # the tuple-parameter lambda, which is Python-2-only syntax.
     peak_tuo_iid_and_tuo_interval_and_occurrence_count = max(
         ltuo_iid_and_tuo_interval_and_occurrence_count,
         key=lambda tuo_iid_and_rest: tuo_iid_and_rest[1][1])
     peak_iid = peak_tuo_iid_and_tuo_interval_and_occurrence_count[0]

     yield hashtag_object['hashtag'], [
         hashtag_object['hashtag'],
         len(hashtag_object['ltuo_lid_and_s_interval']),
         entropy(mf_lid_to_occurrence_count, False),
         focus(mf_lid_to_occurrence_count),
         getRadiusOfGyration(points),
         peak_iid,
     ]
 def reducer(self, user, occurrences):
     """Yield a single summary record for ``user``.

     ``occurrences`` is materialized into a list first because it is used
     three times: for its length, for ``getCenterOfMass`` and for
     ``getRadiusOfGyration``.

     Yields:
         ``(user, summary)`` where ``summary`` has the keys ``'u'`` (the
         user), ``'c'`` (number of occurrences), ``'cm'`` (the
         ``getCenterOfMass`` result as a list) and ``'rog'`` (the
         ``getRadiusOfGyration`` result).
     """
     user_points = list(occurrences)
     summary = {
         'u': user,
         'c': len(user_points),
         'cm': list(getCenterOfMass(user_points)),
         'rog': getRadiusOfGyration(user_points),
     }
     yield user, summary