def map_hashtag_object_to_tuo_norm_iid_and_interval_stats(self, hashtag, hashtag_object):
    """Yield per-interval locality statistics keyed by offset from the peak interval.

    The intervals come from ``get_ltuo_iid_and_tuo_interval_and_lids``; each
    entry is unpacked as ``(iid, (interval, lids))``. The peak interval is the
    one with the most lids (``max`` keeps the first entry on ties).

    Args:
        hashtag: Unused; kept so the method signature stays unchanged.
        hashtag_object: Mapping read through the helper functions and through
            its ``'ltuo_lid_and_s_interval'`` key (pairs whose first element
            is a lid string).

    Yields:
        ``(iid - peak_iid, stats)`` where ``stats`` is the list
        ``[share_of_occurrences, entropy, focus, coverage,
        overall_entropy - entropy, overall_focus - focus,
        overall_coverage - coverage]``.
        Nothing is yielded when there are no intervals.
    """
    def distance_from_overall_locality_stat(overall_stat, current_stat):
        return overall_stat - current_stat

    ltuo_iid_and_tuo_interval_and_lids = get_ltuo_iid_and_tuo_interval_and_lids(hashtag_object)
    if not ltuo_iid_and_tuo_interval_and_lids:
        # max() below would raise ValueError on an empty sequence; skip the
        # record instead, as the raw-occurrence mapper does for empty input.
        return

    # An index-based key replaces the Python-2-only tuple-parameter lambda.
    peak_iid = max(ltuo_iid_and_tuo_interval_and_lids,
                   key=lambda entry: len(entry[1][1]))[0]

    # Locality statistics over the whole hashtag.
    overall_mf_lid_to_occurrence_count = get_mf_lid_to_occurrence_count(hashtag_object)
    overall_points = [getLocationFromLid(lid.replace('_', ' '))
                      for lid, _ in hashtag_object['ltuo_lid_and_s_interval']]
    overall_entropy = entropy(overall_mf_lid_to_occurrence_count, False)
    # Only element 1 of focus()'s result is used -- presumably the focus
    # value itself; confirm against focus().
    overall_focus = focus(overall_mf_lid_to_occurrence_count)[1]
    overall_coverage = getRadiusOfGyration(overall_points)

    # float() forces true division below, so the share is not truncated to 0
    # by Python 2 integer division.
    total_occurrences = float(sum(
        len(lids) for _, (_, lids) in ltuo_iid_and_tuo_interval_and_lids))

    for iid, (_, lids) in ltuo_iid_and_tuo_interval_and_lids:
        # Float counts are kept on purpose: entropy()/focus() receive the
        # same value types as in the original implementation.
        mf_lid_to_occurrence_count = defaultdict(float)
        for lid in lids:
            mf_lid_to_occurrence_count[lid] += 1
        points = [getLocationFromLid(lid.replace('_', ' ')) for lid in lids]
        current_entropy = entropy(mf_lid_to_occurrence_count, False)
        current_focus = focus(mf_lid_to_occurrence_count)[1]
        current_coverage = getRadiusOfGyration(points)
        yield iid - peak_iid, [
            len(lids) / total_occurrences,
            current_entropy,
            current_focus,
            current_coverage,
            distance_from_overall_locality_stat(overall_entropy, current_entropy),
            distance_from_overall_locality_stat(overall_focus, current_focus),
            distance_from_overall_locality_stat(overall_coverage, current_coverage),
        ]
def mapper(self, key, hashtag_object):
    """Emit one record of whole-hashtag spatial metrics.

    Reads ``hashtag_object['hashtag']`` and
    ``hashtag_object['ltuo_occ_time_and_occ_location']`` (pairs of occurrence
    time and occurrence location). Nothing is emitted when the occurrence
    list is empty. ``key`` is not used.

    Yields:
        ``(hashtag, metrics)`` where ``metrics`` is a dict with the keys
        ``'hashtag'``, ``'num_of_occurrenes'``, ``'peak_iid'``, ``'focus'``,
        ``'entropy'`` and ``'spread'``.
    """
    hashtag = hashtag_object['hashtag']
    occurrences = hashtag_object['ltuo_occ_time_and_occ_location']
    if not occurrences:
        return

    # Replace each occurrence time by its approximated interval time.
    bucketed = [
        (GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS), occ_location)
        for occ_time, occ_location in occurrences
    ]
    points = [UTMConverter.getLatLongUTMIdInLatLongForm(loc) for _, loc in occurrences]

    # Group by interval time, order chronologically, and count occurrences
    # per interval id measured from the first interval.
    by_interval = GeneralMethods.group_items_by(bucketed, key=itemgetter(0))
    by_interval.sort(key=itemgetter(0))
    first_time = by_interval[0][0]
    ltuo_iid_and_occ_count = [
        ((intvl_time - first_time) / TIME_UNIT_IN_SECONDS, len(items))
        for intvl_time, items in by_interval
    ]

    # Occurrence count per location.
    by_location = GeneralMethods.group_items_by(bucketed, key=itemgetter(1))
    mf_location_to_occ_count = dict(
        (location, len(items)) for location, items in by_location)

    # The peak is the interval id with the highest occurrence count.
    peak_iid = max(ltuo_iid_and_occ_count, key=itemgetter(1))[0]
    spatial_metrics = {
        'hashtag': hashtag,
        'num_of_occurrenes': len(occurrences),
        'peak_iid': peak_iid,
        'focus': focus(mf_location_to_occ_count),
        'entropy': entropy(mf_location_to_occ_count, as_bits=False),
        'spread': getRadiusOfGyration(points),
    }
    yield hashtag, spatial_metrics
def mapper(self, hashtag, hashtag_object):
    """Yield per-interval locality statistics keyed by offset from the peak interval.

    Occurrences are read from
    ``hashtag_object['ltuo_occ_time_and_occ_location']`` (pairs of occurrence
    time and occurrence location). They are bucketed by
    ``GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS)`` and
    grouped per bucket. The peak interval is the one with the most
    occurrences (``max`` keeps the first entry on ties).

    Args:
        hashtag: Unused; kept so the method signature stays unchanged.
        hashtag_object: Mapping providing ``'ltuo_occ_time_and_occ_location'``.

    Yields:
        ``(iid - peak_iid, stats)`` where ``stats`` is the list
        ``[share_of_occurrences, entropy, focus, coverage,
        overall_entropy - entropy, overall_focus - focus,
        overall_coverage - coverage]``.
        Nothing is yielded when the occurrence list is empty.
    """
    def distance_from_overall_locality_stat(overall_stat, current_stat):
        return overall_stat - current_stat

    ltuo_occ_time_and_occ_location = hashtag_object['ltuo_occ_time_and_occ_location']
    if not ltuo_occ_time_and_occ_location:
        return

    ltuo_intvl_time_and_occ_location = [
        (GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS), occ_location)
        for occ_time, occ_location in ltuo_occ_time_and_occ_location
    ]
    ltuo_intvl_time_and_items = GeneralMethods.group_items_by(
        ltuo_intvl_time_and_occ_location, key=itemgetter(0))
    ltuo_intvl_time_and_items.sort(key=itemgetter(0))
    first_time = ltuo_intvl_time_and_items[0][0]

    # Built as real lists (not map objects) because the sequence is walked
    # three times below and len() is taken of every lids entry.
    ltuo_iid_and_tuo_interval_and_lids = [
        ((intvl_time - first_time) / TIME_UNIT_IN_SECONDS,
         (intvl_time, [item[1] for item in items]))
        for intvl_time, items in ltuo_intvl_time_and_items
    ]

    # An index-based key replaces the Python-2-only tuple-parameter lambda.
    peak_iid = max(ltuo_iid_and_tuo_interval_and_lids,
                   key=lambda entry: len(entry[1][1]))[0]

    # Locality statistics over the whole hashtag.
    ltuo_location_and_items = GeneralMethods.group_items_by(
        ltuo_intvl_time_and_occ_location, key=itemgetter(1))
    overall_mf_lid_to_occurrence_count = dict(
        (location, len(items)) for location, items in ltuo_location_and_items)
    overall_points = [UTMConverter.getLatLongUTMIdInLatLongForm(loc)
                      for _, loc in ltuo_occ_time_and_occ_location]
    overall_entropy = entropy(overall_mf_lid_to_occurrence_count, False)
    # Only element 1 of focus()'s result is used -- presumably the focus
    # value itself; confirm against focus().
    overall_focus = focus(overall_mf_lid_to_occurrence_count)[1]
    overall_coverage = getRadiusOfGyration(overall_points)

    # float() forces true division below, so the share is not truncated to 0
    # by Python 2 integer division.
    total_occurrences = float(sum(
        len(lids) for _, (_, lids) in ltuo_iid_and_tuo_interval_and_lids))

    for iid, (_, lids) in ltuo_iid_and_tuo_interval_and_lids:
        # Float counts are kept on purpose: entropy()/focus() receive the
        # same value types as in the original implementation.
        mf_lid_to_occurrence_count = defaultdict(float)
        for lid in lids:
            mf_lid_to_occurrence_count[lid] += 1
        points = [UTMConverter.getLatLongUTMIdInLatLongForm(lid) for lid in lids]
        current_entropy = entropy(mf_lid_to_occurrence_count, False)
        current_focus = focus(mf_lid_to_occurrence_count)[1]
        current_coverage = getRadiusOfGyration(points)
        yield iid - peak_iid, [
            len(lids) / total_occurrences,
            current_entropy,
            current_focus,
            current_coverage,
            distance_from_overall_locality_stat(overall_entropy, current_entropy),
            distance_from_overall_locality_stat(overall_focus, current_focus),
            distance_from_overall_locality_stat(overall_coverage, current_coverage),
        ]
def map_hashtag_object_to_tuo_hashtag_and_occurrence_count_and_entropy_and_focus_and_coverage_and_peak(self, hashtag, hashtag_object):
    """Emit one summary row for a hashtag.

    The ``hashtag`` argument is not used; the tag is read from
    ``hashtag_object['hashtag']``.

    Yields:
        ``(tag, [tag, occurrence_count, entropy, focus, coverage, peak_iid])``
        where ``occurrence_count`` is the length of
        ``hashtag_object['ltuo_lid_and_s_interval']`` and ``peak_iid`` is the
        interval id with the largest occurrence count (first one on ties).
    """
    lid_counts = get_mf_lid_to_occurrence_count(hashtag_object)
    ltuo_lid_and_s_interval = hashtag_object['ltuo_lid_and_s_interval']
    # Underscores in a lid are replaced by spaces before it is handed to
    # getLocationFromLid.
    locations = [getLocationFromLid(lid.replace('_', ' '))
                 for lid, _ in ltuo_lid_and_s_interval]

    # Entries are shaped (iid, (interval, occurrence_count)); pick the entry
    # with the largest count and keep its iid.
    intervals = get_ltuo_iid_and_tuo_interval_and_occurrence_count(hashtag_object)
    peak_entry = max(intervals, key=lambda entry: entry[1][1])
    peak_iid = peak_entry[0]

    tag = hashtag_object['hashtag']
    row = [
        tag,
        len(ltuo_lid_and_s_interval),
        entropy(lid_counts, False),
        focus(lid_counts),
        getRadiusOfGyration(locations),
        peak_iid,
    ]
    yield tag, row
def reducer(self, user, occurrences):
    """Summarise all occurrences collected for one user.

    Yields:
        ``(user, summary)`` where ``summary`` has the keys ``'u'`` (the user),
        ``'c'`` (number of occurrences), ``'cm'`` (result of
        ``getCenterOfMass`` as a list) and ``'rog'`` (result of
        ``getRadiusOfGyration``).
    """
    # Materialise once: the occurrences are counted and passed to two helpers.
    locations = list(occurrences)
    center = list(getCenterOfMass(locations))
    radius = getRadiusOfGyration(locations)
    yield user, {'u': user, 'c': len(locations), 'cm': center, 'rog': radius}