Exemplo n.º 1
0
 def generate_tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity(model_ids, startTime, endTime, outputFolder):
     """Compute, per model, the similarity between every location and each of its neighbors.

     For each model id the previous output file is removed, and then, for
     every location object read from the location objects file, the record
     ``[location, [[neighbor_location, {similarity_type: value}], ...]]`` is
     handed to FileIO.writeToFileAsJson. Every neighbor gets the dot product
     of the two locations' influence vectors for each influence type, plus
     the Jaccard similarity between the location's hashtags and the hashtags
     stored on its link to that neighbor.

     model_ids    -- iterable of model ids; each is substituted into the output file template.
     startTime    -- window start; only its '%Y-%m-%d' form is used, to build the input path.
     endTime      -- window end; used the same way as startTime.
     outputFolder -- folder component of the input path.

     A location or neighbor that is missing from the loaded influence
     vectors raises KeyError.
     """
     def location_similarity(location_vector_1, location_vector_2):
         # Sparse dot product: a key absent from either vector contributes 0.
         so_keys = set(location_vector_1.keys()).union(location_vector_2.keys())
         return sum((location_vector_1.get(k,0)*location_vector_2.get(k,0) for k in so_keys), 0.)
     def jaccard_similarity(so_items_1, so_items_2):
         no_of_items_in_union = len(so_items_1.union(so_items_2))
         # Two empty sets have an empty union; report 0.0 rather than dividing by zero.
         if no_of_items_in_union == 0: return 0.
         return len(so_items_1.intersection(so_items_2))/float(no_of_items_in_union)
     influence_types=[InfluenceMeasuringModels.TYPE_COMPLETE_INFLUENCE, InfluenceMeasuringModels.TYPE_OUTGOING_INFLUENCE, InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE]
     # The input path does not depend on the model, so it is built once.
     location_objects_input_file = location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
     for model_id in model_ids:
         mf_location_to_mf_influence_type_to_influence_vector = dict(Experiments.load_tuo_location_and_mf_influence_type_to_influence_vector(model_id))
         output_file = tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id
         # Start from a clean file before the per-location records are written.
         GeneralMethods.runCommand('rm -rf %s'%output_file)
         for line_count, location_object in enumerate(iterateJsonFromFile(location_objects_input_file)):
             print(line_count)
             location = location_object['id']
             # Identical for every neighbor of this location, so it is built once.
             so_hashtags_for_location = set(location_object['hashtags'].keys())
             tuo_neighbor_location_and_mf_influence_type_and_similarity = []
             for neighbor_location, mf_neighbor_hashtag_to_value in location_object['links'].iteritems():
                 mf_influence_type_to_influence_vector = mf_location_to_mf_influence_type_to_influence_vector[location]
                 mf_influence_type_to_neighbor_influence_vector = mf_location_to_mf_influence_type_to_influence_vector[neighbor_location]
                 mf_influence_type_and_similarity = {}
                 for influence_type in influence_types:
                     mf_influence_type_and_similarity[influence_type] = location_similarity(
                                                           mf_influence_type_to_influence_vector[influence_type],
                                                           mf_influence_type_to_neighbor_influence_vector[influence_type]
                                                    )
                 mf_influence_type_and_similarity[JACCARD_SIMILARITY] = jaccard_similarity(
                                                           so_hashtags_for_location,
                                                           set(mf_neighbor_hashtag_to_value.keys())
                                                    )
                 tuo_neighbor_location_and_mf_influence_type_and_similarity.append([neighbor_location, mf_influence_type_and_similarity])
             FileIO.writeToFileAsJson(
                                      [location, tuo_neighbor_location_and_mf_influence_type_and_similarity],
                                      output_file
                                      )
Exemplo n.º 2
0
    def generate_tuo_location_and_tuo_neighbor_location_and_pure_influence_score(models_ids, startTime, endTime, outputFolder, hashtag_tag):
        """Compute, per model, the mean pure influence score between each location and its neighbors.

        For every model id the output file (named from the model id and
        hashtag_tag) is removed first. Then, for each location object read
        from the location objects file, every neighbor in its 'links' gets
        the numpy mean of:
          * the model function applied to (location occurrences, neighbor
            occurrences) for each hashtag present on both sides, and
          * when hashtag_tag equals w_extra_hashtags_tag, 1.0 for every
            hashtag seen only at the location and -1.0 for every hashtag
            seen only on the link.
        The record ``[location id, [[neighbor_location, mean score], ...]]``,
        sorted by ascending score, is handed to FileIO.writeToFileAsJson.

        models_ids   -- iterable of keys into MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID.
        startTime    -- window start; only its '%Y-%m-%d' form is used, to build the input path.
        endTime      -- window end; used the same way as startTime.
        outputFolder -- folder component of the input path.
        hashtag_tag  -- tag used in the output file name and compared with w_extra_hashtags_tag.
        """
        for model_id in models_ids:
            output_file = tuo_location_and_tuo_neighbor_location_and_pure_influence_score_file%(model_id, hashtag_tag)
            GeneralMethods.runCommand('rm -rf %s'%output_file)
            input_file = location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
            for line_count, location_object in enumerate(iterateJsonFromFile(input_file)):
                # Progress output: "<line number> <model id>".
                print('%s %s'%(line_count, model_id))
                mf_location_hashtag_to_value = location_object['hashtags']
                location_hashtag_set = set(mf_location_hashtag_to_value)
                tuo_neighbor_location_and_pure_influence_score = []
                for neighbor_location, mf_neighbor_hashtag_to_value in location_object['links'].iteritems():
                    # Scores for the hashtags that occur on both sides of the link.
                    pure_influence_scores = [
                        MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[model_id](mf_location_hashtag_to_value[hashtag][0], neighbor_location_occurrences)
                        for hashtag, (neighbor_location_occurrences, time_range) in mf_neighbor_hashtag_to_value.iteritems()
                        if hashtag in mf_location_hashtag_to_value
                    ]
                    if hashtag_tag==w_extra_hashtags_tag:
                        neighbor_location_hashtag_set = set(mf_neighbor_hashtag_to_value.keys())
                        # One fixed score per hashtag that exists on only one side.
                        pure_influence_scores.extend([1.0]*len(location_hashtag_set.difference(neighbor_location_hashtag_set)))
                        pure_influence_scores.extend([-1.0]*len(neighbor_location_hashtag_set.difference(location_hashtag_set)))
                    tuo_neighbor_location_and_pure_influence_score.append([neighbor_location, np.mean(pure_influence_scores)])
                tuo_neighbor_location_and_pure_influence_score.sort(key=itemgetter(1))
                FileIO.writeToFileAsJson([location_object['id'], tuo_neighbor_location_and_pure_influence_score], output_file)
Exemplo n.º 3
0
 def load_ltuo_hashtag_and_ltuo_location_and_occurrence_time(startTime=datetime(2012, 1, 1), endTime=datetime(2012, 3, 31), outputFolder='complete_prop'):
     """Load every hashtag object as ``[hashtag, [(lattice id, occurrence time), ...]]``.

     The input path is built from f_hashtag_objects using outputFolder and
     the '%Y-%m-%d' form of startTime and endTime. Each (point, time) pair
     under the object's 'oc' key has its point converted with
     getLatticeLid(point, LOCATION_ACCURACY); the hashtag comes from 'h'.

     Returns a list with one two-element list per hashtag object.
     """
     def to_hashtag_and_ltuo_location_and_occurrence_time(hashtag_object):
         # Convert the occurrences first, then pair them with the hashtag.
         ltuo_location_and_occurrence_time = []
         for point, occurrence_time in hashtag_object['oc']:
             ltuo_location_and_occurrence_time.append((getLatticeLid(point, LOCATION_ACCURACY), occurrence_time))
         return [hashtag_object['h'], ltuo_location_and_occurrence_time]
     input_file = f_hashtag_objects%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
     return [to_hashtag_and_ltuo_location_and_occurrence_time(hashtag_object)
             for hashtag_object in iterateJsonFromFile(input_file)]
Exemplo n.º 4
0
 def generate_tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score(model_ids, startTime, endTime, outputFolder):
     """Compute, per model, the fraction of a location's hashtags shared with each neighbor.

     For every location object read from the location objects file, each
     neighbor in its 'links' gets
     ``len(location hashtags & neighbor hashtags) / len(location hashtags)``.
     The record ``[location id, [(neighbor_location, score), ...]]``, sorted
     by descending score, is handed to FileIO.writeToFileAsJson with the
     model's output file. A location without hashtags gives every neighbor
     a score of 0.0 instead of raising ZeroDivisionError.

     model_ids    -- iterable of model ids; each is substituted into the output file template.
     startTime    -- window start; only its '%Y-%m-%d' form is used, to build the input path.
     endTime      -- window end; used the same way as startTime.
     outputFolder -- folder component of the input path.
     """
     # The input path does not depend on the model, so it is built once.
     location_objects_input_file = location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
     for model_id in model_ids:
         # NOTE(review): model_id only selects the output file; the scores are the same for every model.
         # NOTE(review): the sibling generators 'rm -rf' their output file first and this one does
         # not -- confirm whether FileIO.writeToFileAsJson appends before re-running.
         output_file = tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score_file%model_id
         for line_count, location_object in enumerate(iterateJsonFromFile(location_objects_input_file)):
             print(line_count)
             so_hashtags = set(location_object['hashtags'])
             # Same denominator for every neighbor of this location.
             no_of_hashtags = float(len(so_hashtags))
             mf_from_neighbor_location_to_sharing_affinity_score = {}
             for neighbor_location, neighbor_hashtags in location_object['links'].iteritems():
                 no_of_shared_hashtags = len(so_hashtags.intersection(neighbor_hashtags))
                 if no_of_hashtags: sharing_affinity_score = no_of_shared_hashtags/no_of_hashtags
                 else: sharing_affinity_score = 0.
                 mf_from_neighbor_location_to_sharing_affinity_score[neighbor_location] = sharing_affinity_score
             FileIO.writeToFileAsJson([
                                       location_object['id'],
                                       sorted(mf_from_neighbor_location_to_sharing_affinity_score.iteritems(), key=itemgetter(1), reverse=True)
                                     ],
                                     output_file)
Exemplo n.º 5
0
 def generate_tuo_location_and_tuo_neighbor_location_and_influence_score(models_ids, startTime, endTime, outputFolder, hashtag_tag):
     """Compute, per model, a weighted influence score between each location and its neighbors.

     For every model id the output file (named from the model id and
     hashtag_tag) is removed first. Then, for each location object read from
     the location objects file, every neighbor in its 'links' is scored as
     ``location weight * numpy mean(weighted per-hashtag scores)`` where:
       * a hashtag present on both sides contributes the location's hashtag
         weight times the model function applied to (location occurrences,
         neighbor occurrences);
       * when hashtag_tag equals w_extra_hashtags_tag, a hashtag seen only at
         the location contributes its location hashtag weight times 1.0 and
         a hashtag seen only on the link contributes its neighbor hashtag
         weight times -1.0.
     The record ``[location id, [[neighbor_location, score], ...]]``, sorted
     by ascending score, is handed to FileIO.writeToFileAsJson.

     models_ids   -- iterable of keys into MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID.
     startTime    -- window start; only its '%Y-%m-%d' form is used, to build the input path.
     endTime      -- window end; used the same way as startTime.
     outputFolder -- folder component of the input path.
     hashtag_tag  -- tag used in the output file name and compared with w_extra_hashtags_tag.
     """
     def get_hashtag_weights(map_from_hashtag_to_tuples_of_occurrences_and_time_range):
         # Each hashtag's share of the total number of occurrences.
         tuples_of_hashtag_and_no_of_occurrences = [
             (hashtag, len(occurrences))
             for hashtag, (occurrences, time_range) in
             map_from_hashtag_to_tuples_of_occurrences_and_time_range.iteritems()]
         total_occurrences = sum([no_of_occurrences for _, no_of_occurrences in tuples_of_hashtag_and_no_of_occurrences]) + 0.
         return dict([(hashtag, no_of_occurrences/total_occurrences)
                      for hashtag, no_of_occurrences in tuples_of_hashtag_and_no_of_occurrences])
     def get_location_weights(hashtags_for_source_location, map_from_location_to_hashtags):
         # Fraction of the source location's hashtags that each location shares.
         set_of_hashtags_for_source_location = set(hashtags_for_source_location.keys())
         no_of_hashtags_for_source_location = len(set_of_hashtags_for_source_location)+0.
         map_from_location_to_weight = {}
         for location, hashtags in map_from_location_to_hashtags.iteritems():
             no_of_shared_hashtags = len(set(hashtags.keys()).intersection(set_of_hashtags_for_source_location))
             map_from_location_to_weight[location] = no_of_shared_hashtags/no_of_hashtags_for_source_location
         return map_from_location_to_weight
     for model_id in models_ids:
         output_file = tuo_location_and_tuo_neighbor_location_and_influence_score_file%(model_id, hashtag_tag)
         GeneralMethods.runCommand('rm -rf %s'%output_file)
         input_file = location_objects_file%(outputFolder, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))
         for line_count, location_object in enumerate(iterateJsonFromFile(input_file)):
             # Progress output: "<line number> <model id>".
             print('%s %s'%(line_count, model_id))
             mf_location_hashtag_to_value = location_object['hashtags']
             mf_neighbor_location_to_value = location_object['links']
             tuo_neighbor_location_and_influence_score = []
             mf_hashtag_to_hashtag_weights = get_hashtag_weights(mf_location_hashtag_to_value)
             mf_location_to_location_weights = get_location_weights(mf_location_hashtag_to_value, mf_neighbor_location_to_value)
             location_hashtag_set = set(mf_location_hashtag_to_value)
             for neighbor_location, mf_neighbor_hashtag_to_value in mf_neighbor_location_to_value.iteritems():
                 influence_scores = []
                 mf_neighbor_location_hashtag_to_hashtag_weights = get_hashtag_weights(mf_neighbor_hashtag_to_value)
                 neighbor_location_hashtag_set = set(mf_neighbor_hashtag_to_value.keys())
                 for hashtag, (neighbor_location_occurrences, time_range) in mf_neighbor_hashtag_to_value.iteritems():
                     # Only hashtags seen on both sides are scored by the model.
                     if hashtag not in mf_location_hashtag_to_value: continue
                     location_occurrences = mf_location_hashtag_to_value[hashtag][0]
                     pure_influence_score = MF_INFLUENCE_MEASURING_MODELS_TO_MODEL_ID[model_id](location_occurrences, neighbor_location_occurrences)
                     influence_scores.append(mf_hashtag_to_hashtag_weights[hashtag]*pure_influence_score)
                 if hashtag_tag==w_extra_hashtags_tag:
                     # Hashtags on only one side contribute their own weight, signed by the side.
                     influence_scores.extend([mf_hashtag_to_hashtag_weights[hashtag]*1.0
                                              for hashtag in location_hashtag_set.difference(neighbor_location_hashtag_set)])
                     influence_scores.extend([mf_neighbor_location_hashtag_to_hashtag_weights[hashtag]*-1.0
                                              for hashtag in neighbor_location_hashtag_set.difference(location_hashtag_set)])
                 mean_influence_scores = np.mean(influence_scores)
                 influence_score = mf_location_to_location_weights[neighbor_location]*mean_influence_scores
                 tuo_neighbor_location_and_influence_score.append([neighbor_location, influence_score])
             tuo_neighbor_location_and_influence_score.sort(key=itemgetter(1))
             FileIO.writeToFileAsJson([location_object['id'], tuo_neighbor_location_and_influence_score], output_file)
Exemplo n.º 6
0
 def load_ltuo_test_hashtag_and_ltuo_location_and_pure_influence_score(test_model_id):
     """Load ``(hashtag, ltuo_location_and_pure_influence_score)`` tuples for a test model.

     The input path is f_ltuo_hashtag_and_ltuo_location_and_pure_influence_score
     formatted with test_model_id. Every item read from it is unpacked as a
     two-element record and returned as a tuple.

     Returns a list of two-element tuples, in file order.
     """
     input_file = f_ltuo_hashtag_and_ltuo_location_and_pure_influence_score%(test_model_id)
     ltuo_hashtag_and_ltuo_location_and_pure_influence_score = []
     for hashtag, ltuo_location_and_pure_influence_score in iterateJsonFromFile(input_file):
         ltuo_hashtag_and_ltuo_location_and_pure_influence_score.append((hashtag, ltuo_location_and_pure_influence_score))
     return ltuo_hashtag_and_ltuo_location_and_pure_influence_score
Exemplo n.º 7
0
 def load_ltuo_location_and_no_of_occurrences(START_TIME=datetime(2011, 5, 1), END_TIME=datetime(2011, 12, 31), WINDOW_OUTPUT_FOLDER='complete_prop'):
     """Load ``[location, number of (hashtag, occurrence time) entries]`` for every location.

     The input path is f_ltuo_location_and_ltuo_hashtag_and_occurrence_time
     formatted with WINDOW_OUTPUT_FOLDER and the '%Y-%m-%d' form of
     START_TIME and END_TIME. Each item read from it is unpacked as
     (location, entries) and reduced to the location and len(entries).

     Returns a list of two-element lists, in file order.
     """
     input_file = f_ltuo_location_and_ltuo_hashtag_and_occurrence_time%(
                      WINDOW_OUTPUT_FOLDER, START_TIME.strftime('%Y-%m-%d'), END_TIME.strftime('%Y-%m-%d'))
     return [[location, len(ltuo_hashtag_and_occurrence_time)]
             for location, ltuo_hashtag_and_occurrence_time in iterateJsonFromFile(input_file)]
Exemplo n.º 8
0
 def load_tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score(model_id):
     """Load ``(location, tuo_neighbor_location_and_sharing_affinity_score)`` tuples for a model.

     The input path is
     tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score_file
     formatted with model_id. Every item read from it is unpacked as a
     two-element record and returned as a tuple.

     Returns a list of two-element tuples, in file order.
     """
     input_file = tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score_file%model_id
     tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score = []
     for location, tuo_neighbor_location_and_sharing_affinity_score in iterateJsonFromFile(input_file):
         tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score.append(
             (location, tuo_neighbor_location_and_sharing_affinity_score))
     return tuo_location_and_tuo_neighbor_location_and_sharing_affinity_score
Exemplo n.º 9
0
 def load_tuo_location_and_tuo_neighbor_location_and_influence_score(model_id, hashtag_tag):
     """Load ``(location, tuo_neighbor_location_and_influence_score)`` tuples for a model and tag.

     The input path is
     tuo_location_and_tuo_neighbor_location_and_influence_score_file
     formatted with (model_id, hashtag_tag). Every item read from it is
     unpacked as a two-element record and returned as a tuple.

     Returns a list of two-element tuples, in file order.
     """
     input_file = tuo_location_and_tuo_neighbor_location_and_influence_score_file%(model_id, hashtag_tag)
     tuo_location_and_tuo_neighbor_location_and_influence_score = []
     for location, tuo_neighbor_location_and_influence_score in iterateJsonFromFile(input_file):
         tuo_location_and_tuo_neighbor_location_and_influence_score.append(
             (location, tuo_neighbor_location_and_influence_score))
     return tuo_location_and_tuo_neighbor_location_and_influence_score