Exemplo n.º 1
0
def calculate_diversity(data):
    """Compute travel diversity per user and averaged per subprefecture.

    For each user id in 0..500000 the diversity index is the entropy of the
    user's visit frequencies, normalised by the log of the total number of
    visits (the original comment attributes the formula to "Finger on the
    Pulse"). The per-subprefecture index is the mean of the indexes of the
    users that ``rd.read_in_subpref_users`` assigns to that subprefecture.
    The per-subprefecture values are also written to a TSV file.

    Args:
        data: container indexable by every user id 0..500000; each entry
            maps a subprefecture id to a visit count. Visits stored under
            key -1 count towards the total but contribute no entropy term
            (presumably -1 marks an unknown location -- TODO confirm).

    Returns:
        A ``(div, div_subpref)`` tuple of defaultdicts: user id -> diversity
        and subprefecture id -> mean diversity of its users.
    """
    user_ids = range(500001)
    subpref_ids = range(1, 256)

    # ---- user diversity -------------------------------------------------
    div = defaultdict(int)

    for usr in user_ids:
        visits = data[usr]
        # total number of place visits of this user
        S = float(sum(visits.values()))

        if S > 0:
            entropy = 0.0
            for subpref in visits:
                count = visits[subpref]
                # a zero count contributes nothing (and log(0) is undefined)
                if subpref == -1 or count == 0:
                    continue
                # frequency of visits to this place
                v = count / S
                entropy -= v * math.log(v)

            # log(1) == 0: a user with a single visit has no diversity, and
            # dividing by the normaliser would raise ZeroDivisionError.
            normaliser = math.log(S)
            div[usr] = entropy / normaliser if normaliser != 0 else 0.0

    # ---- subprefecture diversity ---------------------------------------
    num_subpref_usrs = rd.read_in_subpref_num_users()
    div_subpref = defaultdict(int)

    for subpref in subpref_ids:
        user_count = num_subpref_usrs[subpref]
        # only places where we have users
        if user_count > 0:
            # membership flags indexed by user id (1 == belongs here)
            subpref_usrs = rd.read_in_subpref_users(subpref)
            # sum the diversity of all member users, then average
            diversity_sum = sum(
                div[usr] for usr in user_ids if subpref_usrs[usr] == 1
            )
            div_subpref[subpref] = diversity_sum / float(user_count)

    file_name2 = "/home/sscepano/D4D res/allstuff/diversity of travel/subpref_diversity_index.tsv"
    # 'with' guarantees the file is closed even if a write fails
    with open(file_name2, "w") as f2:
        for subpref in subpref_ids:
            if num_subpref_usrs[subpref] > 0:
                f2.write(str(subpref) + '\t' + str(div_subpref[subpref]) + '\n')

    print(file_name2)

    return div, div_subpref
Exemplo n.º 2
0
def from_file_radius_gyr3(file_name, subpref):
    """Plot the radius-of-gyration distribution of one subprefecture's users.

    Reads tab-separated ``<user id>\\t<radius of gyration>`` lines from
    ``file_name``, keeps the users that ``rd.read_in_subpref_users(subpref)``
    flags with 1, truncates each radius to an integer and plots, on log-log
    axes, the share of the subprefecture's users having each radius. The
    figure is saved as a PNG; nothing is plotted when the subprefecture has
    no users or when none of its users appear in the file.

    Args:
        file_name: path of the tab-separated input file.
        subpref: subprefecture id used to select users and name the figure.

    Returns:
        None.
    """
    users_list = rd.read_in_subpref_users(subpref)
    total = float(rd.read_in_subpref_num_users()[subpref])

    if total <= 0:
        return

    # integer radius of gyration of every user belonging to this subpref
    nits = []
    with open(file_name, 'r') as f:
        for line in f:
            if not line.strip():
                # tolerate blank lines (e.g. a trailing newline at EOF)
                continue
            it, nit = line.split('\t')
            nit = float(nit)
            it = int(it)
            if users_list[it] == 1:
                nits.append(int(nit))

    if not nits:
        # min()/max() below would raise ValueError on an empty list
        print("No radius of gyr data for subpref %s" % subpref)
        return

    # single-argument print calls behave the same on Python 2 and 3
    print("Minimum radius of gyr %s" % min(nits))
    print("Maximum radius of gyr %s" % max(nits))
    print("Total radius of gyr %s" % float(sum(nits)))

    # number of users per (integer) radius value
    pdf_nits = defaultdict(int)
    for nit in nits:
        pdf_nits[nit] += 1

    # x: radius values in ascending order; y: share of the subpref's users
    its7s = sorted(pdf_nits)
    nits7s = [pdf_nits[rg] / total for rg in its7s]

    # sanity check: sum of the plotted shares
    test = sum(nits7s)
    print(test)

    # ---- plot the distribution ------------------------------------------
    plt.figure(7)

    plt.plot(its7s, nits7s, 'o', linewidth=0.5, label= 'distribution of Rg')

    plt.xlabel('rg [km]')
    plt.ylabel('P(rg)')
    plt.legend()

    # log-log plot; comment out the two scale calls for a regular plot
    plt.yscale('log')
    plt.xscale('log')
    figure_name = "/home/sscepano/D4D res/allstuff/rg/1/rg_" + str(subpref) + ".png"

    print(figure_name)
    plt.savefig(figure_name, format = "png", dpi=300)

    plt.clf()

    return