示例#1
0
def near_edges(daily_ats, user_locs, in_paths):
    """
    Yield a NearEdge for every pair of users in daily_ats who live within
    25 miles of each other.

    daily_ats is iterated as (kind, frm, to) triples. A triple is counted
    only when both frm and to are keys of user_locs; counts are kept per
    (frm, to) pair and per kind. user_locs values are indexed with 0 for the
    longitude and 1 for the latitude.

    The last '.'-separated piece of in_paths[0] is parsed as a YYYY-MM-DD
    date, and every yielded edge carries the number of days between
    2012-08-01 and that date.

    Each NearEdge is built from: frm, to, the distance, the day offset, the
    number of 'at' triples and the number of 'rt' triples for the pair.
    """
    # FIXME: I'm really starting to dislike in_paths
    suffix = in_paths[0].split('.')[-1]
    when = datetime.datetime.strptime(suffix, "%Y-%m-%d").date()
    day_offset = (when - datetime.date(2012, 8, 1)).days

    tallies = collections.defaultdict(lambda: collections.defaultdict(int))
    for kind, frm, to in daily_ats:
        if frm in user_locs and to in user_locs:
            tallies[frm, to][kind] += 1

    # snapshot the keys once so the coordinate arrays and the loop below
    # walk the pairs in the same order
    pairs = list(tallies)
    flngs = np.array([user_locs[frm][0] for frm, to in pairs])
    flats = np.array([user_locs[frm][1] for frm, to in pairs])
    tlngs = np.array([user_locs[to][0] for frm, to in pairs])
    tlats = np.array([user_locs[to][1] for frm, to in pairs])
    dists = utils.np_haversine(flngs, tlngs, flats, tlats)

    for dist, (frm, to) in izip(dists, pairs):
        # written as "not <" so that a NaN distance is skipped as well
        if not dist < 25:
            continue
        tally = tallies[frm, to]
        yield NearEdge(
            frm,
            to,
            dist,
            day_offset,
            tally.get('at', 0),
            tally.get('rt', 0),
        )
示例#2
0
 def predict(self,nebrs_d,vect_fit):
     """
     Return the index of the neighbor closest to the median location.

     The median latitude and median longitude of nebrs_d['nebrs'] are taken
     independently; the result is the position (in that list) of the entry
     with the smallest utils.np_haversine distance to that median point.
     vect_fit is not used.
     """
     nebrs = nebrs_d['nebrs']
     lats = [nebr['lat'] for nebr in nebrs]
     lngs = [nebr['lng'] for nebr in nebrs]
     center_lat = np.median(lats)
     center_lng = np.median(lngs)
     to_center = utils.np_haversine(center_lng, lngs, center_lat, lats)
     return np.argmin(to_center)
示例#3
0
def graph_example_probs(vect_fit, in_paths):
    """
    Render an example of maximum likelihood estimation for four friends.

    Nothing happens unless the last character of in_paths[0] is '0'.
    Otherwise the log of peek.contact_curve is summed over a lat/lng grid
    for four fixed locations, and the result is saved to example_probs.png
    as an 8-bit greyscale image where the largest sum maps to 0 and the
    smallest to 255.

    vect_fit is iterated as (vers, cutoff, fit) triples; only the fits whose
    vers is 'leaf' are kept, and each location picks one of them by position.
    """
    if in_paths[0][-1] != '0':
        return
    leaf_fits = [fit for vers, _cutoff, fit in vect_fit if vers == 'leaf']

    lats = np.linspace(27.01, 32.99, 5*60)
    lngs = np.linspace(-100.99, -93.01, 5*80)
    # with these arguments meshgrid varies longitude along axis 0 and
    # latitude along axis 1
    lat_grid, lng_grid = np.meshgrid(lats, lngs)
    # one formatted string: prints exactly what ``print lats, lngs`` prints
    print("%s %s" % (lats, lngs))

    log_likelihood = np.zeros_like(lat_grid)

    # (longitude, latitude, index into leaf_fits)
    friends = (
        (-95.31, 29.73, 0), # Houston
        (-96.37, 30.67, 1), # Bryan, TX
        (-99.25, 31.25, 5), # Texas
        (-97.74, 30.27, 3), # Austin
    )
    for friend_lng, friend_lat, fit_index in friends:
        dists = utils.np_haversine(friend_lng, lng_grid, friend_lat, lat_grid)
        curve_vals = peek.contact_curve(dists, *leaf_fits[fit_index])
        log_likelihood += np.log(curve_vals)

    # rescale to [0, 255.999] so the uint8 conversion below stays in range
    spread = np.ptp(log_likelihood)
    shade = 255.999*(np.max(log_likelihood)-log_likelihood)/spread
    # transpose so each image row holds one latitude, then hand PIL a
    # C-contiguous byte buffer
    pixels = np.require(np.transpose(shade), np.uint8, ['C_CONTIGUOUS'])
    picture = PIL.Image.frombuffer('L', shade.shape, pixels)
    picture.save('example_probs.png')
示例#4
0
def _calc_dists(nebrs_d):
    """
    Return the matrix of distances from every neighbor to every neighbor
    and, when nebrs_d['gnp'] is truthy, to that extra point as well.

    Row i belongs to nebrs_d['nebrs'][i]. The columns follow the same order,
    with the gnp location appended as a final column when it is present.
    """
    gnp = nebrs_d['gnp']
    nebrs = nebrs_d['nebrs']
    lats = [nebr['lat'] for nebr in nebrs]
    lngs = [nebr['lng'] for nebr in nebrs]

    if gnp:
        col_lats = lats + [gnp['lat']]
        col_lngs = lngs + [gnp['lng']]
    else:
        col_lats = lats
        col_lngs = lngs

    # meshgrid(columns, rows): both outputs have one row per neighbor
    lat_cols, lat_rows = np.meshgrid(col_lats, lats)
    lng_cols, lng_rows = np.meshgrid(col_lngs, lngs)
    return utils.np_haversine(lng_cols, lng_rows, lat_cols, lat_rows)
示例#5
0
def exact_strange_bins(uids,mlocs):
    """
    Histogram the distance between every contact and every target user.

    mlocs is transposed into a longitude sequence and a latitude sequence.
    For each user returned by _paged_users for the distinct uids, the
    distances from that user's geonames_place to all of mlocs are binned
    with the edges from utils.dist_bins(120) and accumulated.

    Returns an enumerate over the accumulated per-bin counts.
    """
    target_lngs, target_lats = np.transpose(mlocs)
    edges = utils.dist_bins(120)
    totals = np.zeros(len(edges)-1)
    for user in _paged_users(set(uids),fields=['gnp']):
        place = user.geonames_place
        dists = utils.np_haversine(
            place.lng, target_lngs,
            place.lat, target_lats)
        # histogram returns (counts, edges); only the counts are needed
        totals += np.histogram(dists, edges)[0]
    return enumerate(totals)
示例#6
0
def _dists_for_lat(lat):
    """
    Return the grid of distances from one reference point to every cell of
    a 0.1 degree grid spanning half of the longitudes.

    The reference point sits at longitude .05 and at latitude
    .05 + .1*_tile(lat). The result has one row per longitude
    (.05 to 180.05, 1801 values) and one column per latitude
    (-89.95 to 89.95, 1800 values).
    """
    lats = np.linspace(-89.95,89.95,1800)
    lngs = np.linspace(.05,180.05,1801)
    lat_grid, lng_grid = np.meshgrid(lats, lngs)

    def _constant_grid(value):
        # array shaped like the grid with every cell set to value
        grid = np.empty_like(lat_grid)
        grid.fill(value)
        return grid

    ref_lats = _constant_grid(.05 + .1*_tile(lat))
    ref_lngs = _constant_grid(.05)

    return utils.np_haversine(ref_lngs, lng_grid, ref_lats, lat_grid)
示例#7
0
def mdist_real(nebrs_d):
    """
    Pair the median location error with the actual location error for the
    target users after adding noise to home location in mloc_blur.

    Records with a falsy 'gnp' are ignored. For the rest, the actual error
    is the utils.np_haversine distance between the record's 'mloc'
    (longitude at index 0, latitude at index 1) and its 'gnp' location.

    Returns an iterator of (gnp mdist, actual distance) pairs.
    """
    gnp_lats, gnp_lngs = [], []
    home_lngs, home_lats = [], []
    mdists = []
    for record in nebrs_d:
        gnp = record['gnp']
        if gnp:
            gnp_lats.append(gnp['lat'])
            gnp_lngs.append(gnp['lng'])
            home_lngs.append(record['mloc'][0])
            home_lats.append(record['mloc'][1])
            mdists.append(gnp['mdist'])

    actual = utils.np_haversine(home_lngs, gnp_lngs, home_lats, gnp_lats)
    return itertools.izip(mdists, actual)
示例#8
0
def stranger_prob(lat_tile,contact_count):
    """
    Calculate pStrangers for every longitude tile at a specific latitude.

    pStrangers is the probability that a user lives at a location given the
    locations of people they are not connected to. This step of
    FriendlyLocation reportedly took about 2 weeks on a machine with 8 cores
    when the log-probability grid was recomputed for every longitude tile;
    it is now computed once and shifted instead.

        lat_tile selects the latitude in 0.1 degree tiles; it is used as
            index lat_tile+900 into 1800 latitude cells, so it should be
            between -900 and 899
        contact_count is a matrix of the locations of the contacts; it is
            multiplied elementwise with the 3600x1800 (longitude x latitude)
            grid built here

    Yields ((lng_tile, lat_tile), prob) for lng_tile from -1800 to 1799,
    where prob is the sum of contact_count times log(1 - contact_prob).
    """
    lat_range = np.linspace(-89.95,89.95,1800)
    lng_range = np.linspace(.05,359.95,3600)
    lat_grid,lng_grid = np.meshgrid(lat_range, lng_range)

    # distances from the cell in longitude row 0 at the requested latitude
    dists = utils.np_haversine(.05, lng_grid, .1*lat_tile+.05, lat_grid)
    # FIXME: the name of a slurped command-line argument should not have to
    # match the file name
    contact_mat = contact_count
    # overwrite the reference cell's distance to itself with 2
    dists[0,lat_tile+900] = 2

    # The grid for the next longitude tile is this one rolled by one row, so
    # the expensive part is evaluated once and the result is rolled.
    # NOTE(review): assumes utils.contact_prob is applied elementwise, so
    # that it commutes with np.roll -- confirm against utils.
    log_probs = np.log(1-utils.contact_prob(dists))

    for lng_tile in xrange(-1800,1800):
        prob = np.sum(contact_mat*log_probs)
        yield (lng_tile,lat_tile),prob
        log_probs = np.roll(log_probs,1,0)