Exemplo n.º 1
0
Arquivo: gmm.py Projeto: shikhach/quac
 def build(self):
     '''Fit the global model to the pooled points of every token.

        Every point held in the values of self.tokens is gathered into one
        geos.MultiPoint (using self.srid), which is handed to
        gmm_fit_tokenpoints(); element [1] of its result is stored in
        self.global_model.'''
     self.warn_if_parallel()
     # flatten the per-token point collections into one list
     pooled = []
     for token_points in self.tokens.itervalues():
         pooled.extend(token_points)
     allpoints = geos.MultiPoint(pooled, srid=self.srid)
     l.debug('fitting All_Tweets to %d points...' % len(allpoints))
     fit_result = gmm_fit_tokenpoints('_all_tweets_', allpoints)
     self.global_model = fit_result[1]
Exemplo n.º 2
0
 def group_tokens(self, tweets, trim_head_frac, min_instance_ct):
     '''Group tweet geometries by token and return a dict mapping each
        surviving token to a geos.MultiPoint of its points.

        Every token of every tweet contributes that tweet's geom. Tokens with
        fewer than min_instance_ct points are discarded; of the remainder,
        the most frequent fraction trim_head_frac (which must satisfy
        0 <= trim_head_frac < 1) is discarded too. Each MultiPoint takes its
        SRID from the geom of the first tweet. At least one token must
        survive.'''
     by_token = operator.itemgetter(0)
     # (token, point) pairs ordered by token, so that groupby() below sees
     # each token as a single run
     pairs = sorted(((tok, tw.geom) for tw in tweets for tok in tw.tokens),
                    key=by_token)
     # one (token, [points]) entry per distinct token
     grouped = [(tok, [pair[1] for pair in members])
                for (tok, members) in itertools.groupby(pairs, key=by_token)]
     l.debug('%d tokens total' % (len(grouped)))
     # drop tokens that occur too rarely
     frequent = [entry for entry in grouped
                 if len(entry[1]) >= min_instance_ct]
     l.debug('%d tokens appear >= %d times' %
             (len(frequent), min_instance_ct))
     # wrap each token's points in a multipoint
     srid = None
     tokens = []
     for (tok, pts) in frequent:
         if srid is None:
             # looked up lazily: only needed when some token survived
             srid = tweets[0].geom.srid
         tokens.append((tok, geos.MultiPoint(pts, srid=srid)))
     l.info('created %d multipoint token groups, %d total points' %
            (len(tokens), sum(len(t[1]) for t in tokens)))
     # drop the most frequent tokens
     assert (0 <= trim_head_frac < 1)
     tokens.sort(key=lambda i: len(i[1]), reverse=True)
     head_ct = int(round(trim_head_frac * len(tokens)))
     tokens = tokens[head_ct:]
     l.debug('%d tokens after head trim' % (len(tokens)))
     assert (len(tokens) > 0)
     return dict(tokens)
Exemplo n.º 3
0
Arquivo: srs.py Projeto: shikhach/quac
def geodesic_distance_sph(a, b):
    '''Return the geodesic distance in kilometers, on a sphere, between
       geos.Point a and geos.Point b. b is wrapped in a one-element
       MultiPoint and the work is delegated to geodesic_distance_mp_sph().
       For example (the result here is off by about 0.2%):

       >>> geodesic_distance_sph(geos.Point(-86.67, 36.12, srid=4326),
       ...                       geos.Point(-118.40, 33.94, srid=4326))
       2886.44...'''
    target = geos.MultiPoint([b], srid=b.srid)
    distances = geodesic_distance_mp_sph(a, target)
    return distances[0]
Exemplo n.º 4
0
Arquivo: srs.py Projeto: shikhach/quac
def geodesic_distance_ell(a, b):
    '''Return the geodesic distance in kilometers, on the ellipsoid, between
       geos.Point a and geos.Point b; the points can be in any SRS. b is
       wrapped in a one-element MultiPoint and the work is delegated to
       geodesic_distance_mp_ell(). For example, the distance from BNA to LAX
       (http://en.wikipedia.org/wiki/Great-circle_distance):

       >>> geodesic_distance_ell(geos.Point(-86.67, 36.12, srid=4326),
       ...                       geos.Point(-118.40, 33.94, srid=4326))
       2892.77...'''
    target = geos.MultiPoint([b], srid=b.srid)
    distances = geodesic_distance_mp_ell(a, target)
    return distances[0]
Exemplo n.º 5
0
Arquivo: gmm.py Projeto: shikhach/quac
 def locate(self, tokens, confidence):
     '''Return a Location_Estimate for the given tokens, or None if none of
        them is present in self.tokens.

        The stored points of every known token are pooled into a single
        geos.MultiPoint and passed to gmm_fit_tokenpoints(); element [1] of
        its result, together with confidence and self.srid, makes up the
        estimate.'''
     tweet_points = [pt
                     for token in tokens if token in self.tokens
                     for pt in self.tokens[token]]
     if not tweet_points:
         return None
     # NOTE(review): this MultiPoint is built without an explicit srid --
     # confirm that is intended.
     pooled = geos.MultiPoint(tweet_points)
     fit_result = gmm_fit_tokenpoints('all', pooled)
     return Location_Estimate(fit_result[1], confidence, self.srid)
Exemplo n.º 6
0
Arquivo: gmm.py Projeto: shikhach/quac
 def populate_samples(self, sample_ct):
     '''Draw sample_ct samples and store them, ranked, on self.

        self.samples becomes a list of (point, evals[0] entry, component
        index) tuples ordered by descending element [1], where evals is the
        result of self.eval() on the sampled coordinates and the component
        index is the argmax of the matching row of evals[1].
        self.samples_inbound_mp becomes srs.trim() of a MultiPoint of the
        points in that same order.'''
     sraw = [geos.Point(tuple(xy), srid=self.srid)
             for xy in self.sample(sample_ct, u.rand_np)]
     evals = self.eval([pt.coords for pt in sraw])
     logprobs = evals[0]
     component_is = [np.argmax(row) for row in evals[1]]
     self.samples = sorted(zip(sraw, logprobs, component_is),
                           key=operator.itemgetter(1), reverse=True)
     ranked_points = [sample[0] for sample in self.samples]
     mp = geos.MultiPoint(ranked_points, srid=self.srid)
     self.samples_inbound_mp = srs.trim(mp)
Exemplo n.º 7
0
Arquivo: gmm.py Projeto: shikhach/quac
def sample_points(rs, components, ct):
    '''Return a geos.MultiPoint (SRID srs.SRID_WGS84) of points sampled from
       a random gaussian mixture with the given number of components.

       Each component contributes ct // components samples, so the total is
       below ct when ct is not a multiple of components. Component
       parameters come from sample_gaussian(), and 5 is added to the mean
       after each component is drawn. rs supplies multivariate_normal() and
       is also passed to sample_gaussian().'''
    per_component = ct // components
    # seed with an empty (0, 2) array so the stacked result is always 2-D
    chunks = [np.array([]).reshape(0, 2)]
    mean = 0
    for _ in range(components):
        (mean, covar) = sample_gaussian(rs, mean)
        chunks.append(rs.multivariate_normal(mean, covar, per_component))
        mean += 5
    samples = np.concatenate(chunks, 0)
    points = [geos.Point(*xy) for xy in samples]
    return geos.MultiPoint(points, srid=srs.SRID_WGS84)
Exemplo n.º 8
0
Arquivo: gmm.py Projeto: shikhach/quac
 def populate_pred_region_real(self, trim=True):
     '''Compute self.pred_region and self.pred_region_threshold.

        The first round(self.pred_coverage * len(self.samples)) entries of
        self.samples are kept. For each of the self.n_components components,
        the kept samples whose element [2] equals that component index are
        wrapped in a convex hull (components with fewer than 3 such samples
        are skipped). The union of the hulls is stored in self.pred_region,
        always as a MultiPolygon; if trim is true it first goes through
        srs.trim(). self.pred_region_threshold is element [1] of the sample
        at the cut-off index.

        NOTE(review): presumably self.samples holds (point, log probability,
        component index) tuples ordered best-first -- confirm against
        populate_samples().'''
     # what's the contour value?
     sample_ct = len(self.samples)
     threshold_idx = int(round(self.pred_coverage * sample_ct))
     # When the coverage rounds up to the full sample count (e.g. 1.0) the
     # cut-off index is one past the end; fall back to the last sample
     # instead of raising IndexError.
     cutoff_sample = self.samples[min(threshold_idx, sample_ct - 1)]
     self.pred_region_threshold = cutoff_sample[1]
     bests = self.samples[:threshold_idx]
     # compute contours
     regions = []
     for component in range(self.n_components):
         member_points = [s[0] for s in bests if s[2] == component]
         if (len(member_points) < 3):
             # can't make a polygon with less than 3 vertices, skip
             continue
         hull_input = geos.MultiPoint(member_points, srid=self.srid)
         regions.append(hull_input.convex_hull)
     # create a multipolygon and clean up
     assert (len(regions) > 0)
     pr = geos.MultiPolygon(regions, srid=self.srid).cascaded_union
     if (trim):
         pr = srs.trim(pr)
     if (pr.geom_type == 'Polygon'):
         # cascaded_union can collapse a MultiPolygon into a single Polygon
         pr = geos.MultiPolygon([pr], srid=self.srid)
     assert (pr.geom_type == 'MultiPolygon')
     self.pred_region = pr
Exemplo n.º 9
0
Arquivo: gmm.py Projeto: shikhach/quac
def test_fitting(coverage, seed, sample_ct):
    '''Fit a 2-component Geo_GMM to synthetic points and measure the fit.

       coverage is passed to the model's prepare() and is the value that the
       observed coverages are compared against; seed seeds the
       np.random.RandomState used to generate points; sample_ct is both the
       'mc_sample_ct' model parameter and the number of Los Alamos points
       (1.5 times as many, truncated to an integer, are generated around
       Alert).

       Returns a dict with keys 'all_xys', 'inbounds_xys', 'g',
       'coverage_req', 'coverage_obs', 'coverage_error', 'coverage_fudge',
       'contour', 'coveraget_obs', 'coveraget_error', 'coveraget_fudge',
       'msae', 'mcae', and 'pra'.'''
    result = {}
    rs = np.random.RandomState(seed)
    Model.parms_init({'mc_sample_ct': sample_ct})  # FIXME: kludge ugly here

    # Create and fit a GMM. We fit random points centered on Alert, Nunavut (83
    # degrees north) as well as Los Alamos in order to test clamping for sampled
    # points that are too far north. The two places are roughly 5,447 km apart.
    ct = sample_ct
    # NumPy requires an integer sample count; ct * 1.5 is a float.
    alert_ct = int(ct * 1.5)
    # list() so the results support len(), + and slicing regardless of
    # whether zip() returns a list or an iterator
    alert_xys = list(zip(-62.33 + rs.normal(scale=4.0, size=alert_ct),
                         82.50 + rs.normal(scale=8.0, size=alert_ct)))
    # make sure we are indeed slushing over the northern boundary of the world
    assert (sum(1 for xy in alert_xys if xy[1] >= 90) > 8)
    la_xys = list(zip(-106.30 + rs.normal(scale=3.0, size=ct),
                      35.89 + rs.normal(scale=2.0, size=ct)))
    all_xys = alert_xys + la_xys
    inbounds_xys = [xy for xy in all_xys if xy[1] < 90]
    l.info('true points in bounds = %d/%d = %g' %
           (len(inbounds_xys), len(all_xys), len(inbounds_xys) / len(all_xys)))
    result['all_xys'] = all_xys
    result['inbounds_xys'] = inbounds_xys
    all_mp = geos.MultiPoint([geos.Point(xy) for xy in all_xys],
                             srid=srs.SRID_WGS84)

    t1 = time.time()
    g = Geo_GMM.from_fit(all_mp, 2)
    result['g'] = g
    l.info('fitted %d components in %gs' % (len(g.weights_), time.time() - t1))

    t1 = time.time()
    g.prepare(coverage)
    l.info("prepare()'d %d points in %gs" % (len(g.samples), time.time() - t1))

    l.info('component weights: %s' % ([g.weights_], ))
    l.info('component assignments: %s' % ([
        len([i for i in g.samples if i[2] == 0]),
        len([i for i in g.samples if i[2] == 1])
    ], ))

    # coverage: fraction of in-bounds true points for which covers_p() holds
    covers_ct = sum(
        g.covers_p(geos.Point(xy, srid=srs.SRID_WGS84)) for xy in inbounds_xys)
    result['coverage_req'] = coverage
    result['coverage_obs'] = covers_ct / len(inbounds_xys)
    result['coverage_error'] = result['coverage_obs'] - coverage
    result['coverage_fudge'] = coverage / result['coverage_obs']
    l.info('observed coverage (in-bounds) = %d/%d = %g' %
           (covers_ct, len(inbounds_xys), result['coverage_obs']))

    # mean contour() value over the in-bounds true points, timed per point
    t1 = time.time()
    result['contour'] = sum(
        g.contour(geos.Point(xy, srid=srs.SRID_WGS84))
        for xy in inbounds_xys) / len(inbounds_xys)
    l.info('computed contour() in %gs per point' %
           ((time.time() - t1) / len(inbounds_xys)))
    # same coverage measurement as above, but using coverst_p()
    covers_ct = sum(
        g.coverst_p(geos.Point(xy, srid=srs.SRID_WGS84))
        for xy in inbounds_xys)
    result['coveraget_obs'] = covers_ct / len(inbounds_xys)
    result['coveraget_error'] = result['coveraget_obs'] - coverage
    result['coveraget_fudge'] = coverage / result['coveraget_obs']
    l.info('observed coverage (in-bounds, coverst) = %d/%d = %g' %
           (covers_ct, len(inbounds_xys), result['coveraget_obs']))

    # absolute error for a random true point
    inb_sample = inbounds_xys[:1]
    t1 = time.time()
    sae = [g.sae(geos.Point(p, srid=srs.SRID_WGS84)) for p in inb_sample]
    l.info('computed SAE in %gs per point' %
           ((time.time() - t1) / len(inb_sample)))
    result['msae'] = np.mean(sae)
    t1 = time.time()
    cae = [g.cae(geos.Point(p, srid=srs.SRID_WGS84)) for p in inb_sample]
    l.info('computed CAE in %gs per point' %
           ((time.time() - t1) / len(inb_sample)))
    result['mcae'] = np.mean(cae)

    # area of confidence region
    result['pra'] = g.pred_area

    return result
Exemplo n.º 10
0
 def test_multicentroid(self):
     # Create two Feature rows from two points, plus a MultiPoint holding the
     # same two points.
     # NOTE(review): no assertions are visible in this excerpt; the test
     # body may continue beyond what is shown here.
     p1, p2 = geos.Point(10, 10), geos.Point(11, 11)
     f1 = Feature.objects.create(geom_point=p1)
     f2 = Feature.objects.create(geom_point=p2)
     mp = geos.MultiPoint([p1, p2])