def __init__(self,
             borders_file,
             output_file,
             region,
             photo_dens_file=None,
             pop_dens_file=None,
             top_cities_file=None,
             osm=False,
             resolution='i',
             width=50.,
             thick=10.,
             color='darkred',
             linestyle='solid',
             font_size=30.0,
             dot_size=30.0,
             label_offset=0.00075,
             sep=1.0,
             intervals=100,
             scale_sizes='',
             natural_scales=''):
    """Store the scale settings, initialise the DrawMap base and measure the map.

    ``borders_file`` through ``label_offset`` are forwarded positionally and
    unchanged to ``DrawMap.__init__``, followed by an extra height derived
    from the number of scale sizes.

    ``sep`` and ``intervals`` are stored as given.  ``scale_sizes`` and
    ``natural_scales`` are comma-separated strings of numbers; each is parsed
    into a list of floats (an empty string gives an empty list).

    After the base initialiser has run, the extent of ``self.cc`` is stored
    both as plain coordinate differences (``width_c`` / ``height_c``) and as
    ``geo.distance`` results (``width_km`` / ``height_km``), and both pairs
    are printed.
    """

    def parse_floats(csv_text):
        # '' -> [], '1,2.5' -> [1.0, 2.5]
        if len(csv_text) == 0:
            return []
        return [float(piece) for piece in csv_text.split(',')]

    self.sep = sep
    self.intervals = intervals
    self.scale_sizes = parse_floats(scale_sizes)
    self.natural_scales = parse_floats(natural_scales)

    # One 0.035 slot per scale size, plus one more.
    extra_height = (len(self.scale_sizes) + 1) * 0.035

    DrawMap.__init__(self, borders_file, output_file, region,
                     photo_dens_file, pop_dens_file, top_cities_file, osm,
                     resolution, width, thick, color, linestyle, font_size,
                     dot_size, label_offset, extra_height)

    # self.cc is read as (lat, lng, lat, lng): indices 0/2 are used as
    # latitudes and 1/3 as longitudes in the distance calls below.
    lat_lo, lng_lo = self.cc[0], self.cc[1]
    lat_hi, lng_hi = self.cc[2], self.cc[3]

    self.width_c = lng_hi - lng_lo
    self.height_c = lat_hi - lat_lo

    # Width runs along the first latitude, height along the first longitude.
    self.width_km = geo.distance({'lat': lat_lo, 'lng': lng_lo},
                                 {'lat': lat_lo, 'lng': lng_hi})
    self.height_km = geo.distance({'lat': lat_lo, 'lng': lng_lo},
                                  {'lat': lat_hi, 'lng': lng_lo})

    print('width: %s; height: %s' % (self.width_c, self.height_c))
    print('width: %skm; height: %skm' % (self.width_km, self.height_km))
def tokms(segments):
    """Re-express segment endpoints as distances from the minimum corner.

    The minimum ``x`` (over every ``x1``/``x2``) and minimum ``y`` (over every
    ``y1``/``y2``) define an origin.  Each coordinate of each segment is then
    replaced by ``geo.distance`` from that origin, measured along one axis at
    a time.  ``x`` values are passed to ``geo.distance`` as ``'lat'`` and
    ``y`` values as ``'lng'``.

    Args:
        segments: sequence of dicts with keys ``x1``, ``y1``, ``x2``, ``y2``,
            ``id1`` and ``id2``.

    Returns:
        A new list of dicts with the same keys; ``id1``/``id2`` are copied
        through unchanged.  An empty input gives an empty list.
    """
    if not segments:
        return []

    # The minimum y must be taken over y1 *and* y2.  The previous code tested
    # x2 against min_x before assigning y2, so min_y could be wrong.
    min_x = min(min(seg['x1'], seg['x2']) for seg in segments)
    min_y = min(min(seg['y1'], seg['y2']) for seg in segments)

    def from_origin(lat, lng):
        # Distance between the minimum corner and the given point.
        return geo.distance({'lat': min_x, 'lng': min_y},
                            {'lat': lat, 'lng': lng})

    return [
        {
            'x1': from_origin(seg['x1'], min_y),
            'y1': from_origin(min_x, seg['y1']),
            'x2': from_origin(seg['x2'], min_y),
            'y2': from_origin(min_x, seg['y2']),
            'id1': seg['id1'],
            'id2': seg['id2'],
        }
        for seg in segments
    ]
# process_link(link, time): record one occurrence of a link between two
# locations.
#
# - link[0] and link[1] are put in ascending order, so the same pair always
#   yields the same (v1, v2) key regardless of direction; self.ll keeps an
#   occurrence count per key.
# - The self.locmap entries of both endpoints are updated: the 'neighbors'
#   and 'degree' counters are incremented, the geo.distance between the two
#   entries is added to 'dist', and the converted time is added to 'time'.
# - `time` is converted with float(time) / 60 / 60 / 24 before being added
#   (presumably seconds -> days; confirm the unit with the caller).
# - self.update_angle is called once in each direction.
#
# NOTE(review): the indentation of this block has been lost. It cannot be
# told from here whether the two 'neighbors' increments belong to the `else`
# branch (only the first time a link is seen) or run on every call. Confirm
# against the original layout before restructuring this code.
def process_link(self, link, time): v1 = link[0] v2 = link[1] if v1 > v2: v1 = link[1] v2 = link[0] l = (v1, v2) if l in self.ll: self.ll[l] += 1 else: self.ll[l] = 1 self.locmap[v1]['neighbors'] += 1 self.locmap[v2]['neighbors'] += 1 self.locmap[v1]['degree'] += 1 self.locmap[v2]['degree'] += 1 loc1 = self.locmap[v1] loc2 = self.locmap[v2] dist = geo.distance(loc1, loc2) self.locmap[v1]['dist'] += dist self.locmap[v2]['dist'] += dist t = float(time) / 60. t /= 60. t /= 24. self.locmap[v1]['time'] += t self.locmap[v2]['time'] += t self.update_angle(v1, v2) self.update_angle(v2, v1)
# post_process(): finalise the per-location statistics held in self.locmap.
#
# Pass 1, over self.locmap: for entries whose 'degree' is > 0, 'dist', 'time'
#   and 'angle' are divided by 'degree'; 'users' is replaced by its length.
# Pass 2, over self.ll: for every link, w is the link's count as a float and
#   dist is the geo.distance between the two endpoint entries. The helpers
#   update_entropy and update_dist_var are called for both endpoints, and
#   update_angle_var and update_angle_entropy for both directions.
# Pass 3, over self.locmap again: 'dist_var' and 'angle_var' are divided by
#   'degree' where it is > 0, and compute_angle_entropy(loc) is called.
#
# The three passes are order-dependent: pass 2 runs after the divisions of
# pass 1, and pass 3 divides what pass 2 accumulated (presumably; the
# helpers are not visible here).
#
# NOTE(review): the indentation of this block has been lost. It cannot be
# told from here whether the 'users' replacement (pass 1) and the
# compute_angle_entropy call (pass 3) are guarded by `degree > 0` or run for
# every location. Confirm against the original layout before restructuring.
def post_process(self): for loc in self.locmap: degree = self.locmap[loc]['degree'] if degree > 0: self.locmap[loc]['dist'] /= degree self.locmap[loc]['time'] /= degree self.locmap[loc]['angle'] /= degree self.locmap[loc]['users'] = len(self.locmap[loc]['users']) for link in self.ll: v1 = link[0] v2 = link[1] w = float(self.ll[link]) loc1 = self.locmap[v1] loc2 = self.locmap[v2] dist = geo.distance(loc1, loc2) self.update_entropy(v1, w) self.update_entropy(v2, w) self.update_dist_var(v1, dist, w) self.update_dist_var(v2, dist, w) self.update_angle_var(v1, v2, w) self.update_angle_var(v2, v1, w) self.update_angle_entropy(v1, v2, w) self.update_angle_entropy(v2, v1, w) for loc in self.locmap: degree = self.locmap[loc]['degree'] if degree > 0: self.locmap[loc]['dist_var'] /= degree self.locmap[loc]['angle_var'] /= degree self.compute_angle_entropy(loc)
def filter(self, csv_in, csv_out, max_dist):
    """Zero the weight of every edge longer than ``max_dist`` and save the graph.

    The graph is read from ``csv_in`` with ``graph.read_graph``.  For each
    edge the ``geo.distance`` between the coordinates of its two endpoints
    (looked up in ``self.locmap.coords``) is compared with ``max_dist``.
    Edges that exceed it keep their key but get the value ``0.``.  The result
    is written to ``csv_out`` with ``graph.write_graph``.
    """
    print('filtering graph for maximum distance: %s' % max_dist)

    edges = graph.read_graph(csv_in)
    for pair in edges:
        origin = self.locmap.coords[pair[0]]
        target = self.locmap.coords[pair[1]]
        too_long = geo.distance(origin, target) > max_dist
        if too_long:
            # Only the value changes; the set of keys is untouched.
            edges[pair] = 0.

    graph.write_graph(edges, csv_out)
def tokms(segments):
    """Re-express segment endpoints as distances from the minimum corner.

    The minimum ``x`` (over every ``x1``/``x2``) and minimum ``y`` (over every
    ``y1``/``y2``) define an origin.  Each coordinate of each segment is then
    replaced by ``geo.distance`` from that origin, measured along one axis at
    a time.  ``x`` values are passed to ``geo.distance`` as ``'lat'`` and
    ``y`` values as ``'lng'``.

    Args:
        segments: sequence of dicts with keys ``x1``, ``y1``, ``x2``, ``y2``,
            ``id1`` and ``id2``.

    Returns:
        A new list of dicts with the same keys; ``id1``/``id2`` are copied
        through unchanged.  An empty input gives an empty list.
    """
    if not segments:
        return []

    # The minimum y must be taken over y1 *and* y2.  The previous code tested
    # x2 against min_x before assigning y2, so min_y could be wrong.
    min_x = min(min(seg['x1'], seg['x2']) for seg in segments)
    min_y = min(min(seg['y1'], seg['y2']) for seg in segments)

    def from_origin(lat, lng):
        # Distance between the minimum corner and the given point.
        return geo.distance({'lat': min_x, 'lng': min_y},
                            {'lat': lat, 'lng': lng})

    return [
        {
            'x1': from_origin(seg['x1'], min_y),
            'y1': from_origin(min_x, seg['y1']),
            'x2': from_origin(seg['x2'], min_y),
            'y2': from_origin(min_x, seg['y2']),
            'id1': seg['id1'],
            'id2': seg['id2'],
        }
        for seg in segments
    ]
def write_dists(g, db, file_path):
    """Write the length of every graph edge to a one-column text file.

    The file starts with the header line ``distance``.  For each edge of
    ``g`` the ``geo.distance`` between its two endpoints (coordinates looked
    up through ``LocMap(db).coords``) is written ``int(g[edge])`` times, one
    value per line.  Edges whose distance is not positive are skipped and
    reported on stdout instead.

    Args:
        g: mapping from edge (a pair of location keys) to its weight.
        db: handle passed to ``LocMap``.
        file_path: path of the file to create or overwrite.
    """
    # The context manager closes the file even if a lookup or distance
    # computation raises part-way through.
    with open(file_path, 'w') as f_dist:
        f_dist.write('distance\n')
        locmap = LocMap(db)
        for edge in g:
            loc1 = locmap.coords[edge[0]]
            loc2 = locmap.coords[edge[1]]
            dist = geo.distance(loc1, loc2)
            if dist > 0:
                # One line per unit of weight, written in a single call; a
                # zero or negative weight yields an empty string.
                f_dist.write(('%s\n' % (dist,)) * int(g[edge]))
            else:
                print('zero distance found between %s and %s' % (loc1, loc2))
def compute(self, infile, outfile):
    """Write every edge distance to ``outfile`` and print the mean distance.

    The graph is read from ``infile`` with ``graph.read_graph``.  For each
    edge, the ``geo.distance`` between its endpoints (coordinates from
    ``self.locmap.coords``) is written once per unit of edge weight, one
    value per line.  The mean of all written values is then printed; if
    nothing was written (empty graph or only zero weights) the mean is
    reported as ``nan`` instead of raising ``ZeroDivisionError``.

    Args:
        infile: path of the graph file to read.
        outfile: path of the distance file to create or overwrite.
    """
    g = graph.read_graph(infile)
    total_distance = 0.
    count = 0

    # The context manager closes the file even if a lookup fails mid-way.
    with open(outfile, 'w') as f:
        for edge in g:
            loc1 = self.locmap.coords[edge[0]]
            loc2 = self.locmap.coords[edge[1]]
            dist = geo.distance(loc1, loc2)
            # Weights may be stored as floats; range() needs an int.
            weight = int(g[edge])
            # NOTE(review): the mean is accumulated once per written line,
            # i.e. weighted by edge weight -- confirm this matches intent.
            for _ in range(weight):
                f.write('%s\n' % dist)
                total_distance += dist
                count += 1

    if count:
        mean_distance = total_distance / count
    else:
        mean_distance = float('nan')
    print('mean distance: %s' % mean_distance)
def compute(self, infile, outfile):
    """Write every edge distance to ``outfile`` and print the mean distance.

    The graph is read from ``infile`` with ``graph.read_graph``.  For each
    edge, the ``geo.distance`` between its endpoints (coordinates from
    ``self.locmap.coords``) is written once per unit of edge weight, one
    value per line.  The mean of all written values is then printed; if
    nothing was written (empty graph or only zero weights) the mean is
    reported as ``nan`` instead of raising ``ZeroDivisionError``.

    Args:
        infile: path of the graph file to read.
        outfile: path of the distance file to create or overwrite.
    """
    g = graph.read_graph(infile)
    total_distance = 0.0
    count = 0

    # The context manager closes the file even if a lookup fails mid-way.
    with open(outfile, "w") as f:
        for edge in g:
            loc1 = self.locmap.coords[edge[0]]
            loc2 = self.locmap.coords[edge[1]]
            dist = geo.distance(loc1, loc2)
            # Weights may be stored as floats; range() needs an int.
            weight = int(g[edge])
            # NOTE(review): the mean is accumulated once per written line,
            # i.e. weighted by edge weight -- confirm this matches intent.
            for _ in range(weight):
                f.write("%s\n" % dist)
                total_distance += dist
                count += 1

    if count:
        mean_distance = total_distance / count
    else:
        mean_distance = float("nan")
    print("mean distance: %s" % mean_distance)
def process_user(self, user_id):
    """Compute activity metrics for one user and store them on the user row.

    The user's media rows are loaded ordered by ``ts``.  Users seen at fewer
    than two distinct locations are left untouched.  For the others the
    following are computed and written with a single ``UPDATE`` (which also
    sets ``active=1``), then committed:

    * photo count, first and last timestamp, mean gap between photos;
    * number of distinct locations and the Herfindahl index of the visit
      frequencies;
    * distances between every pair of distinct locations (stored as a
      space-separated string) and their mean;
    * mean distance over consecutive photos taken at different locations;
    * comments/likes given (row counts) and received (``self.x_received``).

    Args:
        user_id: id of the user; interpolated directly into the SQL text.
    """
    # NOTE(review): the statements below build SQL by string interpolation.
    # If user_id can ever come from outside, switch to the driver's
    # parameter binding.
    self.db.cur.execute("SELECT location, ts, id FROM media WHERE user=%s ORDER BY ts" % (user_id, ))
    data = self.db.cur.fetchall()
    locations = [row[0] for row in data]

    # unique locations
    ulocations = set(locations)

    # only compute metrics for users who have been to at least 2 distinct locations
    if len(ulocations) < 2:
        return

    # photos
    photos = len(data)
    photo_ids = [row[2] for row in data]

    # time stuff
    times = sorted(row[1] for row in data)
    first_ts = times[0]
    last_ts = times[-1]
    time_deltas = [later - earlier for earlier, later in zip(times, times[1:])]
    mean_time_interval = sum(time_deltas) / len(time_deltas)

    # location stuff
    loc_count = len(ulocations)
    freqs = dict.fromkeys(ulocations, 0)
    for loc in locations:
        freqs[loc] += 1
    # float() keeps the share a true division on Python 2 as well.
    visits = float(len(locations))
    herfindahl = 0.0
    for loc in freqs:
        share = freqs[loc] / visits
        herfindahl += share * share

    # Distance between every unordered pair of distinct locations.
    distances = [geo.distance(self.locs[a], self.locs[b])
                 for a, b in itertools.combinations(ulocations, 2)]
    mean_distance = sum(distances) / len(distances)
    dists_str = ' '.join(str(d) for d in distances)

    # Distance actually travelled between consecutive photos.  At least one
    # consecutive pair differs because there are >= 2 distinct locations.
    total_dist = 0.
    count = 0
    for previous, current in zip(locations, locations[1:]):
        if previous != current:
            total_dist += geo.distance(self.locs[previous], self.locs[current])
            count += 1
    mean_weighted_dist = total_dist / count

    # comments
    self.db.cur.execute("SELECT count(id) FROM comment WHERE user=%s" % (user_id,))
    comments_given = self.db.cur.fetchall()[0][0]
    comments_received = self.x_received('comment', photo_ids)

    # likes
    self.db.cur.execute("SELECT count(id) FROM likes WHERE user=%s" % (user_id,))
    likes_given = self.db.cur.fetchall()[0][0]
    # Previously this passed 'comment', so likes_received duplicated
    # comments_received.  NOTE(review): 'likes' is taken from the table name
    # used just above -- confirm it is what x_received expects.
    likes_received = self.x_received('likes', photo_ids)

    self.db.cur.execute('UPDATE user SET active=1,dists_str="%s",photos=%s,first_ts=%s,last_ts=%s,mean_time_interval=%s,locations=%s,herfindahl=%s,mean_dist=%s,mean_weighted_dist=%s,comments_given=%s,comments_received=%s,likes_given=%s,likes_received=%s WHERE id=%s'
                        % (dists_str, photos, first_ts, last_ts, mean_time_interval, loc_count, herfindahl, mean_distance, mean_weighted_dist, comments_given, comments_received, likes_given, likes_received, user_id,))
    self.db.conn.commit()
def process_user(self, user_id):
    """Compute activity metrics for one user and store them on the user row.

    The user's media rows are loaded ordered by ``ts``.  Users seen at fewer
    than two distinct locations are left untouched.  For the others the
    following are computed and written with a single ``UPDATE`` (which also
    sets ``active=1``), then committed:

    * photo count, first and last timestamp, mean gap between photos;
    * number of distinct locations and the Herfindahl index of the visit
      frequencies;
    * distances between every pair of distinct locations (stored as a
      space-separated string) and their mean;
    * mean distance over consecutive photos taken at different locations;
    * comments/likes given (row counts) and received (``self.x_received``).

    Args:
        user_id: id of the user; interpolated directly into the SQL text.
    """
    # NOTE(review): the statements below build SQL by string interpolation.
    # If user_id can ever come from outside, switch to the driver's
    # parameter binding.
    self.db.cur.execute(
        "SELECT location, ts, id FROM media WHERE user=%s ORDER BY ts" %
        (user_id, ))
    data = self.db.cur.fetchall()
    locations = [row[0] for row in data]

    # unique locations
    ulocations = set(locations)

    # only compute metrics for users who have been to at least 2 distinct locations
    if len(ulocations) < 2:
        return

    # photos
    photos = len(data)
    photo_ids = [row[2] for row in data]

    # time stuff
    times = sorted(row[1] for row in data)
    first_ts = times[0]
    last_ts = times[-1]
    time_deltas = [
        later - earlier for earlier, later in zip(times, times[1:])
    ]
    mean_time_interval = sum(time_deltas) / len(time_deltas)

    # location stuff
    loc_count = len(ulocations)
    freqs = dict.fromkeys(ulocations, 0)
    for loc in locations:
        freqs[loc] += 1
    # float() keeps the share a true division on Python 2 as well.
    visits = float(len(locations))
    herfindahl = 0.0
    for loc in freqs:
        share = freqs[loc] / visits
        herfindahl += share * share

    # Distance between every unordered pair of distinct locations.
    distances = [
        geo.distance(self.locs[a], self.locs[b])
        for a, b in itertools.combinations(ulocations, 2)
    ]
    mean_distance = sum(distances) / len(distances)
    dists_str = ' '.join(str(d) for d in distances)

    # Distance actually travelled between consecutive photos.  At least one
    # consecutive pair differs because there are >= 2 distinct locations.
    total_dist = 0.
    count = 0
    for previous, current in zip(locations, locations[1:]):
        if previous != current:
            total_dist += geo.distance(self.locs[previous],
                                       self.locs[current])
            count += 1
    mean_weighted_dist = total_dist / count

    # comments
    self.db.cur.execute("SELECT count(id) FROM comment WHERE user=%s" %
                        (user_id, ))
    comments_given = self.db.cur.fetchall()[0][0]
    comments_received = self.x_received('comment', photo_ids)

    # likes
    self.db.cur.execute("SELECT count(id) FROM likes WHERE user=%s" %
                        (user_id, ))
    likes_given = self.db.cur.fetchall()[0][0]
    # Previously this passed 'comment', so likes_received duplicated
    # comments_received.  NOTE(review): 'likes' is taken from the table name
    # used just above -- confirm it is what x_received expects.
    likes_received = self.x_received('likes', photo_ids)

    self.db.cur.execute(
        'UPDATE user SET active=1,dists_str="%s",photos=%s,first_ts=%s,last_ts=%s,mean_time_interval=%s,locations=%s,herfindahl=%s,mean_dist=%s,mean_weighted_dist=%s,comments_given=%s,comments_received=%s,likes_given=%s,likes_received=%s WHERE id=%s'
        % (
            dists_str,
            photos,
            first_ts,
            last_ts,
            mean_time_interval,
            loc_count,
            herfindahl,
            mean_distance,
            mean_weighted_dist,
            comments_given,
            comments_received,
            likes_given,
            likes_received,
            user_id,
        ))
    self.db.conn.commit()
def __init__(self,
             borders_file,
             output_file,
             region,
             photo_dens_file=None,
             pop_dens_file=None,
             top_cities_file=None,
             osm=False,
             resolution='i',
             width=50.,
             thick=10.,
             color='darkred',
             linestyle='solid',
             font_size=30.0,
             dot_size=30.0,
             label_offset=0.00075,
             sep=1.0,
             intervals=100,
             scale_sizes='',
             natural_scales=''):
    """Store the scale settings, initialise the DrawMap base and measure the map.

    ``borders_file`` through ``label_offset`` are forwarded positionally and
    unchanged to ``DrawMap.__init__``, followed by an extra height derived
    from the number of scale sizes.

    ``sep`` and ``intervals`` are stored as given.  ``scale_sizes`` and
    ``natural_scales`` are comma-separated strings of numbers; each is parsed
    into a list of floats (an empty string gives an empty list).

    After the base initialiser has run, the extent of ``self.cc`` is stored
    both as plain coordinate differences (``width_c`` / ``height_c``) and as
    ``geo.distance`` results (``width_km`` / ``height_km``), and both pairs
    are printed.
    """

    def parse_floats(csv_text):
        # '' -> [], '1,2.5' -> [1.0, 2.5]
        if len(csv_text) == 0:
            return []
        return [float(piece) for piece in csv_text.split(',')]

    self.sep = sep
    self.intervals = intervals
    self.scale_sizes = parse_floats(scale_sizes)
    self.natural_scales = parse_floats(natural_scales)

    # One 0.035 slot per scale size, plus one more.
    extra_height = (len(self.scale_sizes) + 1) * 0.035

    DrawMap.__init__(self, borders_file, output_file, region,
                     photo_dens_file, pop_dens_file, top_cities_file, osm,
                     resolution, width, thick, color, linestyle, font_size,
                     dot_size, label_offset, extra_height)

    # self.cc is read as (lat, lng, lat, lng): indices 0/2 are used as
    # latitudes and 1/3 as longitudes in the distance calls below.
    lat_lo, lng_lo = self.cc[0], self.cc[1]
    lat_hi, lng_hi = self.cc[2], self.cc[3]

    self.width_c = lng_hi - lng_lo
    self.height_c = lat_hi - lat_lo

    # Width runs along the first latitude, height along the first longitude.
    self.width_km = geo.distance({'lat': lat_lo, 'lng': lng_lo},
                                 {'lat': lat_lo, 'lng': lng_hi})
    self.height_km = geo.distance({'lat': lat_lo, 'lng': lng_lo},
                                  {'lat': lat_hi, 'lng': lng_lo})

    print('width: %s; height: %s' % (self.width_c, self.height_c))
    print('width: %skm; height: %skm' % (self.width_km, self.height_km))