def run(): coordinates = [InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) alarm_region_size = 25 regions = huge_region.divideRegions(alarm_region_size,alarm_region_size) #filtered_regions = huge_region.filterRegions( regions, test=True) #regions = filtered_regions test_cnt = 0 print 'all regions',len(regions) pi = PhotoInterface('tmp_citybeat', 'photos'); for region in regions: #delete the last 7*24*3600 to set it back to Dec 1st start_of_time = 1364571565 - 7*24*3600 #+ 7*24*3600 end_of_time = 1364571565 #+ 7*24*3600 res = pi.rangeQuery(region, [str(start_of_time), str(end_of_time)]); for r in res: try: print r['location']['latitude'],',',r['location']['longitude'] except: continue
def run(): coordinates = [ InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) alarm_region_size = 25 regions = huge_region.divideRegions(alarm_region_size, alarm_region_size) filtered_regions = huge_region.filterRegions(regions) # get the same regions as in db. Here it's 10 by 10 regions = filtered_regions test_cnt = 0 print 'all regions', len(regions) for region in regions: #delete the last 7*24*3600 to set it back to Dec 1st start_of_time = 1354320000 + 7 * 24 * 3600 end_of_time = 1354320000 + 7 * 24 * 3600 + 7 * 24 * 3600 alarm = Alarm(region, start_of_time, end_of_time, 'next_week_prediction_25by25', 'next_week_candidate_event_25by25') cnt = 0 region.display() xia_cnt = 0 while alarm.nextTimeStep(300): cnt += 1 alarm.fireAlarm() if cnt % 100 == 0: print 'cur = ', time.gmtime(float(alarm.cur_time)) print 'alarm = ', cnt print '\n\n'
def goThroughCandidateDB(self): """Go through candidate event db and classify whatever is left""" ei = EventInterface(self.candidate_db, self.candidate_collection) ei_classified = EventInterface(self.classified_event_db, self.classified_event_collection) cnt = 0 # consider past 2 hours for merge low_bound = str(int(getCurrentStampUTC()) - 60 * 60 * 2) condition = {'created_time':{ '$gte': low_bound}} for e in ei.getAllDocuments(condition=condition): logging.warning("Classifying %d-th candidate event..." % cnt) e = Event(e) cnt += 1 region = Region(e.getRegion()) corpus = self.all_corpus[region.getKey()] ef = BaseFeatureProduction(e, corpus) prob = self.clf.classify(ef.extractFeatures()) if ei_classified.getEventByID(e.getID()) is not None: if prob > 0.5: print 'already in front end collection, merge it' ei_classified.addEvent(e) else: print 'after merge it becomes none event, delete it' ei_classified.deleteEventByID(e.getID()) else: if prob > 0.5: print 'new events find in collection but not in front end , add it' ei_classified.addEvent(e)
def run(): coordinates = [ InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) alarm_region_size = 25 regions = huge_region.divideRegions(alarm_region_size, alarm_region_size) filtered_regions = huge_region.filterRegions(region_list=regions, test=True, n=alarm_region_size, m=alarm_region_size) cur_utc_time = getCurrentStampUTC() regions = filtered_regions print 'all regions', len(regions) for region in regions: start_of_time = cur_utc_time end_of_time = cur_utc_time alarm = Alarm(region, start_of_time, end_of_time, 'online_prediction', 'online_candidate') region.display() alarm.fireAlarm()
def run(): coordinates = [ InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng, ] huge_region = Region(coordinates) alarm_region_size = 25 regions = huge_region.divideRegions(alarm_region_size, alarm_region_size) # filtered_regions = huge_region.filterRegions( regions, test=True) # regions = filtered_regions test_cnt = 0 print "all regions", len(regions) pi = PhotoInterface("tmp_citybeat", "photos") for region in regions: # delete the last 7*24*3600 to set it back to Dec 1st start_of_time = 1364571565 - 7 * 24 * 3600 # + 7*24*3600 end_of_time = 1364571565 # + 7*24*3600 res = pi.rangeQuery(region, [str(start_of_time), str(end_of_time)]) for r in res: try: print r["location"]["latitude"], ",", r["location"]["longitude"] except: continue
def run(): coordinates = [ InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng, ] huge_region = Region(coordinates) alarm_region_size = 25 regions = huge_region.divideRegions(alarm_region_size, alarm_region_size) filtered_regions = huge_region.filterRegions(regions) # get the same regions as in db. Here it's 10 by 10 regions = filtered_regions test_cnt = 0 print "all regions", len(regions) for region in regions: # delete the last 7*24*3600 to set it back to Dec 1st start_of_time = 1354320000 + 7 * 24 * 3600 end_of_time = 1354320000 + 7 * 24 * 3600 + 7 * 24 * 3600 alarm = Alarm( region, start_of_time, end_of_time, "next_week_prediction_25by25", "next_week_candidate_event_25by25" ) cnt = 0 region.display() xia_cnt = 0 while alarm.nextTimeStep(300): cnt += 1 alarm.fireAlarm() if cnt % 100 == 0: print "cur = ", time.gmtime(float(alarm.cur_time)) print "alarm = ", cnt print "\n\n"
def run(): coordinates = [ InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) alarm_region_size = 25 regions = huge_region.divideRegions(alarm_region_size, alarm_region_size) filtered_regions = huge_region.filterRegions(regions) regions = filtered_regions test_cnt = 0 print 'all regions', len(regions) for region in regions: #delete the last 7*24*3600 to set it back to Dec 1st start_of_time = 1354320000 #+ 7*24*3600 end_of_time = 1354320000 + 7 * 24 * 3600 #+ 7*24*3600 series = InstagramTimeSeries(region, start_of_time, end_of_time) series = series.buildTimeSeries() region.display() for t in series.index: print t, ',', series[t] print '\n'
def run(data_source):
    """Fire the online alarm once per filtered region at the current UTC time.

    Args:
        data_source: ``'twitter'`` or ``'instagram'``.  Selects the element
            type passed to ``Region.filterRegions`` (``'tweets'`` or
            ``'photos'``) and the config class (``TwitterConfig`` or
            ``InstagramConfig``) that supplies the prediction and event
            collection names.

    Raises:
        ValueError: if ``data_source`` is neither of the two supported
            values.  Previously such a value only failed inside the loop,
            with an unbound ``alarm`` after the first region was displayed.
    """
    if data_source == 'twitter':
        element_type = 'tweets'
    elif data_source == 'instagram':
        element_type = 'photos'
    else:
        raise ValueError(
            "unsupported data_source %r (expected 'twitter' or 'instagram')"
            % (data_source,))

    # The bounding box always comes from InstagramConfig, for both sources.
    coordinates = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    alarm_region_size = 25
    nyc_region = Region(coordinates)
    regions = nyc_region.divideRegions(alarm_region_size, alarm_region_size)
    regions = nyc_region.filterRegions(region_list=regions, test=True,
                                       n=alarm_region_size,
                                       m=alarm_region_size,
                                       element_type=element_type)

    if data_source == 'twitter':
        source_config = TwitterConfig
    else:
        source_config = InstagramConfig

    # Start and end are both "now": a single online evaluation per region.
    cur_utc_time = getCurrentStampUTC()
    for region in regions:
        alarm = Alarm(region, cur_utc_time, cur_utc_time,
                      source_config.prediction_collection,
                      source_config.event_collection,
                      data_source)
        region.display()
        alarm.fireAlarm()
def run(): coordinates = [InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) alarm_region_size = 25 regions = huge_region.divideRegions(alarm_region_size,alarm_region_size) filtered_regions = huge_region.filterRegions( regions) regions = filtered_regions test_cnt = 0 print 'all regions',len(regions) for region in regions: #delete the last 7*24*3600 to set it back to Dec 1st start_of_time = 1354320000 #+ 7*24*3600 end_of_time = 1354320000 + 7*24*3600 #+ 7*24*3600 series = InstagramTimeSeries( region, start_of_time, end_of_time) series = series.buildTimeSeries() region.display() for t in series.index: print t,',',series[t] print '\n'
def run(): coordinates = [ InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) alarm_region_size = 25 regions = huge_region.divideRegions(alarm_region_size, alarm_region_size) filtered_regions = huge_region.filterRegions(regions, m=25, n=25) # get the same regions as in db. Here it's 10 by 10 regions = filtered_regions print 'all regions', len(regions) region_cnt = 0 cnt = 0 for region in regions: print 'region_cnt ', region_cnt region_cnt += 1 #delete the last 7*24*3600 to set it back to Dec 1st start_of_time = 1354320000 end_of_time = 1354320000 + 7 * 24 * 3600 alarm = Alarm(region, start_of_time, end_of_time, 'baseline_candidate_events') region.display() while alarm.nextTimeStep(300): alarm.fireAlarm() cnt += 1 if cnt % 100 == 0: print 'cur = ', time.gmtime(float(alarm.cur_time)) print '\n\n'
def run(): coordinates = [InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) alarm_region_size = 25 regions = huge_region.divideRegions(alarm_region_size,alarm_region_size) filtered_regions = huge_region.filterRegions(regions, m = 25, n = 25) # get the same regions as in db. Here it's 10 by 10 regions = filtered_regions print 'all regions',len(regions) region_cnt = 0 cnt = 0 for region in regions: print 'region_cnt ', region_cnt region_cnt+=1 #delete the last 7*24*3600 to set it back to Dec 1st start_of_time = 1354320000 end_of_time = 1354320000 + 7*24*3600 alarm = Alarm(region, start_of_time, end_of_time ,'baseline_candidate_events' ) region.display() while alarm.nextTimeStep(300): alarm.fireAlarm() cnt+=1 if cnt%100==0: print 'cur = ', time.gmtime(float(alarm.cur_time) ) print '\n\n'
def run(): coordinates = [ InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) regions = huge_region.divideRegions(25, 25) filtered_regions = huge_region.filterRegions(regions) regions = filtered_regions for r in regions: r.display() cur_utc_timestamp = getCurrentStampUTC() #experiment start time - Dec 1 00:00 clock = 1354320000 + 7 * 24 * 3600 end_of_time = 1354320000 + 7 * 24 * 3600 + 7 * 24 * 3600 days_passed = 0 _results = {} _saved = {} redis_conn = Redis("tall4") redis_queue = Queue(connection=redis_conn) while clock < end_of_time: print 'working on day ', days_passed days_passed += 1 # use 14 days of data as training fourteen_days_ago = clock - 14 * 24 * 3600 for i in range(len(regions)): #for i in range(1): test_region = regions[i] try: gp = GaussianProcessJob(test_region, str(fourteen_days_ago), str(clock), redis_queue) res, pred_time = gp.submit() except Exception as e: print 'Initialization of gp error. continue, error message %s' % ( e) continue _results[gp.getID()] = (test_region, res, pred_time) _saved[gp.getID()] = False save_to_mongo(_results, _saved, cur_utc_timestamp) clock += 3600 * 24 done = False while not done: done = save_to_mongo(_results, _saved, cur_utc_timestamp) time.sleep(10) print 'finish work'
def run(): coordinates = [InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) regions = huge_region.divideRegions(25,25) filtered_regions = huge_region.filterRegions( regions ) regions = filtered_regions for r in regions: r.display() cur_utc_timestamp = getCurrentStampUTC() #experiment start time - Dec 1 00:00 clock = 1354320000 + 7*24*3600 end_of_time = 1354320000 + 7*24*3600 + 7*24*3600 days_passed = 0 _results = {} _saved = {} redis_conn = Redis("tall4") redis_queue = Queue(connection = redis_conn) while clock<end_of_time: print 'working on day ',days_passed days_passed+=1 # use 14 days of data as training fourteen_days_ago = clock - 14*24*3600 for i in range(len(regions)): #for i in range(1): test_region = regions[i] try: gp = GaussianProcessJob( test_region, str(fourteen_days_ago), str(clock) , redis_queue) res, pred_time = gp.submit() except Exception as e: print 'Initialization of gp error. continue, error message %s'%(e) continue _results[gp.getID()] = (test_region, res, pred_time) _saved[ gp.getID() ] = False save_to_mongo(_results, _saved, cur_utc_timestamp) clock+=3600*24 done = False while not done: done = save_to_mongo(_results, _saved, cur_utc_timestamp) time.sleep(10) print 'finish work'
def run(data_source):
    """Submit a Gaussian-process job for every region using the last 14 days.

    The ``InstagramConfig`` bounding box is split into a 25 x 25 grid.  For
    ``data_source == "twitter"`` the grid is filtered with element type
    ``"tweets"``, for ``"instagram"`` with ``"photos"``; any other value
    leaves the grid unfiltered, as before.  One ``GaussianProcessJob`` per
    region is submitted to the Redis queue on host ``tall4`` for the range
    from 14 days ago to now.  Jobs are tracked in ``_results`` / ``_saved``
    and handed to ``save_to_mongo``, whose return value is treated as the
    "everything is saved" flag when polling at the end.

    Args:
        data_source: source tag; also forwarded to ``save_to_mongo``.

    Submission errors are not caught here and propagate to the caller.
    """
    coordinates = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    nyc_region = Region(coordinates)
    regions = nyc_region.divideRegions(25, 25)
    if data_source == "twitter":
        regions = nyc_region.filterRegions(regions, test=True, n=25, m=25, element_type="tweets")
    elif data_source == "instagram":
        regions = nyc_region.filterRegions(regions, test=True, n=25, m=25, element_type="photos")

    for r in regions:
        r.display()

    cur_utc_timestamp = getCurrentStampUTC()

    _results = {}  # job id -> (region, result handle, prediction time)
    _saved = {}    # job id -> False when registered here
    redis_conn = Redis("tall4")
    redis_queue = Queue(connection=redis_conn)
    fourteen_days_ago = cur_utc_timestamp - 24 * 14 * 3600

    for i, test_region in enumerate(regions):
        logging.warning("Working on region %d", i)
        gp = GaussianProcessJob(test_region, str(fourteen_days_ago), str(cur_utc_timestamp), redis_queue)
        res, pred_time = gp.submit()
        _results[gp.getID()] = (test_region, res, pred_time)
        _saved[gp.getID()] = False
        # NOTE(review): indentation was lost in the recovered source; saving
        # after each submission is assumed.
        save_to_mongo(_results, _saved, cur_utc_timestamp, data_source)

    # Poll until save_to_mongo reports completion; log and sleep only while
    # work is still outstanding.
    while not save_to_mongo(_results, _saved, cur_utc_timestamp, data_source):
        logging.warning("Waiting for completing...")
        time.sleep(10)
    logging.warning("Work done.")
def run(): coordinates = [ InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) regions = huge_region.divideRegions(25, 25) filtered_regions = huge_region.filterRegions(regions) regions = filtered_regions for r in regions: r.display() cur_utc_timestamp = getCurrentStampUTC() _results = {} _saved = {} redis_conn = Redis("tall4") redis_queue = Queue(connection=redis_conn) fourteen_days_ago = cur_utc_timestamp - 24 * 14 * 3600 for i in range(len(regions)): test_region = regions[i] try: gp = GaussianProcessJob(test_region, str(fourteen_days_ago), str(cur_utc_timestamp), redis_queue) res, pred_time = gp.submit() except Exception as e: print 'Initialization of gp error. continue, error message %s' % ( e) continue _results[gp.getID()] = (test_region, res, pred_time) _saved[gp.getID()] = False save_to_mongo(_results, _saved, cur_utc_timestamp) done = False while not done: done = save_to_mongo(_results, _saved, cur_utc_timestamp) time.sleep(10) print 'finish work'
def run(): coordinates = [InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) regions = huge_region.divideRegions(25,25) filtered_regions = huge_region.filterRegions( regions ) regions = filtered_regions for r in regions: r.display() cur_utc_timestamp = getCurrentStampUTC() _results = {} _saved = {} redis_conn = Redis("tall4") redis_queue = Queue(connection = redis_conn) fourteen_days_ago = cur_utc_timestamp - 24*14*3600 for i in range(len(regions)): test_region = regions[i] try: gp = GaussianProcessJob( test_region, str(fourteen_days_ago), str(cur_utc_timestamp) , redis_queue) res, pred_time = gp.submit() except Exception as e: print 'Initialization of gp error. continue, error message %s'%(e) continue _results[gp.getID()] = (test_region, res, pred_time) _saved[ gp.getID() ] = False save_to_mongo(_results, _saved, cur_utc_timestamp) done = False while not done: done = save_to_mongo(_results, _saved, cur_utc_timestamp) time.sleep(10) print 'finish work'
def run(): coordinates = [InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng, InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng ] huge_region = Region(coordinates) alarm_region_size = 25 regions = huge_region.divideRegions(alarm_region_size,alarm_region_size) filtered_regions = huge_region.filterRegions( region_list = regions, test=True, n=alarm_region_size, m = alarm_region_size) cur_utc_time = getCurrentStampUTC() regions = filtered_regions print 'all regions',len(regions) for region in regions: start_of_time = cur_utc_time end_of_time = cur_utc_time alarm = Alarm(region, start_of_time, end_of_time, 'online_prediction', 'online_candidate') region.display() alarm.fireAlarm()