Пример #1
0
def run():
    coordinates = [InstagramConfig.photo_min_lat,
            InstagramConfig.photo_min_lng,
            InstagramConfig.photo_max_lat,
            InstagramConfig.photo_max_lng
                 ]
    huge_region = Region(coordinates)
    
    alarm_region_size = 25

    regions = huge_region.divideRegions(alarm_region_size,alarm_region_size)
    #filtered_regions = huge_region.filterRegions( regions, test=True)
    
    #regions = filtered_regions
    test_cnt = 0
    print 'all regions',len(regions)
    pi = PhotoInterface('tmp_citybeat', 'photos');
    for region in regions:
        #delete the last 7*24*3600 to set it back to Dec 1st
        start_of_time =  1364571565 - 7*24*3600 #+ 7*24*3600
        end_of_time = 1364571565  #+ 7*24*3600
        res = pi.rangeQuery(region, [str(start_of_time), str(end_of_time)]);
        for r in res:
            try:
                print r['location']['latitude'],',',r['location']['longitude']
            except:
                continue
Пример #2
0
def run():
    coordinates = [
        InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng
    ]
    huge_region = Region(coordinates)

    alarm_region_size = 25

    regions = huge_region.divideRegions(alarm_region_size, alarm_region_size)
    filtered_regions = huge_region.filterRegions(regions)
    # get the same regions as in db. Here it's 10 by 10

    regions = filtered_regions
    test_cnt = 0
    print 'all regions', len(regions)
    for region in regions:
        #delete the last 7*24*3600 to set it back to Dec 1st
        start_of_time = 1354320000 + 7 * 24 * 3600
        end_of_time = 1354320000 + 7 * 24 * 3600 + 7 * 24 * 3600
        alarm = Alarm(region, start_of_time, end_of_time,
                      'next_week_prediction_25by25',
                      'next_week_candidate_event_25by25')
        cnt = 0
        region.display()
        xia_cnt = 0
        while alarm.nextTimeStep(300):
            cnt += 1
            alarm.fireAlarm()
            if cnt % 100 == 0:
                print 'cur = ', time.gmtime(float(alarm.cur_time))
                print 'alarm = ', cnt
        print '\n\n'
Пример #3
0
    def goThroughCandidateDB(self):
        """Go through candidate event db and classify whatever is left"""
        ei = EventInterface(self.candidate_db, self.candidate_collection)
        ei_classified = EventInterface(self.classified_event_db, self.classified_event_collection)
        cnt = 0
        # consider past 2 hours for merge
        low_bound = str(int(getCurrentStampUTC()) - 60 * 60 * 2)
        condition = {'created_time':{ '$gte':  low_bound}}
        for e in ei.getAllDocuments(condition=condition):
            logging.warning("Classifying %d-th candidate event..." % cnt)
            e = Event(e)
            cnt += 1
            region = Region(e.getRegion())
            corpus = self.all_corpus[region.getKey()]
            ef = BaseFeatureProduction(e, corpus)
            prob = self.clf.classify(ef.extractFeatures())

            if ei_classified.getEventByID(e.getID()) is not None:
                if prob > 0.5:
                    print 'already in front end collection, merge it'
                    ei_classified.addEvent(e)
                else:
                    print 'after merge it becomes none event, delete it'
                    ei_classified.deleteEventByID(e.getID())
            else:
                if prob > 0.5:
                    print 'new events find in collection but not in front end , add it'
                    ei_classified.addEvent(e)
Пример #4
0
def run():
    coordinates = [
        InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng
    ]
    huge_region = Region(coordinates)

    alarm_region_size = 25

    regions = huge_region.divideRegions(alarm_region_size, alarm_region_size)
    filtered_regions = huge_region.filterRegions(region_list=regions,
                                                 test=True,
                                                 n=alarm_region_size,
                                                 m=alarm_region_size)

    cur_utc_time = getCurrentStampUTC()

    regions = filtered_regions
    print 'all regions', len(regions)
    for region in regions:
        start_of_time = cur_utc_time
        end_of_time = cur_utc_time
        alarm = Alarm(region, start_of_time, end_of_time, 'online_prediction',
                      'online_candidate')
        region.display()
        alarm.fireAlarm()
Пример #5
0
def run():
    coordinates = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    huge_region = Region(coordinates)

    alarm_region_size = 25

    regions = huge_region.divideRegions(alarm_region_size, alarm_region_size)
    # filtered_regions = huge_region.filterRegions( regions, test=True)

    # regions = filtered_regions
    test_cnt = 0
    print "all regions", len(regions)
    pi = PhotoInterface("tmp_citybeat", "photos")
    for region in regions:
        # delete the last 7*24*3600 to set it back to Dec 1st
        start_of_time = 1364571565 - 7 * 24 * 3600  # + 7*24*3600
        end_of_time = 1364571565  # + 7*24*3600
        res = pi.rangeQuery(region, [str(start_of_time), str(end_of_time)])
        for r in res:
            try:
                print r["location"]["latitude"], ",", r["location"]["longitude"]
            except:
                continue
Пример #6
0
def run():
    coordinates = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    huge_region = Region(coordinates)

    alarm_region_size = 25

    regions = huge_region.divideRegions(alarm_region_size, alarm_region_size)
    filtered_regions = huge_region.filterRegions(regions)
    # get the same regions as in db. Here it's 10 by 10

    regions = filtered_regions
    test_cnt = 0
    print "all regions", len(regions)
    for region in regions:
        # delete the last 7*24*3600 to set it back to Dec 1st
        start_of_time = 1354320000 + 7 * 24 * 3600
        end_of_time = 1354320000 + 7 * 24 * 3600 + 7 * 24 * 3600
        alarm = Alarm(
            region, start_of_time, end_of_time, "next_week_prediction_25by25", "next_week_candidate_event_25by25"
        )
        cnt = 0
        region.display()
        xia_cnt = 0
        while alarm.nextTimeStep(300):
            cnt += 1
            alarm.fireAlarm()
            if cnt % 100 == 0:
                print "cur = ", time.gmtime(float(alarm.cur_time))
                print "alarm = ", cnt
        print "\n\n"
Пример #7
0
def run():
    coordinates = [
        InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng
    ]
    huge_region = Region(coordinates)

    alarm_region_size = 25

    regions = huge_region.divideRegions(alarm_region_size, alarm_region_size)
    filtered_regions = huge_region.filterRegions(regions)

    regions = filtered_regions
    test_cnt = 0
    print 'all regions', len(regions)
    for region in regions:
        #delete the last 7*24*3600 to set it back to Dec 1st
        start_of_time = 1354320000  #+ 7*24*3600
        end_of_time = 1354320000 + 7 * 24 * 3600  #+ 7*24*3600
        series = InstagramTimeSeries(region, start_of_time, end_of_time)
        series = series.buildTimeSeries()
        region.display()
        for t in series.index:
            print t, ',', series[t]
        print '\n'
Пример #8
0
def run(data_source):
    """Fire one alarm per filtered region for the given data source.

    Args:
        data_source: ``'twitter'`` or ``'instagram'``. Selects the element
            type passed to ``filterRegions`` (``'tweets'`` / ``'photos'``)
            and the config class that supplies the collection names.

    Raises:
        ValueError: if ``data_source`` is neither ``'twitter'`` nor
            ``'instagram'``. Previously such a value got past the region
            setup and then failed inside the loop with an unbound ``alarm``
            variable.
    """
    # Validate before doing any work so a bad argument fails fast and clearly.
    if data_source not in ('twitter', 'instagram'):
        raise ValueError("unsupported data_source: %r" % (data_source,))

    if data_source == 'twitter':
        element_type = 'tweets'
        config = TwitterConfig
    else:
        element_type = 'photos'
        config = InstagramConfig

    # The bounding box is always taken from InstagramConfig, for both sources.
    coordinates = [InstagramConfig.photo_min_lat,
                   InstagramConfig.photo_min_lng,
                   InstagramConfig.photo_max_lat,
                   InstagramConfig.photo_max_lng]

    alarm_region_size = 25
    nyc_region = Region(coordinates)
    regions = nyc_region.divideRegions(alarm_region_size, alarm_region_size)
    regions = nyc_region.filterRegions(region_list=regions, test=True,
                                       n=alarm_region_size, m=alarm_region_size,
                                       element_type=element_type)

    cur_utc_time = getCurrentStampUTC()

    for region in regions:
        # Start and end are both the current time.
        # For testing, the event collection argument can be replaced by a
        # scratch name such as "tmp_remove".
        alarm = Alarm(region, cur_utc_time, cur_utc_time,
                      config.prediction_collection, config.event_collection,
                      data_source)
        region.display()
        alarm.fireAlarm()
Пример #9
0
def run():
    coordinates = [InstagramConfig.photo_min_lat,
            InstagramConfig.photo_min_lng,
            InstagramConfig.photo_max_lat,
            InstagramConfig.photo_max_lng
                 ]
    huge_region = Region(coordinates)
    
    alarm_region_size = 25

    regions = huge_region.divideRegions(alarm_region_size,alarm_region_size)
    filtered_regions = huge_region.filterRegions( regions)
    
    regions = filtered_regions
    test_cnt = 0
    print 'all regions',len(regions)
    for region in regions:
        #delete the last 7*24*3600 to set it back to Dec 1st
        start_of_time =  1354320000 #+ 7*24*3600
        end_of_time = 1354320000 + 7*24*3600 #+ 7*24*3600
        series =  InstagramTimeSeries( region, start_of_time, end_of_time)
        series =  series.buildTimeSeries()
        region.display()
        for t in series.index:
            print t,',',series[t]
        print '\n'
Пример #10
0
def run():
    coordinates = [
        InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng
    ]
    huge_region = Region(coordinates)

    alarm_region_size = 25

    regions = huge_region.divideRegions(alarm_region_size, alarm_region_size)
    filtered_regions = huge_region.filterRegions(regions, m=25, n=25)
    # get the same regions as in db. Here it's 10 by 10

    regions = filtered_regions
    print 'all regions', len(regions)
    region_cnt = 0
    cnt = 0
    for region in regions:
        print 'region_cnt ', region_cnt
        region_cnt += 1
        #delete the last 7*24*3600 to set it back to Dec 1st
        start_of_time = 1354320000
        end_of_time = 1354320000 + 7 * 24 * 3600
        alarm = Alarm(region, start_of_time, end_of_time,
                      'baseline_candidate_events')
        region.display()

        while alarm.nextTimeStep(300):
            alarm.fireAlarm()
            cnt += 1
            if cnt % 100 == 0:
                print 'cur = ', time.gmtime(float(alarm.cur_time))
        print '\n\n'
Пример #11
0
def run():
    coordinates = [InstagramConfig.photo_min_lat,
            InstagramConfig.photo_min_lng,
            InstagramConfig.photo_max_lat,
            InstagramConfig.photo_max_lng
                 ]
    huge_region = Region(coordinates)
    
    alarm_region_size = 25

    regions = huge_region.divideRegions(alarm_region_size,alarm_region_size)
    filtered_regions = huge_region.filterRegions(regions, m = 25, n = 25)
    # get the same regions as in db. Here it's 10 by 10

    regions = filtered_regions
    print 'all regions',len(regions)
    region_cnt = 0
    cnt = 0
    for region in regions:
        print 'region_cnt ', region_cnt
        region_cnt+=1
        #delete the last 7*24*3600 to set it back to Dec 1st
        start_of_time =  1354320000 
        end_of_time = 1354320000 + 7*24*3600 
        alarm = Alarm(region, start_of_time, end_of_time ,'baseline_candidate_events' ) 
        region.display()
        
        while alarm.nextTimeStep(300):
            alarm.fireAlarm()
            cnt+=1
            if cnt%100==0:
                print 'cur = ', time.gmtime(float(alarm.cur_time) )
        print '\n\n' 
Пример #12
0
def run():
    coordinates = [
        InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng
    ]
    huge_region = Region(coordinates)

    regions = huge_region.divideRegions(25, 25)
    filtered_regions = huge_region.filterRegions(regions)
    regions = filtered_regions

    for r in regions:
        r.display()

    cur_utc_timestamp = getCurrentStampUTC()
    #experiment start time - Dec 1 00:00
    clock = 1354320000 + 7 * 24 * 3600
    end_of_time = 1354320000 + 7 * 24 * 3600 + 7 * 24 * 3600
    days_passed = 0
    _results = {}
    _saved = {}

    redis_conn = Redis("tall4")
    redis_queue = Queue(connection=redis_conn)

    while clock < end_of_time:
        print 'working on day ', days_passed
        days_passed += 1
        # use 14 days of data as training
        fourteen_days_ago = clock - 14 * 24 * 3600

        for i in range(len(regions)):
            #for i in range(1):
            test_region = regions[i]
            try:
                gp = GaussianProcessJob(test_region, str(fourteen_days_ago),
                                        str(clock), redis_queue)
                res, pred_time = gp.submit()
            except Exception as e:
                print 'Initialization of gp error. continue, error message %s' % (
                    e)
                continue
            _results[gp.getID()] = (test_region, res, pred_time)
            _saved[gp.getID()] = False
        save_to_mongo(_results, _saved, cur_utc_timestamp)
        clock += 3600 * 24
    done = False
    while not done:
        done = save_to_mongo(_results, _saved, cur_utc_timestamp)
        time.sleep(10)

    print 'finish work'
Пример #13
0
def run():
    coordinates = [InstagramConfig.photo_min_lat,
            InstagramConfig.photo_min_lng,
            InstagramConfig.photo_max_lat,
            InstagramConfig.photo_max_lng
                 ]
    huge_region = Region(coordinates)
    
    regions = huge_region.divideRegions(25,25)
    filtered_regions = huge_region.filterRegions( regions )
    regions = filtered_regions

    for r in regions:
        r.display()

    cur_utc_timestamp = getCurrentStampUTC() 
    #experiment start time - Dec 1 00:00
    clock = 1354320000  + 7*24*3600
    end_of_time = 1354320000 + 7*24*3600  + 7*24*3600
    days_passed = 0
    _results =  {} 
    _saved = {}

    redis_conn = Redis("tall4")
    redis_queue = Queue(connection = redis_conn)

    while clock<end_of_time:
        print 'working on day ',days_passed
        days_passed+=1
        # use 14 days of data as training
        fourteen_days_ago = clock - 14*24*3600

        for i in range(len(regions)):
        #for i in range(1):
            test_region = regions[i]
            try:
                gp = GaussianProcessJob( test_region, str(fourteen_days_ago), str(clock) , redis_queue)
                res, pred_time = gp.submit()
            except Exception as e:
                print 'Initialization of gp error. continue, error message %s'%(e)
                continue
            _results[gp.getID()] = (test_region, res, pred_time)
            _saved[ gp.getID() ] = False
        save_to_mongo(_results, _saved, cur_utc_timestamp) 
        clock+=3600*24
    done = False
    while not done:
        done = save_to_mongo(_results, _saved, cur_utc_timestamp)
        time.sleep(10)

    print 'finish work' 
Пример #14
0
def run(data_source):
    """Submit one Gaussian-process job per region and wait for the results.

    Args:
        data_source: ``"twitter"`` filters regions by ``"tweets"`` and
            ``"instagram"`` by ``"photos"``. Any other value leaves the
            25 x 25 grid unfiltered. The value is also forwarded to
            ``save_to_mongo``.

    Each job is trained on the 14 days before the current UTC stamp.
    ``save_to_mongo`` is polled every 10 seconds until it returns a true
    value.
    """
    coordinates = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    nyc_region = Region(coordinates)
    regions = nyc_region.divideRegions(25, 25)
    if data_source == "twitter":
        regions = nyc_region.filterRegions(regions, test=True, n=25, m=25, element_type="tweets")
    elif data_source == "instagram":
        regions = nyc_region.filterRegions(regions, test=True, n=25, m=25, element_type="photos")

    for r in regions:
        r.display()

    cur_utc_timestamp = getCurrentStampUTC()

    _results = {}
    _saved = {}

    redis_conn = Redis("tall4")
    redis_queue = Queue(connection=redis_conn)
    fourteen_days_ago = cur_utc_timestamp - 24 * 14 * 3600

    for i, test_region in enumerate(regions):
        # logging.warning replaces the deprecated logging.warn alias.
        logging.warning("Working on region %d", i)
        # Errors from job creation/submission are intentionally not caught
        # here; they abort the run.
        gp = GaussianProcessJob(test_region, str(fourteen_days_ago), str(cur_utc_timestamp), redis_queue)
        res, pred_time = gp.submit()
        job_id = gp.getID()
        _results[job_id] = (test_region, res, pred_time)
        _saved[job_id] = False

    # The first pass of this loop is the initial save attempt. The loop only
    # logs and sleeps while work is still outstanding, so a finished run
    # returns immediately.
    while not save_to_mongo(_results, _saved, cur_utc_timestamp, data_source):
        logging.warning("Waiting for completing...")
        time.sleep(10)

    logging.warning("Work done.")
Пример #15
0
def run():
    coordinates = [
        InstagramConfig.photo_min_lat, InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat, InstagramConfig.photo_max_lng
    ]
    huge_region = Region(coordinates)

    regions = huge_region.divideRegions(25, 25)
    filtered_regions = huge_region.filterRegions(regions)
    regions = filtered_regions

    for r in regions:
        r.display()

    cur_utc_timestamp = getCurrentStampUTC()

    _results = {}
    _saved = {}

    redis_conn = Redis("tall4")
    redis_queue = Queue(connection=redis_conn)
    fourteen_days_ago = cur_utc_timestamp - 24 * 14 * 3600

    for i in range(len(regions)):
        test_region = regions[i]
        try:
            gp = GaussianProcessJob(test_region, str(fourteen_days_ago),
                                    str(cur_utc_timestamp), redis_queue)
            res, pred_time = gp.submit()
        except Exception as e:
            print 'Initialization of gp error. continue, error message %s' % (
                e)
            continue
        _results[gp.getID()] = (test_region, res, pred_time)
        _saved[gp.getID()] = False

    save_to_mongo(_results, _saved, cur_utc_timestamp)
    done = False
    while not done:
        done = save_to_mongo(_results, _saved, cur_utc_timestamp)
        time.sleep(10)

    print 'finish work'
Пример #16
0
def run():
    coordinates = [InstagramConfig.photo_min_lat,
            InstagramConfig.photo_min_lng,
            InstagramConfig.photo_max_lat,
            InstagramConfig.photo_max_lng
                 ]
    huge_region = Region(coordinates)
    
    regions = huge_region.divideRegions(25,25)
    filtered_regions = huge_region.filterRegions( regions )
    regions = filtered_regions

    for r in regions:
        r.display()

    cur_utc_timestamp = getCurrentStampUTC() 
    
    _results =  {} 
    _saved = {}

    redis_conn = Redis("tall4")
    redis_queue = Queue(connection = redis_conn)
    fourteen_days_ago = cur_utc_timestamp - 24*14*3600

    for i in range(len(regions)):
        test_region = regions[i]
        try:
            gp = GaussianProcessJob( test_region, str(fourteen_days_ago), str(cur_utc_timestamp) , redis_queue)
            res, pred_time = gp.submit()
        except Exception as e:
            print 'Initialization of gp error. continue, error message %s'%(e)
            continue
        _results[gp.getID()] = (test_region, res, pred_time)
        _saved[ gp.getID() ] = False

    save_to_mongo(_results, _saved, cur_utc_timestamp) 
    done = False
    while not done:
        done = save_to_mongo(_results, _saved, cur_utc_timestamp)
        time.sleep(10)

    print 'finish work' 
Пример #17
0
def run():
    coordinates = [InstagramConfig.photo_min_lat,
            InstagramConfig.photo_min_lng,
            InstagramConfig.photo_max_lat,
            InstagramConfig.photo_max_lng
                 ]
    huge_region = Region(coordinates)
    
    alarm_region_size = 25

    regions = huge_region.divideRegions(alarm_region_size,alarm_region_size)
    filtered_regions = huge_region.filterRegions( region_list = regions, test=True, n=alarm_region_size, m = alarm_region_size)

    cur_utc_time = getCurrentStampUTC() 

    regions = filtered_regions
    print 'all regions',len(regions)
    for region in regions:
        start_of_time =  cur_utc_time
        end_of_time = cur_utc_time
        alarm = Alarm(region, start_of_time, end_of_time, 'online_prediction', 'online_candidate')
        region.display()
        alarm.fireAlarm()