def goThroughCandidateDB(self):
    """Classify recent candidate events and sync the classified collection.

    Every candidate created within the past two hours is turned into an
    ``Event``, featurised against the corpus of its region and scored by
    ``self.clf``.  A score above 0.5 adds the event to the classified
    collection; a lower score removes it from there if it was already present.
    """
    candidates = EventInterface(self.candidate_db, self.candidate_collection)
    classified = EventInterface(self.classified_event_db,
                                self.classified_event_collection)

    # consider past 2 hours for merge
    two_hours_ago = str(int(getCurrentStampUTC()) - 60 * 60 * 2)
    recent_only = {'created_time': {'$gte': two_hours_ago}}

    for index, document in enumerate(
            candidates.getAllDocuments(condition=recent_only)):
        logging.warning("Classifying %d-th candidate event..." % index)
        event = Event(document)
        region_key = Region(event.getRegion()).getKey()
        features = BaseFeatureProduction(event, self.all_corpus[region_key])
        prob = self.clf.classify(features.extractFeatures())

        already_classified = (
            classified.getEventByID(event.getID()) is not None)
        if prob > 0.5:
            if already_classified:
                print('already in front end collection, merge it')
            else:
                print('new events find in collection but not in front end , add it')
            classified.addEvent(event)
        elif already_classified:
            print('after merge it becomes none event, delete it')
            classified.deleteEventByID(event.getID())
def findLast24HourEvents():
    """Write the URLs of the last day's front-end events to ``csv_file``.

    The 24-hour window ends one hour before now.  One single-column row is
    written per event, holding the event's URL.

    Returns the number of events written.
    """
    interface = EventInterface()
    interface.setCollection(InstagramConfig.front_end_events)

    # for merge reason, delay one hour
    window_end = int(getCurrentStampUTC()) - 60 * 60
    window_start = window_end - 24 * 3600
    query = {'created_time': {'$gte': str(window_start),
                              '$lte': str(window_end)}}
    cursor = interface.getAllFields(fields=['_id'], condition=query)

    url_prefix = 'http://ec2-23-22-67-45.compute-1.amazonaws.com/cb/event/'
    with open(csv_file, 'wb') as csvfile:
        event_writer = csv.writer(csvfile, delimiter=',')
        rows = [[url_prefix + str(document['_id'])] for document in cursor]
        event_writer.writerows(rows)
    return len(rows)
def fireAlarm(self):
    """Store a candidate event when the current photo count is anomalous.

    The mean and standard deviation for the current UTC hour are divided by
    four (presumably to match a 15-minute window -- confirm) and used to
    z-score ``self.current_value``.  An event is stored only when the z-score
    exceeds 3 and the current value is at least 8.
    """
    self._getFiftenMiniutesPhotos()

    hour_of_day = datetime.utcfromtimestamp(float(self.cur_time)).hour
    mu = self.means[hour_of_day] / 4.0
    std = self.stds[hour_of_day] / 4.0
    zscore = (self.current_value - mu) * 1.0 / std

    if not (zscore > 3 and self.current_value >= 8):
        return

    candidate = Event()
    candidate.setPredictedValues(mu, std)
    candidate.setZscore(zscore)
    candidate.setRegion(self.region)
    candidate.setCreatedTime(self.cur_time)
    candidate.setActualValue(self.current_value)
    for photo in self.photos:
        candidate.addPhoto(photo)

    store = EventInterface()
    store.setCollection(self.candidate_collection)
    store.addEvent(candidate)
def fireAlarm(self): prediction = self.getNearestPrediction() self._getFiftenMiniutesData() if prediction is None: print 'No prediction' return else: print 'Data!' mu = float(prediction['mu']) / 4.0 std = float(prediction['std']) / 4.0 time_stamp = prediction['time'] zscore = (self.current_value - mu) * 1.0 / std print 'cur value = ', self.current_value, 'zscore = ', zscore if zscore > 3.0 and self.current_value > 5: #comment this print 'in alarm!, cur value = ', self.current_value if self.data_source == 'twitter': e = TweetEvent() for dt in self.data: e.addTweet(dt) elif self.data_source == 'instagram': e = PhotoEvent() for dt in self.data: e.addPhoto(dt) e.setPredictedValues(mu, std) e.setZscore(zscore) e.setRegion(self.region) e.setCreatedTime(self.cur_time) e.setActualValue(self.current_value) ei = EventInterface() ei.setCollection(self.candidate_collection) print e.getEarliestPhotoTime(), e.getLatestPhotoTime() print ei.addEvent(e)
def fireAlarm(self): prediction = self.getNearestPrediction() self._getFiftenMiniutesPhotos() if prediction is None: print "None data for this region: details as follow" self.region.display() print "time:", self.cur_time return mu = float(prediction["mu"]) / 4.0 std = float(prediction["std"]) / 4.0 time_stamp = prediction["time"] zscore = (self.current_value - mu) * 1.0 / std if zscore > 3: e = Event() e.setPredictedValues(mu, std) e.setZscore(zscore) e.setRegion(self.region) e.setCreatedTime(self.cur_time) e.setActualValue(self.current_value) for p in self.photos: e.addPhoto(p) # print 'current value ',4.0*self.current_value, ' predict = ',mu*4.0,' std = ',std*4.0 ei = EventInterface() ei.setCollection(self.candidate_collection) print e.getEarliestPhotoTime(), e.getLatestPhotoTime() # print e.toJSON()['region'] # ei.addEvent(e) ei.addEventWithoutMerge(e)
def __init__(self):
    """Bind the AmazonMT candidate-event collection and load helper data.

    Creates the event interface and a ``Representor``, then loads the
    CrowdFlower codes via ``_loadCrowdFlowerCode``.
    """
    self.ei = interface = EventInterface()
    interface.setDB('AmazonMT')
    interface.setCollection('candidate_event_25by25_merged')
    self.representor = Representor()
    self._loadCrowdFlowerCode()
def __init__(self):
    """Bind the configured front-end event collection and the stats store.

    The database and collection names come from ``InstagramConfig``.
    """
    self.ei = interface = EventInterface()
    interface.setDB(InstagramConfig.event_db)
    interface.setCollection(InstagramConfig.front_end_events)
    self.stats_interface = StatsInterface()
def __init__(self):
    """Bind the ``citybeat.online_candidate`` collection and reset caches.

    Loads the CrowdFlower codes and starts with empty event and photo caches.
    """
    self.ei = interface = EventInterface()
    interface.setDB('citybeat')
    interface.setCollection('online_candidate')
    self._loadCrowdFlowerCode()
    self.cache_events = {}
    self.cache_photos = {}
def fireAlarm(self): prediction = self.getNearestPrediction() self._getFiftenMiniutesPhotos() if prediction is None: print 'None data for this region: details as follow' self.region.display() print 'time:', self.cur_time return mu = float(prediction['mu']) / 4.0 std = float(prediction['std']) / 4.0 time_stamp = prediction['time'] zscore = (self.current_value - mu) * 1.0 / std if zscore > 3: e = Event() e.setPredictedValues(mu, std) e.setZscore(zscore) e.setRegion(self.region) e.setCreatedTime(self.cur_time) e.setActualValue(self.current_value) for p in self.photos: e.addPhoto(p) #print 'current value ',4.0*self.current_value, ' predict = ',mu*4.0,' std = ',std*4.0 ei = EventInterface() ei.setCollection(self.candidate_collection) print e.getEarliestPhotoTime(), e.getLatestPhotoTime() #print e.toJSON()['region'] #ei.addEvent(e) ei.addEventWithoutMerge(e)
def fireAlarm(self):
    """Store a candidate event when the current photo count is anomalous.

    The mean and standard deviation for the current UTC hour are divided by
    four (presumably to match a 15-minute window -- confirm) and used to
    z-score ``self.current_value``.  An event is stored only when the z-score
    exceeds 3 and the current value is at least 8.
    """
    self._getFiftenMiniutesPhotos()

    hour_of_day = datetime.utcfromtimestamp(float(self.cur_time)).hour
    mu = self.means[hour_of_day] / 4.0
    std = self.stds[hour_of_day] / 4.0
    zscore = (self.current_value - mu) * 1.0 / std

    if not (zscore > 3 and self.current_value >= 8):
        return

    candidate = Event()
    candidate.setPredictedValues(mu, std)
    candidate.setZscore(zscore)
    candidate.setRegion(self.region)
    candidate.setCreatedTime(self.cur_time)
    candidate.setActualValue(self.current_value)
    for photo in self.photos:
        candidate.addPhoto(photo)

    store = EventInterface()
    store.setCollection(self.candidate_collection)
    store.addEvent(candidate)
def __init__(self):
    """Bind ``citybeat_production.instagram_front_end_events`` and load codes.

    Prints 'Building done' between selecting the database and the collection,
    then loads the CrowdFlower codes.
    """
    self.ei = interface = EventInterface()
    interface.setDB('citybeat_production')
    print('Building done')
    interface.setCollection('instagram_front_end_events')
    self._loadCrowdFlowerCode()
class Root:
    """Web handler serving candidate events from the ``AmazonMT`` database.

    Methods followed by ``<name>.exposed = True`` are the published ones (the
    marker suggests CherryPy -- confirm); the others are internal helpers.
    ``print`` is always called with a single parenthesised argument, which
    behaves exactly like the print statement on Python 2.
    """

    # Key paths removed from a photo by _deleteExtraMeta, in deletion order.
    _EXTRA_META_PATHS = (
        ('comments',),
        ('caption', 'from'),
        ('filter',),
        ('user',),
        ('images', 'standard_resolution'),
        ('images', 'low_resolution'),
        ('likes',),
    )

    def __init__(self):
        """Bind the event collection, the representor and the lookup data."""
        self.ei = EventInterface()
        self.ei.setDB('AmazonMT')
        self.ei.setCollection('candidate_event_25by25_merged')
        self.representor = Representor()
        # _cacheAll() fills these; they used to be missing, so calling it
        # raised AttributeError.
        self.cache_events = {}
        self.cache_photos = {}
        self._loadCrowdFlowerCode()

    def getAllEvents(self):
        """Return a JSON list of a random sample of events with >3 photos.

        Roughly nine in ten qualifying events are kept.  Each kept event gets
        fixed ``urgency``/``volume``/``stats`` values and its photos replaced
        by at most five representative photos.
        """
        events = []
        # Read the whole cursor first, as before, so the representor work
        # below does not run while the cursor is still being consumed.
        for e in list(self.ei.getAllDocuments()):
            if len(e['photos']) <= 3:
                continue
            if random.random() <= 0.1:
                continue
            e['_id'] = str(e['_id'])
            e['urgency'] = 58
            e['volume'] = 99
            e['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            e['photos'] = self.representor.getRepresentivePhotos(e)[:5]
            events.append(e)
        return json.dumps(events)
    getAllEvents.exposed = True

    def _loadCrowdFlowerCode(self):
        """Load ``crowdflower_code.txt`` into ``self.cf_code``.

        Each line is split on commas; the first field is the key and the
        second the value.  Lines with fewer than two fields are skipped.
        """
        with open('crowdflower_code.txt') as code_file:
            self.cf_code = {}
            for line in code_file:
                fields = line.split(',')
                if len(fields) < 2:
                    continue  # blank or malformed line
                # NOTE(review): the value is stored unstripped, so it keeps a
                # trailing newline when it is the last field on the line.
                self.cf_code[fields[0]] = fields[1]

    def getCrowdFlowerCode(self, event_id):
        """Return the code stored for ``event_id``, or None when unknown."""
        return self.cf_code.get(event_id)
    getCrowdFlowerCode.exposed = True

    def getAllEventsIDs(self):
        """Return a JSON list with the string form of every document id."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])
    #getAllEventsIDs.exposed = True

    def _deleteExtraMeta(self, photo):
        """Strip bulky metadata from ``photo`` in place and return it.

        Missing fields, and containers that are not mappings, are ignored.
        """
        for path in self._EXTRA_META_PATHS:
            try:
                container = photo
                for key in path[:-1]:
                    container = container[key]
                del container[path[-1]]
            except (KeyError, TypeError, IndexError, AttributeError):
                # Field absent, or photo/caption/images is not a dict.
                pass
        return photo

    def getPhotosByID(self, event_id):
        """Return JSON with all photos and up to ten representative ones.

        The result is two ``[label, count, photos]`` triples.
        """
        event = json.loads(self.getEventByID(event_id))
        photos = event['photos']
        rep_photos = self.representor.getRepresentivePhotos(event)[:10]
        res = [
            ['all_photos', len(photos), photos],
            ['top_10_representative', len(rep_photos), rep_photos],
        ]
        return json.dumps(res)
    getPhotosByID.exposed = True

    def _cacheAll(self):
        """Fill ``cache_events`` and ``cache_photos`` from getAllEvents()."""
        print('begin cache')
        all_events = self.getAllEvents()
        print(type(all_events))
        all_events = json.loads(all_events)
        # The counter deliberately runs on across both passes, as before.
        cnt = 0
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_events[e['_id']] = json.dumps(e)
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_photos[e['_id']] = self.getPhotosByID(e['_id'])

    def getEventByID(self, event_id):
        """Return one event as JSON after ``selectOnePhotoForOneUser``."""
        event = Event(self.ei.getEventByID(event_id))
        event.selectOnePhotoForOneUser()
        event_dic = event.toJSON()
        event_dic['_id'] = str(event_dic['_id'])
        return json.dumps(event_dic)
    getEventByID.exposed = True

    def getTopKeywords(self, event_id):
        """Return the event's top ten keywords as JSON."""
        event = self.ei.getEventByID(event_id)
        ef = EventFeature(event)
        words = ef.getTopKeywords(k=10)
        return json.dumps(words)
    #getTopKeywords.exposed = True

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the event's ``label`` and save it."""
        event = self.ei.getEventByID(str(event_id))
        # Joined by hand so the output matches the former multi-argument
        # print statement while staying a single-argument print.
        print(' '.join('%s' % part for part in
                       ('setting ', event_id, 'label = ', label)))
        event['label'] = int(label)
        self.ei.updateDocument(event)
class Root:
    """Web handler serving ``citybeat.online_candidate`` events with caches.

    Methods followed by ``<name>.exposed = True`` are the published ones (the
    marker suggests CherryPy -- confirm); the others are internal helpers.
    ``print`` is always called with a single parenthesised argument, which
    behaves exactly like the print statement on Python 2.
    """

    # Key paths removed from a photo by _deleteExtraMeta, in deletion order.
    _EXTRA_META_PATHS = (
        ('comments',),
        ('caption', 'from'),
        ('filter',),
        ('user',),
        ('images', 'standard_resolution'),
        ('images', 'low_resolution'),
        ('likes',),
    )

    def __init__(self):
        """Bind the event collection, load codes and start with empty caches.

        The caches are not pre-filled; ``_cacheAll`` has to be called for that.
        """
        self.ei = EventInterface()
        self.ei.setDB('citybeat')
        self.ei.setCollection('online_candidate')
        self._loadCrowdFlowerCode()
        self.cache_events = {}
        self.cache_photos = {}

    def getAllEvents(self):
        """Return every event as a JSON list with fixed display values.

        Each event gets a string ``_id`` plus constant ``urgency``, ``volume``
        and ``stats`` entries.
        """
        events = []
        for e in self.ei.getAllDocuments():
            e['_id'] = str(e['_id'])
            e['urgency'] = 58
            e['volume'] = 99
            e['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            events.append(e)
        return json.dumps(events)
    getAllEvents.exposed = True

    def _loadCrowdFlowerCode(self):
        """Load ``crowdflower_code.txt`` into ``self.cf_code``.

        Each line is split on commas; the first field is the key and the
        second the value.  Lines with fewer than two fields are skipped.
        """
        with open('crowdflower_code.txt') as code_file:
            self.cf_code = {}
            for line in code_file:
                fields = line.split(',')
                if len(fields) < 2:
                    continue  # blank or malformed line
                # NOTE(review): the value is stored unstripped, so it keeps a
                # trailing newline when it is the last field on the line.
                self.cf_code[fields[0]] = fields[1]

    def getCrowdFlowerCode(self, event_id):
        """Return the code stored for ``event_id``, or None when unknown."""
        return self.cf_code.get(event_id)
    getCrowdFlowerCode.exposed = True

    def getAllEventsIDs(self):
        """Return a JSON list with the string form of every document id."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])
    #getAllEventsIDs.exposed = True

    def _deleteExtraMeta(self, photo):
        """Strip bulky metadata from ``photo`` in place and return it.

        Missing fields, and containers that are not mappings, are ignored.
        """
        for path in self._EXTRA_META_PATHS:
            try:
                container = photo
                for key in path[:-1]:
                    container = container[key]
                del container[path[-1]]
            except (KeyError, TypeError, IndexError, AttributeError):
                # Field absent, or photo/caption/images is not a dict.
                pass
        return photo

    def getPhotosByID(self, event_id):
        """Return the keyword/photo entries of an event as JSON.

        A cached answer is returned with the extra metadata stripped from the
        photo list held at index 2 of each entry.  Otherwise the TF and TF-IDF
        keyword lists are interleaved, keeping the first entry per keyword.
        """
        if event_id in self.cache_photos:
            print('cached. return directly')
            entries = json.loads(self.cache_photos[event_id])
            for entry in entries:
                entry[2] = [self._deleteExtraMeta(p) for p in entry[2]]
            return json.dumps(entries)

        # NOTE(review): getEventByID() below treats this same lookup result as
        # a plain dict, and the EventFrontend wrapper was commented out, so
        # the keyword method calls here look like they cannot work -- confirm.
        event = self.ei.getEventByID(event_id)
        top_words_list = event.getTopKeywordsAndPhotos(20, 5)
        words_pics_list = event.getTopKeywordsAndPhotosByTFIDF(20, 5)
        keywords_shown = set()
        res = []
        for tf, idf in zip(top_words_list, words_pics_list):
            if tf[0] not in keywords_shown:
                keywords_shown.add(tf[0])
                res.append(tf)
            if idf[0] not in keywords_shown:
                keywords_shown.add(idf[0])
                res.append(idf)
        return json.dumps(res)
    getPhotosByID.exposed = True

    def _cacheAll(self):
        """Fill ``cache_events`` and ``cache_photos`` from getAllEvents()."""
        print('begin cache')
        all_events = self.getAllEvents()
        print(type(all_events))
        all_events = json.loads(all_events)
        # The counter deliberately runs on across both passes, as before.
        cnt = 0
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_events[e['_id']] = json.dumps(e)
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_photos[e['_id']] = self.getPhotosByID(e['_id'])

    def getEventByID(self, event_id):
        """Return one event as JSON, from the cache when available.

        Cached events have the extra photo metadata stripped before return.
        """
        if event_id in self.cache_events:
            # This message used to sit after the return and never ran.
            print('event cached. return directly')
            tmp = json.loads(self.cache_events[event_id])
            tmp['photos'] = [self._deleteExtraMeta(p) for p in tmp['photos']]
            return json.dumps(tmp)
        event = self.ei.getEventByID(event_id)
        event['_id'] = str(event['_id'])
        return json.dumps(event)
    getEventByID.exposed = True

    def getTopKeywords(self, event_id):
        """Return the event's top ten keywords as JSON."""
        event = self.ei.getEventByID(event_id)
        ef = EventFeature(event)
        words = ef.getTopKeywords(k=10)
        return json.dumps(words)
    #getTopKeywords.exposed = True

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the event's ``label`` and save it."""
        event = self.ei.getEventByID(str(event_id))
        # Joined by hand so the output matches the former multi-argument
        # print statement while staying a single-argument print.
        print(' '.join('%s' % part for part in
                       ('setting ', event_id, 'label = ', label)))
        event['label'] = int(label)
        self.ei.updateDocument(event)
from utility.event_interface import EventInterface

# Print the photo distribution array of the baseline candidate events.
interface = EventInterface()
interface.setDB('citybeat')
interface.setCollection('baseline_candidate_events')
distribution = interface.getPhotoDistributionArray()
print(distribution)
class Root:
    """Web handler serving ``citybeat.online_candidate`` events with caches.

    Methods followed by ``<name>.exposed = True`` are the published ones (the
    marker suggests CherryPy -- confirm); the others are internal helpers.
    ``print`` is always called with a single parenthesised argument, which
    behaves exactly like the print statement on Python 2.
    """

    # Key paths removed from a photo by _deleteExtraMeta, in deletion order.
    _EXTRA_META_PATHS = (
        ('comments',),
        ('caption', 'from'),
        ('filter',),
        ('user',),
        ('images', 'standard_resolution'),
        ('images', 'low_resolution'),
        ('likes',),
    )

    def __init__(self):
        """Bind the event collection, load codes and start with empty caches.

        The caches are not pre-filled; ``_cacheAll`` has to be called for that.
        """
        self.ei = EventInterface()
        self.ei.setDB('citybeat')
        self.ei.setCollection('online_candidate')
        self._loadCrowdFlowerCode()
        self.cache_events = {}
        self.cache_photos = {}

    def getAllEvents(self):
        """Return every event as a JSON list with fixed display values.

        Each event gets a string ``_id`` plus constant ``urgency``, ``volume``
        and ``stats`` entries.
        """
        events = []
        for e in self.ei.getAllDocuments():
            e['_id'] = str(e['_id'])
            e['urgency'] = 58
            e['volume'] = 99
            e['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            events.append(e)
        return json.dumps(events)
    getAllEvents.exposed = True

    def _loadCrowdFlowerCode(self):
        """Load ``crowdflower_code.txt`` into ``self.cf_code``.

        Each line is split on commas; the first field is the key and the
        second the value.  Lines with fewer than two fields are skipped.
        """
        with open('crowdflower_code.txt') as code_file:
            self.cf_code = {}
            for line in code_file:
                fields = line.split(',')
                if len(fields) < 2:
                    continue  # blank or malformed line
                # NOTE(review): the value is stored unstripped, so it keeps a
                # trailing newline when it is the last field on the line.
                self.cf_code[fields[0]] = fields[1]

    def getCrowdFlowerCode(self, event_id):
        """Return the code stored for ``event_id``, or None when unknown."""
        return self.cf_code.get(event_id)
    getCrowdFlowerCode.exposed = True

    def getAllEventsIDs(self):
        """Return a JSON list with the string form of every document id."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])
    #getAllEventsIDs.exposed = True

    def _deleteExtraMeta(self, photo):
        """Strip bulky metadata from ``photo`` in place and return it.

        Missing fields, and containers that are not mappings, are ignored.
        """
        for path in self._EXTRA_META_PATHS:
            try:
                container = photo
                for key in path[:-1]:
                    container = container[key]
                del container[path[-1]]
            except (KeyError, TypeError, IndexError, AttributeError):
                # Field absent, or photo/caption/images is not a dict.
                pass
        return photo

    def getPhotosByID(self, event_id):
        """Return the keyword/photo entries of an event as JSON.

        A cached answer is returned with the extra metadata stripped from the
        photo list held at index 2 of each entry.  Otherwise the TF and TF-IDF
        keyword lists are interleaved, keeping the first entry per keyword.
        """
        if event_id in self.cache_photos:
            print('cached. return directly')
            entries = json.loads(self.cache_photos[event_id])
            for entry in entries:
                entry[2] = [self._deleteExtraMeta(p) for p in entry[2]]
            return json.dumps(entries)

        # NOTE(review): getEventByID() below treats this same lookup result as
        # a plain dict, and the EventFrontend wrapper was commented out, so
        # the keyword method calls here look like they cannot work -- confirm.
        event = self.ei.getEventByID(event_id)
        top_words_list = event.getTopKeywordsAndPhotos(20, 5)
        words_pics_list = event.getTopKeywordsAndPhotosByTFIDF(20, 5)
        keywords_shown = set()
        res = []
        for tf, idf in zip(top_words_list, words_pics_list):
            if tf[0] not in keywords_shown:
                keywords_shown.add(tf[0])
                res.append(tf)
            if idf[0] not in keywords_shown:
                keywords_shown.add(idf[0])
                res.append(idf)
        return json.dumps(res)
    getPhotosByID.exposed = True

    def _cacheAll(self):
        """Fill ``cache_events`` and ``cache_photos`` from getAllEvents()."""
        print('begin cache')
        all_events = self.getAllEvents()
        print(type(all_events))
        all_events = json.loads(all_events)
        # The counter deliberately runs on across both passes, as before.
        cnt = 0
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_events[e['_id']] = json.dumps(e)
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_photos[e['_id']] = self.getPhotosByID(e['_id'])

    def getEventByID(self, event_id):
        """Return one event as JSON, from the cache when available.

        Cached events have the extra photo metadata stripped before return.
        """
        if event_id in self.cache_events:
            # This message used to sit after the return and never ran.
            print('event cached. return directly')
            tmp = json.loads(self.cache_events[event_id])
            tmp['photos'] = [self._deleteExtraMeta(p) for p in tmp['photos']]
            return json.dumps(tmp)
        event = self.ei.getEventByID(event_id)
        event['_id'] = str(event['_id'])
        return json.dumps(event)
    getEventByID.exposed = True

    def getTopKeywords(self, event_id):
        """Return the event's top ten keywords as JSON."""
        event = self.ei.getEventByID(event_id)
        ef = EventFeature(event)
        words = ef.getTopKeywords(k=10)
        return json.dumps(words)
    #getTopKeywords.exposed = True

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the event's ``label`` and save it."""
        event = self.ei.getEventByID(str(event_id))
        # Joined by hand so the output matches the former multi-argument
        # print statement while staying a single-argument print.
        print(' '.join('%s' % part for part in
                       ('setting ', event_id, 'label = ', label)))
        event['label'] = int(label)
        self.ei.updateDocument(event)
class Root:
    """Web handler serving front-end events and the latest statistics.

    Methods followed by ``<name>.exposed = True`` are the published ones (the
    marker suggests CherryPy -- confirm); the others are internal helpers.
    The database and collection names come from ``InstagramConfig``.
    """

    def __init__(self):
        """Bind the configured event collection and the stats interface."""
        self.ei = EventInterface()
        self.ei.setDB(InstagramConfig.event_db)
        self.ei.setCollection(InstagramConfig.front_end_events)
        self.stats_interface = StatsInterface()

    def getAllEvents(self):
        """Return the five newest qualifying events as a JSON list.

        Events created in the last three days are kept when their
        ``actual_value`` is at least 6 and their ``zscore`` exceeds 3.0, then
        ordered by ``created_time`` descending.
        """
        now = int(getCurrentStampUTC())
        # The window is three days; the former local name said two.
        window_start = now - 3 * 24 * 3600
        event_cursor = self.ei.getAllDocuments(
            {'created_time': {'$gte': str(window_start)}})
        events = []
        for e in event_cursor:
            e['_id'] = str(e['_id'])
            e['urgency'] = 58
            e['volume'] = 99
            e['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            if e['actual_value'] >= 6 and e['zscore'] > 3.0:
                events.append(e)
        events.sort(key=lambda x: x['created_time'], reverse=True)
        for w in events:
            print(w['created_time'])
        return json.dumps(events[:5])
    getAllEvents.exposed = True

    def getAllEventsIDs(self):
        """Return a JSON list with the string form of every document id."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])
    #getAllEventsIDs.exposed = True

    def getPhotosByID(self, event_id):
        """Return JSON with all photos of an event and its first ten.

        The result is two ``[label, count, photos]`` triples.  The second
        list is cut to ten photos so that it matches its reported count.
        """
        event = json.loads(self.getEventByID(event_id))
        photos = event['photos']
        rep_photos = photos[:10]
        res = [
            ['all_photos', len(photos), photos],
            ['top_10_representative', len(rep_photos), rep_photos],
        ]
        return json.dumps(res)
    getPhotosByID.exposed = True

    def getEventByID(self, event_id):
        """Return one event as JSON after ``selectOnePhotoForOneUser``."""
        event = Event(self.ei.getEventByID(event_id))
        event.selectOnePhotoForOneUser()
        event_dic = event.toDict()
        event_dic['_id'] = str(event_dic['_id'])
        return json.dumps(event_dic)
    getEventByID.exposed = True

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the event's ``label`` and save it."""
        event = self.ei.getEventByID(str(event_id))
        event['label'] = int(label)
        self.ei.updateDocument(event)
    #setLabel.exposed = True

    def getLatestStats(self):
        """Return the newest stats document of the last five minutes as JSON.

        Returns ``{}`` when no stats document falls inside the window.
        """
        cutoff = int(getCurrentStampUTC()) - 5 * 60
        condition = {'created_time': {"$gte": str(cutoff)}}
        newest_first = self.stats_interface.getAllDocuments(
            condition=condition).sort('created_time', -1)
        try:
            most_recent_stats = newest_first[0]
        except IndexError:
            # No stats in the window.  Presumably this is a pymongo cursor,
            # which raises IndexError on an empty result -- confirm.
            return json.dumps({})
        most_recent_stats['_id'] = str(most_recent_stats['_id'])
        return json.dumps(most_recent_stats)
    getLatestStats.exposed = True
class Root:
    """Web handler serving candidate events from the ``AmazonMT`` database.

    Methods followed by ``<name>.exposed = True`` are the published ones (the
    marker suggests CherryPy -- confirm); the others are internal helpers.
    ``print`` is always called with a single parenthesised argument, which
    behaves exactly like the print statement on Python 2.
    """

    # Key paths removed from a photo by _deleteExtraMeta, in deletion order.
    _EXTRA_META_PATHS = (
        ('comments',),
        ('caption', 'from'),
        ('filter',),
        ('user',),
        ('images', 'standard_resolution'),
        ('images', 'low_resolution'),
        ('likes',),
    )

    def __init__(self):
        """Bind the event collection, the representor and the lookup data."""
        self.ei = EventInterface()
        self.ei.setDB('AmazonMT')
        self.ei.setCollection('candidate_event_25by25_merged')
        self.representor = Representor()
        # _cacheAll() fills these; they used to be missing, so calling it
        # raised AttributeError.
        self.cache_events = {}
        self.cache_photos = {}
        self._loadCrowdFlowerCode()

    def getAllEvents(self):
        """Return a JSON list of a random sample of events with >3 photos.

        Roughly nine in ten qualifying events are kept.  Each kept event gets
        fixed ``urgency``/``volume``/``stats`` values and its photos replaced
        by at most five representative photos.
        """
        events = []
        # Read the whole cursor first, as before, so the representor work
        # below does not run while the cursor is still being consumed.
        for e in list(self.ei.getAllDocuments()):
            if len(e['photos']) <= 3:
                continue
            if random.random() <= 0.1:
                continue
            e['_id'] = str(e['_id'])
            e['urgency'] = 58
            e['volume'] = 99
            e['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            e['photos'] = self.representor.getRepresentivePhotos(e)[:5]
            events.append(e)
        return json.dumps(events)
    getAllEvents.exposed = True

    def _loadCrowdFlowerCode(self):
        """Load ``crowdflower_code.txt`` into ``self.cf_code``.

        Each line is split on commas; the first field is the key and the
        second the value.  Lines with fewer than two fields are skipped.
        """
        with open('crowdflower_code.txt') as code_file:
            self.cf_code = {}
            for line in code_file:
                fields = line.split(',')
                if len(fields) < 2:
                    continue  # blank or malformed line
                # NOTE(review): the value is stored unstripped, so it keeps a
                # trailing newline when it is the last field on the line.
                self.cf_code[fields[0]] = fields[1]

    def getCrowdFlowerCode(self, event_id):
        """Return the code stored for ``event_id``, or None when unknown."""
        return self.cf_code.get(event_id)
    getCrowdFlowerCode.exposed = True

    def getAllEventsIDs(self):
        """Return a JSON list with the string form of every document id."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])
    #getAllEventsIDs.exposed = True

    def _deleteExtraMeta(self, photo):
        """Strip bulky metadata from ``photo`` in place and return it.

        Missing fields, and containers that are not mappings, are ignored.
        """
        for path in self._EXTRA_META_PATHS:
            try:
                container = photo
                for key in path[:-1]:
                    container = container[key]
                del container[path[-1]]
            except (KeyError, TypeError, IndexError, AttributeError):
                # Field absent, or photo/caption/images is not a dict.
                pass
        return photo

    def getPhotosByID(self, event_id):
        """Return JSON with all photos and up to ten representative ones.

        The result is two ``[label, count, photos]`` triples.
        """
        event = json.loads(self.getEventByID(event_id))
        photos = event['photos']
        rep_photos = self.representor.getRepresentivePhotos(event)[:10]
        res = [
            ['all_photos', len(photos), photos],
            ['top_10_representative', len(rep_photos), rep_photos],
        ]
        return json.dumps(res)
    getPhotosByID.exposed = True

    def _cacheAll(self):
        """Fill ``cache_events`` and ``cache_photos`` from getAllEvents()."""
        print('begin cache')
        all_events = self.getAllEvents()
        print(type(all_events))
        all_events = json.loads(all_events)
        # The counter deliberately runs on across both passes, as before.
        cnt = 0
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_events[e['_id']] = json.dumps(e)
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_photos[e['_id']] = self.getPhotosByID(e['_id'])

    def getEventByID(self, event_id):
        """Return one event as JSON after ``selectOnePhotoForOneUser``."""
        event = Event(self.ei.getEventByID(event_id))
        event.selectOnePhotoForOneUser()
        event_dic = event.toJSON()
        event_dic['_id'] = str(event_dic['_id'])
        return json.dumps(event_dic)
    getEventByID.exposed = True

    def getTopKeywords(self, event_id):
        """Return the event's top ten keywords as JSON."""
        event = self.ei.getEventByID(event_id)
        ef = EventFeature(event)
        words = ef.getTopKeywords(k=10)
        return json.dumps(words)
    #getTopKeywords.exposed = True

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the event's ``label`` and save it."""
        event = self.ei.getEventByID(str(event_id))
        # Joined by hand so the output matches the former multi-argument
        # print statement while staying a single-argument print.
        print(' '.join('%s' % part for part in
                       ('setting ', event_id, 'label = ', label)))
        event['label'] = int(label)
        self.ei.updateDocument(event)