Пример #1
0
class Root:
    """Web handler that serves candidate events from the 'citybeat' DB as JSON.

    Methods carrying ``.exposed = True`` are the ones published by the
    dispatcher (the marker convention used by CherryPy-style servers);
    everything else is internal.
    """

    def __init__(self):
        """Bind to the 'online_candidate' collection and load lookup data."""
        self.ei = EventInterface()
        self.ei.setDB('citybeat')
        self.ei.setCollection('online_candidate')

        self._loadCrowdFlowerCode()

        # Both caches map an event id (str) to a JSON string. They stay
        # empty unless _cacheAll() is invoked explicitly.
        self.cache_events = {}
        self.cache_photos = {}

    def getAllEvents(self):
        """Return every document of the collection as a JSON array string.

        Each event gets its ``_id`` stringified (so it is JSON-serialisable)
        and hard-coded ``urgency``, ``volume`` and ``stats`` values attached.
        """
        events = []
        for event in self.ei.getAllDocuments():
            event['_id'] = str(event['_id'])
            event['urgency'] = 58
            event['volume'] = 99
            event['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            events.append(event)
        return json.dumps(events)
    getAllEvents.exposed = True

    def _loadCrowdFlowerCode(self):
        """Fill ``self.cf_code`` from 'crowdflower_code.txt' in the working dir.

        Every line is split on commas: field 0 becomes the key, field 1 the
        value. The line terminator is stripped so stored codes do not end in
        a newline, and lines without a comma (e.g. blank lines) are skipped
        instead of raising IndexError.

        Raises:
            IOError/OSError: if the file cannot be opened.
        """
        self.cf_code = {}
        # Context manager guarantees the handle is closed.
        with open('crowdflower_code.txt') as code_file:
            for line in code_file:
                fields = line.rstrip('\r\n').split(',')
                if len(fields) < 2:
                    continue
                self.cf_code[fields[0]] = fields[1]

    def getCrowdFlowerCode(self, event_id):
        """Return the code stored for ``event_id``, or None when unknown."""
        return self.cf_code.get(event_id)
    getCrowdFlowerCode.exposed = True

    def getAllEventsIDs(self):
        """Return all document ids, stringified, as a JSON array string."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])
    # Deliberately not exposed.

    def _deleteExtraMeta(self, photo):
        """Strip bulky metadata from ``photo`` in place and return it.

        Removal is best-effort: a missing key, or a parent that is not a
        mapping (e.g. ``caption`` being None), is silently ignored.
        """
        unwanted_paths = (
            ('comments',),
            ('caption', 'from'),
            ('filter',),
            ('user',),
            ('images', 'standard_resolution'),
            ('images', 'low_resolution'),
            ('likes',),
        )
        for path in unwanted_paths:
            try:
                holder = photo
                for key in path[:-1]:
                    holder = holder[key]
                del holder[path[-1]]
            except (KeyError, TypeError):
                pass
        return photo

    def getPhotosByID(self, event_id):
        """Return keyword/photo groups for an event as a JSON string.

        When the event is in ``self.cache_photos`` the cached JSON is
        decoded, the photo list at index 2 of every group is stripped of
        extra metadata, and the result is re-encoded. Otherwise the groups
        are computed by interleaving the frequency-based and TF-IDF-based
        keyword lists, keeping only the first occurrence of each keyword.
        """
        if event_id in self.cache_photos:
            print('cached. return directly')
            groups = json.loads(self.cache_photos[event_id])
            for group in groups:
                group[2] = [self._deleteExtraMeta(p) for p in group[2]]
            return json.dumps(groups)

        event = self.ei.getEventByID(event_id)
        # NOTE(review): the two keyword methods below are called directly on
        # whatever ei.getEventByID returns; confirm that object provides them.
        top_words_list = event.getTopKeywordsAndPhotos(20, 5)
        words_pics_list = event.getTopKeywordsAndPhotosByTFIDF(20, 5)

        keywords_shown = set()
        res = []
        for pair in zip(top_words_list, words_pics_list):
            # pair is (frequency entry, tf-idf entry); element 0 of each
            # entry is the keyword used for de-duplication.
            for entry in pair:
                if entry[0] not in keywords_shown:
                    keywords_shown.add(entry[0])
                    res.append(entry)
        return json.dumps(res)
    getPhotosByID.exposed = True

    def _cacheAll(self):
        """Populate both caches for every event, printing progress.

        The counter runs across both passes and is printed every 100 steps.
        """
        print('begin cache')
        all_events = self.getAllEvents()
        print(type(all_events))
        all_events = json.loads(all_events)
        cnt = 0
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_events[e['_id']] = json.dumps(e)
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_photos[e['_id']] = self.getPhotosByID(e['_id'])

    def getEventByID(self, event_id):
        """Return one event as a JSON string.

        A cached event is returned with extra photo metadata stripped;
        otherwise the event is fetched and its ``_id`` stringified.
        """
        if event_id in self.cache_events:
            # This message used to sit after the return and never ran.
            print('event cached. return directly')
            cached = json.loads(self.cache_events[event_id])
            cached['photos'] = [self._deleteExtraMeta(p) for p in cached['photos']]
            return json.dumps(cached)

        event = self.ei.getEventByID(event_id)
        event['_id'] = str(event['_id'])
        return json.dumps(event)
    getEventByID.exposed = True

    def getTopKeywords(self, event_id):
        """Return the top 10 keywords of an event as a JSON string."""
        event = self.ei.getEventByID(event_id)
        ef = EventFeature(event)
        words = ef.getTopKeywords(k=10)
        return json.dumps(words)
    # Deliberately not exposed.

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the event's ``label`` and persist it.

        Raises:
            ValueError: if ``label`` cannot be converted to int.
        """
        event = self.ei.getEventByID(str(event_id))
        # Single-argument print keeps the output identical on Python 2 and 3.
        print('setting  %s label =  %s' % (event_id, label))
        event['label'] = int(label)
        self.ei.updateDocument(event)
Пример #2
0
class Root:
    """Web handler serving 'AmazonMT' candidate events and photos as JSON.

    Methods carrying ``.exposed = True`` are the ones published by the
    dispatcher (the marker convention used by CherryPy-style servers).
    """

    def __init__(self):
        """Bind to the merged candidate collection and load lookup data."""
        self.ei = EventInterface()
        self.ei.setDB('AmazonMT')
        self.ei.setCollection('candidate_event_25by25_merged')
        self.representor = Representor()

        self._loadCrowdFlowerCode()

        # _cacheAll() writes into these dicts; they were never created
        # before, so calling it raised AttributeError.
        self.cache_events = {}
        self.cache_photos = {}

    def getAllEvents(self):
        """Return a random sample of events as a JSON array string.

        Only events with more than 3 photos are considered, and each of
        those is kept only when ``random.random() > 0.1``. Kept events get
        a stringified ``_id``, hard-coded ``urgency``/``volume``/``stats``
        and at most 5 representative photos.
        """
        events = []
        for event in self.ei.getAllDocuments():
            if len(event['photos']) <= 3:
                continue
            if random.random() <= 0.1:
                continue
            event['_id'] = str(event['_id'])
            event['urgency'] = 58
            event['volume'] = 99
            event['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            rep_photos = self.representor.getRepresentivePhotos(event)
            event['photos'] = rep_photos[:5]
            events.append(event)
        return json.dumps(events)

    getAllEvents.exposed = True

    def _loadCrowdFlowerCode(self):
        """Fill ``self.cf_code`` from 'crowdflower_code.txt' in the working dir.

        Every line is split on commas: field 0 becomes the key, field 1 the
        value. The line terminator is stripped so stored codes do not end in
        a newline, and lines without a comma (e.g. blank lines) are skipped
        instead of raising IndexError.

        Raises:
            IOError/OSError: if the file cannot be opened.
        """
        self.cf_code = {}
        # Context manager guarantees the handle is closed.
        with open('crowdflower_code.txt') as code_file:
            for line in code_file:
                fields = line.rstrip('\r\n').split(',')
                if len(fields) < 2:
                    continue
                self.cf_code[fields[0]] = fields[1]

    def getCrowdFlowerCode(self, event_id):
        """Return the code stored for ``event_id``, or None when unknown."""
        return self.cf_code.get(event_id)

    getCrowdFlowerCode.exposed = True

    def getAllEventsIDs(self):
        """Return all document ids, stringified, as a JSON array string."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])

    # Deliberately not exposed.

    def _deleteExtraMeta(self, photo):
        """Strip bulky metadata from ``photo`` in place and return it.

        Removal is best-effort: a missing key, or a parent that is not a
        mapping (e.g. ``caption`` being None), is silently ignored.
        """
        unwanted_paths = (
            ('comments',),
            ('caption', 'from'),
            ('filter',),
            ('user',),
            ('images', 'standard_resolution'),
            ('images', 'low_resolution'),
            ('likes',),
        )
        for path in unwanted_paths:
            try:
                holder = photo
                for key in path[:-1]:
                    holder = holder[key]
                del holder[path[-1]]
            except (KeyError, TypeError):
                pass
        return photo

    def getPhotosByID(self, event_id):
        """Return two photo groups for an event as a JSON string.

        The result is a list of ``[label, count, photos]`` triples: first
        all photos of the event, then up to 10 representative photos.
        """
        event = json.loads(self.getEventByID(event_id))
        photos = event['photos']
        rep_photos = self.representor.getRepresentivePhotos(event)[:10]
        groups = [
            ['all_photos', len(photos), photos],
            ['top_10_representative', min(10, len(rep_photos)), rep_photos],
        ]
        return json.dumps(groups)

    getPhotosByID.exposed = True

    def _cacheAll(self):
        """Fill ``cache_events`` and ``cache_photos`` for every event.

        The counter runs across both passes and is printed every 100 steps.
        In this class the getters do not read the caches back.
        """
        print('begin cache')
        all_events = self.getAllEvents()
        print(type(all_events))
        all_events = json.loads(all_events)
        cnt = 0
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_events[e['_id']] = json.dumps(e)
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_photos[e['_id']] = self.getPhotosByID(e['_id'])

    def getEventByID(self, event_id):
        """Return one event as a JSON string.

        The stored document is wrapped in ``Event``, reduced via
        ``selectOnePhotoForOneUser()``, converted with ``toJSON()`` and its
        ``_id`` stringified before encoding.
        """
        event = Event(self.ei.getEventByID(event_id))
        event.selectOnePhotoForOneUser()
        event_dic = event.toJSON()
        event_dic['_id'] = str(event_dic['_id'])
        return json.dumps(event_dic)

    getEventByID.exposed = True

    def getTopKeywords(self, event_id):
        """Return the top 10 keywords of an event as a JSON string."""
        event = self.ei.getEventByID(event_id)
        ef = EventFeature(event)
        words = ef.getTopKeywords(k=10)
        return json.dumps(words)

    # Deliberately not exposed.

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the event's ``label`` and persist it.

        Raises:
            ValueError: if ``label`` cannot be converted to int.
        """
        event = self.ei.getEventByID(str(event_id))
        # Single-argument print keeps the output identical on Python 2 and 3.
        print('setting  %s label =  %s' % (event_id, label))
        event['label'] = int(label)
        self.ei.updateDocument(event)
Пример #3
0
class Root:
    """Web handler serving recent front-end events and stats as JSON.

    Methods carrying ``.exposed = True`` are the ones published by the
    dispatcher (the marker convention used by CherryPy-style servers).
    """

    def __init__(self):
        """Bind to the configured event DB/collection and the stats store."""
        self.ei = EventInterface()
        self.ei.setDB(InstagramConfig.event_db)
        self.ei.setCollection(InstagramConfig.front_end_events)
        self.stats_interface = StatsInterface()

    def getAllEvents(self):
        """Return the 5 newest qualifying events as a JSON array string.

        Events are queried with ``created_time`` >= (now - 3 days), compared
        as strings. An event qualifies when ``actual_value >= 6`` and
        ``zscore > 3.0``. Each returned event has a stringified ``_id`` and
        hard-coded ``urgency``/``volume``/``stats`` values.
        """
        now = int(getCurrentStampUTC())
        three_days_before = now - 3 * 24 * 3600
        query = {'created_time': {'$gte': str(three_days_before)}}
        events = []
        for e in self.ei.getAllDocuments(query):
            e['_id'] = str(e['_id'])
            e['urgency'] = 58
            e['volume'] = 99
            e['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            if e['actual_value'] >= 6 and e['zscore'] > 3.0:
                events.append(e)
        # Newest first; list.sort is stable, like sorted().
        events.sort(key=lambda ev: ev['created_time'], reverse=True)
        for ev in events:
            print(ev['created_time'])
        return json.dumps(events[:5])
    getAllEvents.exposed = True

    def getAllEventsIDs(self):
        """Return all document ids, stringified, as a JSON array string."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])
    # Deliberately not exposed.

    def getPhotosByID(self, event_id):
        """Return two photo groups for an event as a JSON string.

        The result is a list of ``[label, count, photos]`` triples: first
        all photos of the event, then the first 10 of them. The second
        group is sliced so its photo list matches the reported count.
        """
        event = json.loads(self.getEventByID(event_id))
        photos = event['photos']
        rep_photos = photos[:10]
        groups = [
            ['all_photos', len(photos), photos],
            ['top_10_representative', len(rep_photos), rep_photos],
        ]
        return json.dumps(groups)
    getPhotosByID.exposed = True

    def getEventByID(self, event_id):
        """Return one event as a JSON string.

        The stored document is wrapped in ``Event``, reduced via
        ``selectOnePhotoForOneUser()``, converted with ``toDict()`` and its
        ``_id`` stringified before encoding.
        """
        event = Event(self.ei.getEventByID(event_id))
        event.selectOnePhotoForOneUser()
        event_dic = event.toDict()
        event_dic['_id'] = str(event_dic['_id'])
        return json.dumps(event_dic)
    getEventByID.exposed = True

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the event's ``label`` and persist it.

        Raises:
            ValueError: if ``label`` cannot be converted to int.
        """
        event = self.ei.getEventByID(str(event_id))
        event['label'] = int(label)
        self.ei.updateDocument(event)
    # Deliberately not exposed.

    def getLatestStats(self):
        """Return the newest stats document of the last 5 minutes as JSON.

        NOTE(review): indexing ``[0]`` presumably raises when no document
        matches the window - confirm how callers should see that case.
        """
        five_minutes_ago = int(getCurrentStampUTC()) - 5 * 60
        condition = {'created_time': {"$gte": str(five_minutes_ago)}}
        matches = self.stats_interface.getAllDocuments(condition=condition)
        most_recent_stats = matches.sort('created_time', -1)[0]
        most_recent_stats['_id'] = str(most_recent_stats['_id'])
        return json.dumps(most_recent_stats)
    getLatestStats.exposed = True
Пример #4
0
class Root:
    """Web handler serving 'AmazonMT' candidate events and photos as JSON.

    Methods carrying ``.exposed = True`` are the ones published by the
    dispatcher (the marker convention used by CherryPy-style servers).
    """

    def __init__(self):
        """Bind to the merged candidate collection and load lookup data."""
        self.ei = EventInterface()
        self.ei.setDB('AmazonMT')
        self.ei.setCollection('candidate_event_25by25_merged')
        self.representor = Representor()

        self._loadCrowdFlowerCode()

        # _cacheAll() writes into these dicts; they were never created
        # before, so calling it raised AttributeError.
        self.cache_events = {}
        self.cache_photos = {}

    def getAllEvents(self):
        """Return a random sample of events as a JSON array string.

        Only events with more than 3 photos are considered, and each of
        those is kept only when ``random.random() > 0.1``. Kept events get
        a stringified ``_id``, hard-coded ``urgency``/``volume``/``stats``
        and at most 5 representative photos.
        """
        events = []
        for event in self.ei.getAllDocuments():
            if len(event['photos']) <= 3:
                continue
            if random.random() <= 0.1:
                continue
            event['_id'] = str(event['_id'])
            event['urgency'] = 58
            event['volume'] = 99
            event['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            rep_photos = self.representor.getRepresentivePhotos(event)
            event['photos'] = rep_photos[:5]
            events.append(event)
        return json.dumps(events)
    getAllEvents.exposed = True

    def _loadCrowdFlowerCode(self):
        """Fill ``self.cf_code`` from 'crowdflower_code.txt' in the working dir.

        Every line is split on commas: field 0 becomes the key, field 1 the
        value. The line terminator is stripped so stored codes do not end in
        a newline, and lines without a comma (e.g. blank lines) are skipped
        instead of raising IndexError.

        Raises:
            IOError/OSError: if the file cannot be opened.
        """
        self.cf_code = {}
        # Context manager guarantees the handle is closed.
        with open('crowdflower_code.txt') as code_file:
            for line in code_file:
                fields = line.rstrip('\r\n').split(',')
                if len(fields) < 2:
                    continue
                self.cf_code[fields[0]] = fields[1]

    def getCrowdFlowerCode(self, event_id):
        """Return the code stored for ``event_id``, or None when unknown."""
        return self.cf_code.get(event_id)
    getCrowdFlowerCode.exposed = True

    def getAllEventsIDs(self):
        """Return all document ids, stringified, as a JSON array string."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])
    # Deliberately not exposed.

    def _deleteExtraMeta(self, photo):
        """Strip bulky metadata from ``photo`` in place and return it.

        Removal is best-effort: a missing key, or a parent that is not a
        mapping (e.g. ``caption`` being None), is silently ignored.
        """
        unwanted_paths = (
            ('comments',),
            ('caption', 'from'),
            ('filter',),
            ('user',),
            ('images', 'standard_resolution'),
            ('images', 'low_resolution'),
            ('likes',),
        )
        for path in unwanted_paths:
            try:
                holder = photo
                for key in path[:-1]:
                    holder = holder[key]
                del holder[path[-1]]
            except (KeyError, TypeError):
                pass
        return photo

    def getPhotosByID(self, event_id):
        """Return two photo groups for an event as a JSON string.

        The result is a list of ``[label, count, photos]`` triples: first
        all photos of the event, then up to 10 representative photos.
        """
        event = json.loads(self.getEventByID(event_id))
        photos = event['photos']
        rep_photos = self.representor.getRepresentivePhotos(event)[:10]
        groups = [
            ['all_photos', len(photos), photos],
            ['top_10_representative', min(10, len(rep_photos)), rep_photos],
        ]
        return json.dumps(groups)
    getPhotosByID.exposed = True

    def _cacheAll(self):
        """Fill ``cache_events`` and ``cache_photos`` for every event.

        The counter runs across both passes and is printed every 100 steps.
        In this class the getters do not read the caches back.
        """
        print('begin cache')
        all_events = self.getAllEvents()
        print(type(all_events))
        all_events = json.loads(all_events)
        cnt = 0
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_events[e['_id']] = json.dumps(e)
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_photos[e['_id']] = self.getPhotosByID(e['_id'])

    def getEventByID(self, event_id):
        """Return one event as a JSON string.

        The stored document is wrapped in ``Event``, reduced via
        ``selectOnePhotoForOneUser()``, converted with ``toJSON()`` and its
        ``_id`` stringified before encoding.
        """
        event = Event(self.ei.getEventByID(event_id))
        event.selectOnePhotoForOneUser()
        event_dic = event.toJSON()
        event_dic['_id'] = str(event_dic['_id'])
        return json.dumps(event_dic)
    getEventByID.exposed = True

    def getTopKeywords(self, event_id):
        """Return the top 10 keywords of an event as a JSON string."""
        event = self.ei.getEventByID(event_id)
        ef = EventFeature(event)
        words = ef.getTopKeywords(k=10)
        return json.dumps(words)
    # Deliberately not exposed.

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the event's ``label`` and persist it.

        Raises:
            ValueError: if ``label`` cannot be converted to int.
        """
        event = self.ei.getEventByID(str(event_id))
        # Single-argument print keeps the output identical on Python 2 and 3.
        print('setting  %s label =  %s' % (event_id, label))
        event['label'] = int(label)
        self.ei.updateDocument(event)
Пример #5
0
class Root:
    """Web handler that serves candidate events from the 'citybeat' DB as JSON.

    Methods carrying ``.exposed = True`` are the ones published by the
    dispatcher (the marker convention used by CherryPy-style servers);
    everything else is internal.
    """

    def __init__(self):
        """Bind to the 'online_candidate' collection and load lookup data."""
        self.ei = EventInterface()
        self.ei.setDB('citybeat')
        self.ei.setCollection('online_candidate')

        self._loadCrowdFlowerCode()

        # Both caches map an event id (str) to a JSON string. They stay
        # empty unless _cacheAll() is invoked explicitly.
        self.cache_events = {}
        self.cache_photos = {}

    def getAllEvents(self):
        """Return every document of the collection as a JSON array string.

        Each event gets its ``_id`` stringified (so it is JSON-serialisable)
        and hard-coded ``urgency``, ``volume`` and ``stats`` values attached.
        """
        events = []
        for event in self.ei.getAllDocuments():
            event['_id'] = str(event['_id'])
            event['urgency'] = 58
            event['volume'] = 99
            event['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            events.append(event)
        return json.dumps(events)

    getAllEvents.exposed = True

    def _loadCrowdFlowerCode(self):
        """Fill ``self.cf_code`` from 'crowdflower_code.txt' in the working dir.

        Every line is split on commas: field 0 becomes the key, field 1 the
        value. The line terminator is stripped so stored codes do not end in
        a newline, and lines without a comma (e.g. blank lines) are skipped
        instead of raising IndexError.

        Raises:
            IOError/OSError: if the file cannot be opened.
        """
        self.cf_code = {}
        # Context manager guarantees the handle is closed.
        with open('crowdflower_code.txt') as code_file:
            for line in code_file:
                fields = line.rstrip('\r\n').split(',')
                if len(fields) < 2:
                    continue
                self.cf_code[fields[0]] = fields[1]

    def getCrowdFlowerCode(self, event_id):
        """Return the code stored for ``event_id``, or None when unknown."""
        return self.cf_code.get(event_id)

    getCrowdFlowerCode.exposed = True

    def getAllEventsIDs(self):
        """Return all document ids, stringified, as a JSON array string."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])

    # Deliberately not exposed.

    def _deleteExtraMeta(self, photo):
        """Strip bulky metadata from ``photo`` in place and return it.

        Removal is best-effort: a missing key, or a parent that is not a
        mapping (e.g. ``caption`` being None), is silently ignored.
        """
        unwanted_paths = (
            ('comments',),
            ('caption', 'from'),
            ('filter',),
            ('user',),
            ('images', 'standard_resolution'),
            ('images', 'low_resolution'),
            ('likes',),
        )
        for path in unwanted_paths:
            try:
                holder = photo
                for key in path[:-1]:
                    holder = holder[key]
                del holder[path[-1]]
            except (KeyError, TypeError):
                pass
        return photo

    def getPhotosByID(self, event_id):
        """Return keyword/photo groups for an event as a JSON string.

        When the event is in ``self.cache_photos`` the cached JSON is
        decoded, the photo list at index 2 of every group is stripped of
        extra metadata, and the result is re-encoded. Otherwise the groups
        are computed by interleaving the frequency-based and TF-IDF-based
        keyword lists, keeping only the first occurrence of each keyword.
        """
        if event_id in self.cache_photos:
            print('cached. return directly')
            groups = json.loads(self.cache_photos[event_id])
            for group in groups:
                group[2] = [self._deleteExtraMeta(p) for p in group[2]]
            return json.dumps(groups)

        event = self.ei.getEventByID(event_id)
        # NOTE(review): the two keyword methods below are called directly on
        # whatever ei.getEventByID returns; confirm that object provides them.
        top_words_list = event.getTopKeywordsAndPhotos(20, 5)
        words_pics_list = event.getTopKeywordsAndPhotosByTFIDF(20, 5)

        keywords_shown = set()
        res = []
        for pair in zip(top_words_list, words_pics_list):
            # pair is (frequency entry, tf-idf entry); element 0 of each
            # entry is the keyword used for de-duplication.
            for entry in pair:
                if entry[0] not in keywords_shown:
                    keywords_shown.add(entry[0])
                    res.append(entry)
        return json.dumps(res)

    getPhotosByID.exposed = True

    def _cacheAll(self):
        """Populate both caches for every event, printing progress.

        The counter runs across both passes and is printed every 100 steps.
        """
        print('begin cache')
        all_events = self.getAllEvents()
        print(type(all_events))
        all_events = json.loads(all_events)
        cnt = 0
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_events[e['_id']] = json.dumps(e)
        for e in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_photos[e['_id']] = self.getPhotosByID(e['_id'])

    def getEventByID(self, event_id):
        """Return one event as a JSON string.

        A cached event is returned with extra photo metadata stripped;
        otherwise the event is fetched and its ``_id`` stringified.
        """
        if event_id in self.cache_events:
            # This message used to sit after the return and never ran.
            print('event cached. return directly')
            cached = json.loads(self.cache_events[event_id])
            cached['photos'] = [self._deleteExtraMeta(p) for p in cached['photos']]
            return json.dumps(cached)

        event = self.ei.getEventByID(event_id)
        event['_id'] = str(event['_id'])
        return json.dumps(event)

    getEventByID.exposed = True

    def getTopKeywords(self, event_id):
        """Return the top 10 keywords of an event as a JSON string."""
        event = self.ei.getEventByID(event_id)
        ef = EventFeature(event)
        words = ef.getTopKeywords(k=10)
        return json.dumps(words)

    # Deliberately not exposed.

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the event's ``label`` and persist it.

        Raises:
            ValueError: if ``label`` cannot be converted to int.
        """
        event = self.ei.getEventByID(str(event_id))
        # Single-argument print keeps the output identical on Python 2 and 3.
        print('setting  %s label =  %s' % (event_id, label))
        event['label'] = int(label)
        self.ei.updateDocument(event)