예제 #1
0
    def __init__(self):
        """Set up the event store, the photo representor and the CrowdFlower codes.

        The event interface is pointed at the 'AmazonMT' database and its
        'candidate_event_25by25_merged' collection.
        """
        # Configure the interface completely, then publish it on the instance.
        interface = EventInterface()
        interface.setDB('AmazonMT')
        interface.setCollection('candidate_event_25by25_merged')
        self.ei = interface

        self.representor = Representor()
        # Disabled alternatives kept from the original code:
        #   DB 'citybeat';
        #   collections 'next_week_candidate_event_25by25_merged', 'online_candidate'.

        self._loadCrowdFlowerCode()
예제 #2
0
 def __init__(self):
     """Set up the event store, the photo representor and the CrowdFlower codes.

     The event interface is pointed at the 'AmazonMT' database and its
     'candidate_event_25by25_merged' collection.
     """
     # Configure the interface completely, then publish it on the instance.
     interface = EventInterface()
     interface.setDB('AmazonMT')
     interface.setCollection('candidate_event_25by25_merged')
     self.ei = interface

     self.representor = Representor()
     # Disabled alternatives kept from the original code:
     #   DB 'citybeat';
     #   collections 'next_week_candidate_event_25by25_merged', 'online_candidate'.

     self._loadCrowdFlowerCode()
예제 #3
0
class Root:
    """Request handler that serves candidate events and their photos as JSON.

    Methods followed by ``<name>.exposed = True`` are the published ones
    (this looks like the CherryPy exposure convention -- confirm against the
    server setup).  Every ``print`` below is written as a single-argument
    call so that it produces the same output whether it is parsed as a
    Python 2 statement or a Python 3 function call.
    """

    # Key paths stripped from a photo document by _deleteExtraMeta.
    _EXTRA_META_PATHS = (
        ('comments',),
        ('caption', 'from'),
        ('filter',),
        ('user',),
        ('images', 'standard_resolution'),
        ('images', 'low_resolution'),
        ('likes',),
    )

    def __init__(self):
        """Connect to the event collection and load the CrowdFlower codes."""
        self.ei = EventInterface()
        self.ei.setDB('AmazonMT')
        self.ei.setCollection('candidate_event_25by25_merged')
        self.representor = Representor()
        # Disabled alternatives kept from the original code:
        #   DB 'citybeat';
        #   collections 'next_week_candidate_event_25by25_merged', 'online_candidate'.

        # _cacheAll fills these; they previously were never created, so the
        # first call to _cacheAll raised AttributeError.
        self.cache_events = {}
        self.cache_photos = {}

        self._loadCrowdFlowerCode()

    def getAllEvents(self):
        """Return a JSON list of a random sample of the stored events.

        Only events with more than three photos are considered, and each of
        those is kept with probability of roughly 0.9.  Kept events get a
        string ``_id``, fixed ``urgency``/``volume``/``stats`` values, and
        their photo list replaced by at most five representative photos.
        """
        events = []
        # The cursor is drained into a list before the (potentially slow)
        # per-event work starts, exactly as the original code did.
        for event in list(self.ei.getAllDocuments()):
            # Short-circuit order matters: random.random() is only consumed
            # for events that pass the photo-count filter.
            if len(event['photos']) <= 3 or random.random() <= 0.1:
                continue
            event['_id'] = str(event['_id'])
            event['urgency'] = 58
            event['volume'] = 99
            event['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            event['photos'] = self.representor.getRepresentivePhotos(event)[:5]
            events.append(event)
        return json.dumps(events)

    getAllEvents.exposed = True

    @staticmethod
    def _parseCrowdFlowerLines(lines):
        """Build an ``{event_id: code}`` dict from ``event_id,code`` lines.

        The line terminator is removed so codes do not carry a trailing
        newline, and lines with fewer than two comma-separated fields
        (e.g. blank lines) are skipped instead of raising IndexError.
        Fields after the second one are ignored.
        """
        codes = {}
        for line in lines:
            fields = line.rstrip('\r\n').split(',')
            if len(fields) < 2:
                continue
            codes[fields[0]] = fields[1]
        return codes

    def _loadCrowdFlowerCode(self):
        """Load ``crowdflower_code.txt`` (relative to the CWD) into ``self.cf_code``.

        Raises whatever ``open`` raises when the file is missing/unreadable.
        """
        # The context manager guarantees the handle is closed.
        with open('crowdflower_code.txt') as handle:
            self.cf_code = self._parseCrowdFlowerLines(handle)

    def getCrowdFlowerCode(self, event_id):
        """Return the CrowdFlower code stored for ``event_id``, or None if unknown."""
        return self.cf_code.get(event_id)

    getCrowdFlowerCode.exposed = True

    def getAllEventsIDs(self):
        """Return a JSON list with the string form of every document ID."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])

    # Deliberately not exposed:
    #getAllEventsIDs.exposed = True

    def _deleteExtraMeta(self, photo):
        """Strip bulky metadata from ``photo`` in place and return it.

        Removal is best-effort: a missing key, or a parent that is not a
        mapping (e.g. ``photo['caption']`` being None), is silently skipped.
        """
        for path in self._EXTRA_META_PATHS:
            try:
                parent = photo
                for key in path[:-1]:
                    parent = parent[key]
                del parent[path[-1]]
            except (KeyError, TypeError, IndexError):
                # Nothing to delete at this path.
                pass
        return photo

    def getPhotosByID(self, event_id):
        """Return the photos of an event as JSON.

        The result encodes two ``[label, count, photos]`` entries:
        ``'all_photos'`` with every photo returned by getEventByID, and
        ``'top_10_representative'`` with at most ten representative photos.
        """
        event = json.loads(self.getEventByID(event_id))
        photos = event['photos']
        rep_photos = self.representor.getRepresentivePhotos(event)[:10]
        return json.dumps([
            ['all_photos', len(photos), photos],
            ['top_10_representative', len(rep_photos), rep_photos],
        ])

    getPhotosByID.exposed = True

    def _cacheAll(self):
        """Fill ``cache_events`` and ``cache_photos`` for every sampled event.

        Progress is printed every 100 steps; the counter runs on across both
        passes rather than restarting for the photo pass.
        """
        print('begin cache')
        all_events = self.getAllEvents()
        print(type(all_events))
        all_events = json.loads(all_events)
        cnt = 0
        for event in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_events[event['_id']] = json.dumps(event)
        for event in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_photos[event['_id']] = self.getPhotosByID(event['_id'])

    def getEventByID(self, event_id):
        """Return one event as JSON after ``Event.selectOnePhotoForOneUser()``.

        The ``_id`` field is converted to a string so it can be serialised.
        """
        event = Event(self.ei.getEventByID(event_id))
        event.selectOnePhotoForOneUser()
        event_dic = event.toJSON()
        event_dic['_id'] = str(event_dic['_id'])
        return json.dumps(event_dic)

    getEventByID.exposed = True

    def getTopKeywords(self, event_id):
        """Return, as JSON, the result of ``EventFeature.getTopKeywords(k=10)``."""
        event = self.ei.getEventByID(event_id)
        return json.dumps(EventFeature(event).getTopKeywords(k=10))

    # Deliberately not exposed:
    #getTopKeywords.exposed = True

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the ``label`` of the event and persist it.

        Raises ValueError/TypeError if ``label`` cannot be converted to int.
        """
        event = self.ei.getEventByID(str(event_id))
        # Same text the original multi-argument print statement produced
        # (note the double spaces), but valid on Python 2 and 3.
        print('setting  %s label =  %s' % (event_id, label))
        event['label'] = int(label)
        self.ei.updateDocument(event)
예제 #4
0
class Root:
    """Request handler that serves candidate events and their photos as JSON.

    Methods followed by ``<name>.exposed = True`` are the published ones
    (this looks like the CherryPy exposure convention -- confirm against the
    server setup).  Every ``print`` below is written as a single-argument
    call so that it produces the same output whether it is parsed as a
    Python 2 statement or a Python 3 function call.
    """

    # Key paths stripped from a photo document by _deleteExtraMeta.
    _EXTRA_META_PATHS = (
        ('comments',),
        ('caption', 'from'),
        ('filter',),
        ('user',),
        ('images', 'standard_resolution'),
        ('images', 'low_resolution'),
        ('likes',),
    )

    def __init__(self):
        """Connect to the event collection and load the CrowdFlower codes."""
        self.ei = EventInterface()
        self.ei.setDB('AmazonMT')
        self.ei.setCollection('candidate_event_25by25_merged')
        self.representor = Representor()
        # Disabled alternatives kept from the original code:
        #   DB 'citybeat';
        #   collections 'next_week_candidate_event_25by25_merged', 'online_candidate'.

        # _cacheAll fills these; they previously were never created, so the
        # first call to _cacheAll raised AttributeError.
        self.cache_events = {}
        self.cache_photos = {}

        self._loadCrowdFlowerCode()

    def getAllEvents(self):
        """Return a JSON list of a random sample of the stored events.

        Only events with more than three photos are considered, and each of
        those is kept with probability of roughly 0.9.  Kept events get a
        string ``_id``, fixed ``urgency``/``volume``/``stats`` values, and
        their photo list replaced by at most five representative photos.
        """
        events = []
        # The cursor is drained into a list before the (potentially slow)
        # per-event work starts, exactly as the original code did.
        for event in list(self.ei.getAllDocuments()):
            # Short-circuit order matters: random.random() is only consumed
            # for events that pass the photo-count filter.
            if len(event['photos']) <= 3 or random.random() <= 0.1:
                continue
            event['_id'] = str(event['_id'])
            event['urgency'] = 58
            event['volume'] = 99
            event['stats'] = {'photos': 50, 'tweets': 0, 'checkins': 0}
            event['photos'] = self.representor.getRepresentivePhotos(event)[:5]
            events.append(event)
        return json.dumps(events)

    getAllEvents.exposed = True

    @staticmethod
    def _parseCrowdFlowerLines(lines):
        """Build an ``{event_id: code}`` dict from ``event_id,code`` lines.

        The line terminator is removed so codes do not carry a trailing
        newline, and lines with fewer than two comma-separated fields
        (e.g. blank lines) are skipped instead of raising IndexError.
        Fields after the second one are ignored.
        """
        codes = {}
        for line in lines:
            fields = line.rstrip('\r\n').split(',')
            if len(fields) < 2:
                continue
            codes[fields[0]] = fields[1]
        return codes

    def _loadCrowdFlowerCode(self):
        """Load ``crowdflower_code.txt`` (relative to the CWD) into ``self.cf_code``.

        Raises whatever ``open`` raises when the file is missing/unreadable.
        """
        # The context manager guarantees the handle is closed.
        with open('crowdflower_code.txt') as handle:
            self.cf_code = self._parseCrowdFlowerLines(handle)

    def getCrowdFlowerCode(self, event_id):
        """Return the CrowdFlower code stored for ``event_id``, or None if unknown."""
        return self.cf_code.get(event_id)

    getCrowdFlowerCode.exposed = True

    def getAllEventsIDs(self):
        """Return a JSON list with the string form of every document ID."""
        return json.dumps([str(_id) for _id in self.ei.getAllDocumentIDs()])

    # Deliberately not exposed:
    #getAllEventsIDs.exposed = True

    def _deleteExtraMeta(self, photo):
        """Strip bulky metadata from ``photo`` in place and return it.

        Removal is best-effort: a missing key, or a parent that is not a
        mapping (e.g. ``photo['caption']`` being None), is silently skipped.
        """
        for path in self._EXTRA_META_PATHS:
            try:
                parent = photo
                for key in path[:-1]:
                    parent = parent[key]
                del parent[path[-1]]
            except (KeyError, TypeError, IndexError):
                # Nothing to delete at this path.
                pass
        return photo

    def getPhotosByID(self, event_id):
        """Return the photos of an event as JSON.

        The result encodes two ``[label, count, photos]`` entries:
        ``'all_photos'`` with every photo returned by getEventByID, and
        ``'top_10_representative'`` with at most ten representative photos.
        """
        event = json.loads(self.getEventByID(event_id))
        photos = event['photos']
        rep_photos = self.representor.getRepresentivePhotos(event)[:10]
        return json.dumps([
            ['all_photos', len(photos), photos],
            ['top_10_representative', len(rep_photos), rep_photos],
        ])

    getPhotosByID.exposed = True

    def _cacheAll(self):
        """Fill ``cache_events`` and ``cache_photos`` for every sampled event.

        Progress is printed every 100 steps; the counter runs on across both
        passes rather than restarting for the photo pass.
        """
        print('begin cache')
        all_events = self.getAllEvents()
        print(type(all_events))
        all_events = json.loads(all_events)
        cnt = 0
        for event in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_events[event['_id']] = json.dumps(event)
        for event in all_events:
            cnt += 1
            if cnt % 100 == 0:
                print(cnt)
            self.cache_photos[event['_id']] = self.getPhotosByID(event['_id'])

    def getEventByID(self, event_id):
        """Return one event as JSON after ``Event.selectOnePhotoForOneUser()``.

        The ``_id`` field is converted to a string so it can be serialised.
        """
        event = Event(self.ei.getEventByID(event_id))
        event.selectOnePhotoForOneUser()
        event_dic = event.toJSON()
        event_dic['_id'] = str(event_dic['_id'])
        return json.dumps(event_dic)

    getEventByID.exposed = True

    def getTopKeywords(self, event_id):
        """Return, as JSON, the result of ``EventFeature.getTopKeywords(k=10)``."""
        event = self.ei.getEventByID(event_id)
        return json.dumps(EventFeature(event).getTopKeywords(k=10))

    # Deliberately not exposed:
    #getTopKeywords.exposed = True

    def setLabel(self, event_id, label):
        """Store ``int(label)`` as the ``label`` of the event and persist it.

        Raises ValueError/TypeError if ``label`` cannot be converted to int.
        """
        event = self.ei.getEventByID(str(event_id))
        # Same text the original multi-argument print statement produced
        # (note the double spaces), but valid on Python 2 and 3.
        print('setting  %s label =  %s' % (event_id, label))
        event['label'] = int(label)
        self.ei.updateDocument(event)