def query():
    """Return the similarity vector and overall score for a pair of accounts.

    Request body (UTF-8 encoded JSON):
        {'account1': {'platform': 'xxx', 'account': 'aaa'},
         'account2': {'platform': 'yyy', 'account': 'bbb'}}

    Response on success:
        {'result': <vector>, 'columns': column_names,
         'score': '<overall score as string>',
         'doc_id': '5bea4d3efa3646879', 'error': False}

    Response on failure (malformed request, retrieval/calculation error,
    or a stored document that cannot be read back):
        {'error': True, 'error_message': '<reason>'}
    """
    # Parse the request up front so that a bad payload yields the same
    # error-response shape as every other failure instead of an unhandled
    # exception.
    try:
        data = json.loads(request.get_data().decode('utf-8'))
        account1 = data['account1']
        account2 = data['account2']
    except (ValueError, KeyError, TypeError) as e:
        logger.error(e)
        message = 'Malformed request: {}: {}'.format(type(e).__name__, e)
        return make_response({'error': True, 'error_message': message})

    try:
        score = query_existing_similarity_in_db(account1, account2)
        if len(score) == 0:
            # No existing result for this pair: compute it now and store it.
            info1 = retrieve(account1, mode=REALTIME_MODE)
            info2 = retrieve(account2, mode=REALTIME_MODE)
            # Networking is enabled only when both accounts are on the
            # same platform.
            same_platform = account1['platform'] == account2['platform']
            vector = algoModule.calc(info1, info2,
                                     enable_networking=same_platform,
                                     mode=REALTIME_MODE)
            doc_id = algoModule.store_result(info1, info2, vector,
                                             DATABASE_DATA_AWAIT_FEEDBACK)
            score = Couch(DATABASE_DATA_AWAIT_FEEDBACK).query({'_id': doc_id})
            if len(score) == 0:
                # Guard the score[0] access below with a readable message.
                raise LookupError(
                    'Stored result {} could not be read back.'.format(doc_id))

        doc = score[0]
        doc_id = doc['_id']
        vector = doc['vector']
        overall_score = OverallSimilarityCalculator().calc(doc)
    except Exception as e:
        logger.error(e)
        return make_response({'error': True, 'error_message': str(e)})

    return make_response({'result': vector,
                          'columns': column_names,
                          'score': str(overall_score),
                          'doc_id': doc_id,
                          'error': False})
def _do_test_case(account1, account2):
    """Compute and store the batch-mode similarity vector for two accounts.

    Both accounts are retrieved in BATCH_MODE, each retrieved record gets a
    lower-cased 'platform' entry taken from its account descriptor, and the
    vector (calculated with networking disabled) is stored in
    DATABASE_DATA_AWAIT_FEEDBACK.

    :param account1: dict with 'platform' and 'account' keys.
    :param account2: dict with 'platform' and 'account' keys.
    :return: whatever ``store_result`` returns (used as the document id).
    """
    calculator = SimCalculator()

    first = retrieve(account1, BATCH_MODE)
    second = retrieve(account2, BATCH_MODE)

    # Tag each record with its normalised platform name.
    for record, descriptor in ((first, account1), (second, account2)):
        record['platform'] = descriptor['platform'].lower()

    similarity = calculator.calc(
        first,
        second,
        enable_networking=False,
        mode=BATCH_MODE,
    )
    return calculator.store_result(
        first, second, similarity, DATABASE_DATA_AWAIT_FEEDBACK)
def test_retrieve_flickr_realtime_in_db(self):
    """Retrieve a Flickr account in realtime mode and check the results.

    The account is retrieved once (return value discarded), the 'flickr'
    Couch database is then queried and must return at least one row, and
    finally every item yielded by a second realtime retrieval must carry
    the expected profile username.
    """
    account = {'platform': 'Flickr', 'account': 'sakuranyochan'}
    retrieve(account, REALTIME_MODE)

    db = Couch('flickr')
    try:
        # NOTE(review): the selector username is '******' while the account
        # under test is 'sakuranyochan' - confirm this is intended.
        query_result = db.query({'profile': {'username': '******'}})
    finally:
        # Always release the database handle, even if the query raises.
        db.close()
    self.assertTrue(len(query_result) > 0)

    query_result = retrieve(account, REALTIME_MODE)
    for item in query_result:
        self.assertTrue('profile' in item.keys()
                        and item['profile']['username'] == 'sakuranyochan')
def test_generate_vector_realtime(self):
    """Realtime-mode vector generation must yield a non-None document id.

    Retrieves a Twitter and an Instagram account in REALTIME_MODE, computes
    their vector with networking disabled and stores it in
    DATABASE_DATA_AWAIT_FEEDBACK.
    """
    calculator = SimCalculator()
    twitter_account = dict(platform='twitter', account='1angharad_rees')
    instagram_account = dict(platform='instagram', account='kaligraphicprint')

    twitter_info = retrieve(twitter_account, REALTIME_MODE)
    instagram_info = retrieve(instagram_account, REALTIME_MODE)

    # Tag each retrieved record with its lower-cased platform name.
    twitter_info['platform'] = twitter_account['platform'].lower()
    instagram_info['platform'] = instagram_account['platform'].lower()

    similarity = calculator.calc(
        twitter_info,
        instagram_info,
        enable_networking=False,
        mode=REALTIME_MODE,
    )
    stored_id = calculator.store_result(
        twitter_info, instagram_info, similarity,
        DATABASE_DATA_AWAIT_FEEDBACK)

    self.assertIsNotNone(stored_id)
def test_generate_vector_batch(self):
    """Batch-mode vector generation must yield a non-None document id.

    Retrieves the Twitter and Instagram accounts named 'tohtohchan' in
    BATCH_MODE, computes their vector with networking disabled and stores
    it in DATABASE_DATA_AWAIT_FEEDBACK.
    """
    calculator = SimCalculator()
    twitter_account = dict(platform='twitter', account='tohtohchan')
    instagram_account = dict(platform='instagram', account='tohtohchan')

    twitter_info = retrieve(twitter_account, BATCH_MODE)
    instagram_info = retrieve(instagram_account, BATCH_MODE)

    # Tag each retrieved record with its lower-cased platform name.
    twitter_info['platform'] = twitter_account['platform'].lower()
    instagram_info['platform'] = instagram_account['platform'].lower()

    similarity = calculator.calc(
        twitter_info,
        instagram_info,
        enable_networking=False,
        mode=BATCH_MODE,
    )
    stored_id = calculator.store_result(
        twitter_info, instagram_info, similarity,
        DATABASE_DATA_AWAIT_FEEDBACK)

    self.assertIsNotNone(stored_id)
def test_retrieve_flickr_batch_in_db(self):
    """Check every item of a batch-mode Flickr retrieval.

    Each item yielded by iterating the retrieval result must hold a
    'profile' whose username is 'sakuranyochan' and must also have a
    'posts_content' key.
    """
    account = dict(platform='Flickr', account='sakuranyochan')

    for entry in retrieve(account, BATCH_MODE):
        belongs_to_account = (
            'profile' in entry.keys()
            and entry['profile']['username'] == 'sakuranyochan'
        )
        self.assertTrue(belongs_to_account)

        has_posts = 'posts_content' in entry.keys()
        self.assertTrue(has_posts)
def generate(size, positive):
    """Build labeled similarity vectors from sampled account pairs.

    A dataset of ``size`` samples is drawn from ``Sampler`` (positive or
    negative, depending on ``positive``). For each sample the Twitter and
    Instagram accounts are retrieved in BATCH_MODE; pairs for which
    ``fetch_vector`` already returns something from DATABASE_LABELED_DATA
    are skipped, otherwise the vector is calculated, labeled 1 (positive)
    or 0 (negative) and stored there.

    Any exception raised while handling a sample is logged and the loop
    moves on to the next sample.

    :param size: number of samples requested from the sampler.
    :param positive: truthy for the positive dataset, falsy for negative.
    """
    sampler = Sampler()
    if positive:
        dataset = sampler.getPositiveDataset(size)
    else:
        dataset = sampler.getNegativeDataset(size)

    calculator = SimCalculator()
    label = 1 if positive else 0

    for index, sample in enumerate(dataset):
        account1 = {'platform': 'twitter', 'account': sample['twitter']}
        account2 = {'platform': 'instagram', 'account': sample['instagram']}
        try:
            progress = 'Processing {}-th sample. Account1: {}, Account2: {}.'
            logger.info(progress.format(index, account1, account2))

            data1 = retrieve(account1, BATCH_MODE)
            data2 = retrieve(account2, BATCH_MODE)

            existing = calculator.fetch_vector(
                data1, data2, DATABASE_LABELED_DATA)
            if len(existing) == 0:
                # Pair not labeled yet: compute, label and persist it.
                vector = calculator.calc(
                    data1, data2, enable_networking=False, mode=BATCH_MODE)
                vector['label'] = label
                calculator.store_result(
                    data1, data2, vector, DATABASE_LABELED_DATA)
        except Exception as ex:
            failure = 'Error: {}, account1: {}, account2: {}'
            logger.error(failure.format(ex, account1, account2))
def userinfo():
    """Return the detailed information of a single user.

    Request: query-string arguments ``platform`` and ``username``
    (both are lower-cased before use), e.g. ``?platform=xxx&username=yyy``.

    :return: the retrieved account information with the '_id', '_rev' and
        'timestamp' entries removed. May take some time if the user is not
        in the database. On any failure, including a missing query
        argument, returns
        ``{'error': True, 'error_message': '<reason>'}``.
    """
    username = request.args.get('username')
    platform = request.args.get('platform')
    # args.get() returns None for an absent parameter; report that as a
    # regular error response instead of failing on None.lower().
    if username is None or platform is None:
        message = "Query parameters 'username' and 'platform' are required."
        logger.error(message)
        return make_response({'error': True, 'error_message': message})

    username = username.lower()
    platform = platform.lower()
    account = {'platform': platform, 'account': username}
    logger.info('Querying user {} from {}.'.format(username, platform))
    try:
        account_info = retrieve(account, mode=BATCH_MODE)
        # Strip bookkeeping fields; tolerate records that lack one of them
        # rather than discarding an otherwise successful retrieval.
        for internal_key in ('_id', '_rev', 'timestamp'):
            account_info.pop(internal_key, None)
    except Exception as e:
        logger.error(e)
        return make_response({'error': True, 'error_message': str(e)})
    return make_response(account_info)
def test_retrieve_instagram_posts_not_exist_should_not_parse(self):
    """Batch retrieval of this Instagram account must not yield posts.

    The record returned for 'thedunkstar' is expected to have no
    'posts_content' key.
    """
    selector = dict(platform='Instagram', account='thedunkstar')
    record = retrieve(selector, BATCH_MODE)

    posts_absent = 'posts_content' not in record.keys()
    self.assertTrue(posts_absent)