Пример #1
0
def query():
    """
    Return the similarity of two accounts, computing and storing it when none is found.

        Request format (JSON body):
            {'account1': {'platform':'xxx', 'account': 'aaa'}, 'account2': {'platform':'yyy', 'account': 'bbb'}}
        Response format on success:
            {'result': <vector>, 'columns': <column_names>, 'score': '<overall score as str>',
             'doc_id': '5bea4d3efa3646879', 'error': False}
        Response format on failure:
            {'error': True, 'error_message': '<reason>'}
    """
    data = json.loads(request.get_data().decode('utf-8'))
    account1 = data['account1']
    account2 = data['account2']
    score = query_existing_similarity_in_db(account1, account2)
    if len(score) == 0:
        # No existing result for this pair: compute the vector now and store it.
        try:
            info1 = retrieve(account1, mode=REALTIME_MODE)
            info2 = retrieve(account2, mode=REALTIME_MODE)
            # enable_networking is only True when both accounts name the same platform.
            vector = algoModule.calc(info1, info2, enable_networking=(account1['platform'] == account2['platform']),
                                     mode=REALTIME_MODE)
            doc_id = algoModule.store_result(info1, info2, vector, DATABASE_DATA_AWAIT_FEEDBACK)
            db = Couch(DATABASE_DATA_AWAIT_FEEDBACK)
            try:
                score = db.query({'_id': doc_id})
            finally:
                # Release the database handle even when the read-back query raises.
                db.close()
        except Exception as e:
            logger.error(e)
            return make_response({'error': True, 'error_message': str(e)})
        if len(score) == 0:
            # The freshly stored document could not be read back; report it in the
            # same error format instead of failing on score[0] below.
            message = 'Stored similarity document {} could not be read back.'.format(doc_id)
            logger.error(message)
            return make_response({'error': True, 'error_message': message})
    doc = score[0]
    doc_id = doc['_id']
    vector = doc['vector']
    overall_score = OverallSimilarityCalculator().calc(doc)
    return make_response({'result': vector, 'columns': column_names,
                          'score': str(overall_score), 'doc_id': doc_id,
                          'error': False})
def _do_test_case(account1, account2):
    """
    Run one batch-mode comparison of two accounts and store the outcome.

    Both accounts are retrieved in BATCH_MODE, tagged with their lower-cased
    platform name, compared with networking disabled, and the vector is stored
    in DATABASE_DATA_AWAIT_FEEDBACK.

    :return: whatever ``store_result`` returns (used by callers as the document id).
    """
    calculator = SimCalculator()
    first_info = retrieve(account1, BATCH_MODE)
    second_info = retrieve(account2, BATCH_MODE)
    # Tag each retrieved record with the lower-cased platform of its account.
    for info, account in ((first_info, account1), (second_info, account2)):
        info['platform'] = account['platform'].lower()
    similarity = calculator.calc(first_info, second_info,
                                 enable_networking=False, mode=BATCH_MODE)
    return calculator.store_result(first_info, second_info, similarity,
                                   DATABASE_DATA_AWAIT_FEEDBACK)
Пример #3
0
 def test_retrieve_flickr_realtime_in_db(self):
     """Retrieve a Flickr account in realtime mode, then check the 'flickr' database and a second retrieval."""
     account = {'platform': 'Flickr', 'account': 'sakuranyochan'}
     retrieve(account, REALTIME_MODE)
     db = Couch('flickr')
     try:
         # NOTE(review): the username literal looks masked; presumably it should
         # match the account retrieved above -- confirm before relying on this check.
         query_result = db.query({'profile': {'username': '******'}})
     finally:
         # Close the handle even when the query raises, so a failing test does not leak it.
         db.close()
     self.assertTrue(len(query_result) > 0)
     query_result = retrieve(account, REALTIME_MODE)
     for item in query_result:
         self.assertTrue('profile' in item.keys()
                         and item['profile']['username'] == 'sakuranyochan')
Пример #4
0
 def test_generate_vector_realtime(self):
     """Compute and store a vector for a twitter/instagram pair in realtime mode; the stored id must not be None."""
     calculator = SimCalculator()
     twitter_account = {'platform': 'twitter', 'account': '1angharad_rees'}
     instagram_account = {'platform': 'instagram', 'account': 'kaligraphicprint'}
     twitter_info = retrieve(twitter_account, REALTIME_MODE)
     instagram_info = retrieve(instagram_account, REALTIME_MODE)
     # Tag each retrieved record with the lower-cased platform of its account.
     for info, source in ((twitter_info, twitter_account),
                          (instagram_info, instagram_account)):
         info['platform'] = source['platform'].lower()
     similarity = calculator.calc(twitter_info, instagram_info,
                                  enable_networking=False, mode=REALTIME_MODE)
     stored_id = calculator.store_result(twitter_info, instagram_info,
                                         similarity, DATABASE_DATA_AWAIT_FEEDBACK)
     self.assertIsNotNone(stored_id)
Пример #5
0
 def test_generate_vector_batch(self):
     """Compute and store a vector for a twitter/instagram pair in batch mode; the stored id must not be None."""
     sim_calculator = SimCalculator()
     accounts = (
         {'platform': 'twitter', 'account': 'tohtohchan'},
         {'platform': 'instagram', 'account': 'tohtohchan'},
     )
     twitter_info = retrieve(accounts[0], BATCH_MODE)
     instagram_info = retrieve(accounts[1], BATCH_MODE)
     # Tag each retrieved record with the lower-cased platform of its account.
     twitter_info['platform'] = accounts[0]['platform'].lower()
     instagram_info['platform'] = accounts[1]['platform'].lower()
     calc_options = {'enable_networking': False, 'mode': BATCH_MODE}
     batch_vector = sim_calculator.calc(twitter_info, instagram_info, **calc_options)
     stored_id = sim_calculator.store_result(
         twitter_info, instagram_info, batch_vector, DATABASE_DATA_AWAIT_FEEDBACK)
     self.assertIsNotNone(stored_id)
Пример #6
0
 def test_retrieve_flickr_batch_in_db(self):
     """Each item of a batch-mode Flickr retrieval must carry the expected profile username and 'posts_content'."""
     expected_username = 'sakuranyochan'
     flickr_account = {'platform': 'Flickr', 'account': expected_username}
     for entry in retrieve(flickr_account, BATCH_MODE):
         profile_matches = ('profile' in entry.keys()
                            and entry['profile']['username'] == expected_username)
         self.assertTrue(profile_matches)
         has_posts = 'posts_content' in entry.keys()
         self.assertTrue(has_posts)
Пример #7
0
def generate(size, positive):
    """
    Compute and store labeled vectors for sampled twitter/instagram account pairs.

    :param size: forwarded to the Sampler when requesting the dataset.
    :param positive: truthy to use the positive dataset (label 1), falsy for the
                     negative dataset (label 0).

    A pair for which ``fetch_vector`` already returns something is skipped. Any
    exception raised while handling a pair is logged and the loop moves on.
    """
    if positive:
        samples = Sampler().getPositiveDataset(size)
    else:
        samples = Sampler().getNegativeDataset(size)
    label = 1 if positive else 0
    sim_calculator = SimCalculator()
    for position, pair in enumerate(samples):
        account1 = {'platform': 'twitter', 'account': pair['twitter']}
        account2 = {'platform': 'instagram', 'account': pair['instagram']}
        try:
            progress = 'Processing {}-th sample. Account1: {}, Account2: {}.'.format(
                position, account1, account2)
            logger.info(progress)
            twitter_data = retrieve(account1, BATCH_MODE)
            instagram_data = retrieve(account2, BATCH_MODE)
            already_stored = sim_calculator.fetch_vector(
                twitter_data, instagram_data, DATABASE_LABELED_DATA)
            if len(already_stored) == 0:
                # Only pairs without a stored vector are computed and saved.
                labeled_vector = sim_calculator.calc(
                    twitter_data, instagram_data, enable_networking=False, mode=BATCH_MODE)
                labeled_vector['label'] = label
                sim_calculator.store_result(
                    twitter_data, instagram_data, labeled_vector, DATABASE_LABELED_DATA)
        except Exception as error:
            logger.error('Error: {}, account1: {}, account2: {}'.format(error, account1, account2))
Пример #8
0
def userinfo():
    """
    Return detailed information about one user on one platform.

    Request format (query-string parameters):
        platform=xxx&username=yyy
    :return: detailed information of the given user, with the '_id', '_rev' and 'timestamp'
             fields removed. Will take some time if the user is not in the database.
             returns {'error': True, 'error_message': ...} if a parameter is missing
             or an error occurs.
    """
    username = request.args.get('username')
    platform = request.args.get('platform')
    if username is None or platform is None:
        # Without this guard, .lower() on a missing parameter raises AttributeError
        # outside the try block below instead of producing an error response.
        message = "Both 'username' and 'platform' query parameters are required."
        logger.error(message)
        return make_response({'error': True, 'error_message': message})
    username = username.lower()
    platform = platform.lower()
    account = {'platform': platform, 'account': username}
    logger.info('Querying user {} from {}.'.format(username, platform))
    try:
        account_info = retrieve(account, mode=BATCH_MODE)
        # Strip the storage-internal fields; an absent field must not turn a
        # successful retrieval into an error response.
        for internal_key in ('_id', '_rev', 'timestamp'):
            account_info.pop(internal_key, None)
    except Exception as e:
        logger.error(e)
        return make_response({'error': True, 'error_message': str(e)})
    return make_response(account_info)
Пример #9
0
 def test_retrieve_instagram_posts_not_exist_should_not_parse(self):
     """A batch-mode retrieval of this Instagram account must not contain a 'posts_content' key."""
     instagram_account = {'platform': 'Instagram', 'account': 'thedunkstar'}
     retrieved = retrieve(instagram_account, BATCH_MODE)
     posts_absent = 'posts_content' not in retrieved.keys()
     self.assertTrue(posts_absent)