Пример #1
0
def mocked_requests(*args, **kwargs):
    """Fake ``requests.post`` for the Open Calais (``/calais``) endpoint.

    The URL and request body are taken from the ``url`` / ``data`` keyword
    arguments, falling back to the first / second positional argument so the
    mock works for both ``post(url=...)`` and ``post(url, ...)`` call styles.

    The canned response is chosen by which test keyword occurs in the
    stringified body:

    * ``found_keyword``                -> 200 with a single "Company" entity
    * ``not_found_keyword``            -> 200 with an empty JSON object
    * ``unsupported_language_keyword`` -> 400 with an "unsupported" message
    * ``api_error_keyword``            -> 429 (rate limited)

    Any other URL or body yields a 404 response.

    :return: an object exposing ``status_code`` and ``text``.
    """

    class MockResponse:
        """Minimal response double: only the attributes the callers read."""

        def __init__(self, status_code, text):
            self.status_code = status_code
            self.text = text

    url = kwargs.get('url', args[0] if args else None)
    data = str(kwargs.get('data', args[1] if len(args) > 1 else None))

    if url == f'{tr_url}/calais':
        if get_correct_keyword(found_keyword) in data:
            return MockResponse(200,
                                '{"http://d.opencalais.com/comphash-1/520f1c53": {"_type": "Company",'
                                '"confidence": {"aggregate": "1.0", "resolution": "1.0"},'
                                '"resolutions": [{"name": "MICROSOFT CORPORATION",'
                                '"permid": "4295907168", "score": 0.9946112}],'
                                '"instances": [{"exact": "Microsoft"}]}}')
        elif get_correct_keyword(not_found_keyword) in data:
            return MockResponse(200, '{}')
        elif get_correct_keyword(unsupported_language_keyword) in data:
            return MockResponse(400, '{which is not currently supported}')
        elif get_correct_keyword(api_error_keyword) in data:
            return MockResponse(429, '{}')

    # Status code first, body second; the body stays a string because callers
    # run substring checks against ``text``.
    return MockResponse(404, 'Not Found')
    def test_entity_match_when_not_found(self, mock_post):
        """Entity Match on the not-found keyword returns the default not-found result.

        ``mock_post`` is not used directly; presumably it is the patched
        ``requests.post`` injected by a decorator outside this view.
        """
        actual = issue_entity_match_request(not_found_keyword)

        normalized_keyword = get_correct_keyword(not_found_keyword)
        algorithm = ResolutionAlgo.ENTITY_MATCH.value
        expected = default_not_found_result(resolution_algorithm=algorithm,
                                            keyword=normalized_keyword)

        self.assertEqual(actual, expected)
Пример #3
0
    def test_open_calais_when_unsupported_language(self, mock_post):
        """Open Calais on an unsupported-language keyword returns the default not-found result.

        ``mock_post`` is not used directly; presumably it is the patched
        ``requests.post`` injected by a decorator outside this view.
        """
        actual = issue_open_calais_request(keyword_arg=unsupported_language_keyword)

        normalized_keyword = get_correct_keyword(unsupported_language_keyword)
        expected = default_not_found_result(
            resolution_algorithm=ResolutionAlgo.OPEN_CALAIS,
            keyword=normalized_keyword,
        )

        self.assertEqual(actual, expected)
def mocked_requests(*args, **kwargs):
    """Fake ``requests.post`` for the Entity Match (``/match``) endpoint.

    The URL and request body are taken from the ``url`` / ``data`` keyword
    arguments, falling back to the first / second positional argument.

    The canned response is chosen by which test keyword occurs in the
    stringified body:

    * ``found_keyword``     -> 200 with one fully populated match record
    * ``not_found_keyword`` -> 200 with a record carrying only the match
      level and the input name
    * ``api_error_keyword`` -> 429 (rate limited)

    Any other URL or body yields a 404 response.

    :return: an object exposing ``status_code`` and ``text``.
    """
    import json  # local import: builds the JSON bodies with proper escaping

    class MockResponse:
        """Minimal response double: only the attributes the callers read."""

        def __init__(self, status_code, text):
            self.status_code = status_code
            self.text = text

    url = kwargs.get('url', args[0] if args else None)
    data = str(kwargs.get('data', args[1] if len(args) > 1 else None))

    if url == f'{tr_url}/match':
        if get_correct_keyword(found_keyword) in data:
            body = {
                'outputContentResponse': [{
                    'Match OpenPermID': found_perm_id_url,
                    'Match OrgName': found_org_name,
                    'Match Score': found_match_score,
                    'Match Level': found_match_level,
                    'Input_Name': get_correct_keyword(found_keyword),
                }]
            }
            return MockResponse(200, json.dumps(body))
        elif get_correct_keyword(not_found_keyword) in data:
            body = {
                'outputContentResponse': [{
                    'Match Level': not_found_match_level,
                    'Input_Name': get_correct_keyword(not_found_keyword),
                }]
            }
            return MockResponse(200, json.dumps(body))
        elif get_correct_keyword(api_error_keyword) in data:
            return MockResponse(429, '{}')

    # Status code first, body second; the body stays a string so callers can
    # log or inspect ``text`` safely.
    return MockResponse(404, 'Not Found')
Пример #5
0
def process_redo_data():
    """Re-submit the keywords listed in ``FILE_NAME`` to Entity Match in batches.

    Each line of the file is passed through ``chomp``; non-empty results are
    normalized with ``get_correct_keyword`` and queued as
    ``"<line number>,<keyword>"``. The original and normalized keywords are
    recorded in ``ORIG_KEYWORD_DICT`` / ``EM_KEYWORD_DICT`` under the line
    number (as a string).

    Every ``BATCH_SIZE`` lines (blank lines count towards the line number)
    the queue is handed to ``__issue_em_request`` together with the DynamoDB
    table, unless fewer than ``SKIP_BATCHES_UNTIL`` batches have been seen,
    in which case the batch is dropped. Entries left over after the last full
    batch are flushed at the end; nothing is sent when the queue is empty.

    Finally prints the value of the module-level ``CORRECTED_COUNT``.
    """
    global CORRECTED_COUNT
    dynamo_client = boto3.resource('dynamodb', region_name=AWS_REGION)
    table = dynamo_client.Table(RESOLVED_ENTITY_TABLE_NAME)

    local_list = []
    batch_count = 0
    with open(FILE_NAME) as fp:
        for local_count, raw_line in enumerate(fp, start=1):
            keyword_orig = chomp(raw_line)

            # Truthiness covers both None and the empty string; the previous
            # ``is not ''`` identity test only worked through interning.
            if keyword_orig:
                keyword = get_correct_keyword(keyword_orig)
                local_list.append(f'{local_count},{keyword}')

                ORIG_KEYWORD_DICT[f'{local_count}'] = chomp(keyword_orig)
                EM_KEYWORD_DICT[f'{local_count}'] = keyword

            if local_count % BATCH_SIZE == 0:
                batch_count += 1

                if batch_count >= SKIP_BATCHES_UNTIL:
                    __issue_em_request(local_list, table)
                    print(
                        f'Finished processing {batch_count} batches of {BATCH_SIZE} entities each'
                    )

                # Cleared even when the batch was skipped, so skipped entries
                # never leak into a later request.
                local_list.clear()

        # Flush the trailing partial batch, if any.
        if local_list:
            __issue_em_request(local_list, table)

    print(f'Corrected {CORRECTED_COUNT} entries')
Пример #6
0
def issue_entity_match_request(keyword_arg, max_retry=5):
    """Resolve a single keyword through the Entity Match (``/match``) endpoint.

    The keyword is normalized with ``get_correct_keyword`` and posted as a
    two-line CSV body (``LocalID,Name`` header plus one row). While the
    service answers "too many requests" the call is retried after a one
    second pause, up to ``max_retry`` times.

    :param keyword_arg: raw keyword to resolve.
    :param max_retry: maximum number of retries after a rate-limit response.
    :return: ``get_val(keyword, <first outputContentResponse entry>)`` on
        success; ``None`` when retries are exhausted or the final status
        code is not OK.
    """
    keyword = get_correct_keyword(keyword=keyword_arg)
    log.info(f'Issuing request to TR for {keyword}')

    url = f'{get_tr_url()}/match'
    data = f"LocalID,Name\n1,{keyword}"
    # Encode once so the first attempt and every retry send the same UTF-8
    # bytes (the retry used to send the unencoded str).
    payload = data.encode('utf-8')

    retry_count = 0
    result = requests.post(url=url, headers=__get_headers(), data=payload)
    while result.status_code == requests.codes.too_many_requests:
        log.error(f'{result.status_code}, {result.text}')

        retry_count += 1
        if retry_count > max_retry:
            log.error(
                f'Max Retry(s) reached for {keyword}, unable to resolve via Entity Match- EM issue!'
            )
            return None

        log.info(f'Retry {retry_count}/{max_retry}...')
        time.sleep(1)
        result = requests.post(url=url, headers=__get_headers(), data=payload)

    log.info(
        f'Got a result for {data} from Entity Match Code : {result.status_code}, Result : {result.text}'
    )
    if result.status_code != requests.codes.ok:
        log.error(f'{result.status_code}, {result.text}')
        return None

    json_result = json.loads(result.text)
    relevant_result = get_val(keyword, json_result['outputContentResponse'][0])

    return relevant_result
Пример #7
0
def issue_open_calais_request(keyword_arg,
                              max_retry=3,
                              do_smart_phrasing=True):
    """Resolve a keyword through the Open Calais (``/calais``) endpoint.

    The keyword is normalized with ``get_correct_keyword``, passed through
    ``smart_phrasing`` and posted as UTF-8 text. Any non-OK response is
    retried after a one second pause, up to ``max_retry`` times, unless the
    response text reports an unsupported or unrecognized language.

    :param keyword_arg: raw keyword to resolve.
    :param max_retry: maximum number of retries after a non-OK response.
    :param do_smart_phrasing: forwarded to ``smart_phrasing``.
    :return:
        * a list with one ``_get_val(keyword, company)`` entry per instance
          of every "Company" entity that has resolutions;
        * ``default_not_found_result(...)`` when no such entity is returned
          or the language is not supported;
        * ``None`` when retries are exhausted or an exception is raised
          while issuing the request or handling the response.
    """
    keyword = get_correct_keyword(keyword=keyword_arg)
    log.info(f'Issuing request to TR (Open Calais) for {keyword}')
    smart_keyword = smart_phrasing(keyword=keyword,
                                   do_smart_phrasing=do_smart_phrasing)

    try:
        url = f'{get_tr_url()}/calais'
        payload = smart_keyword.encode('utf-8')

        retry_count = 0
        result = requests.post(url=url,
                               headers=__get_headers(),
                               data=payload)
        while result.status_code != requests.codes.ok:
            log.error(f'{result.status_code}, {result.text}')
            retry_count += 1
            if retry_count > max_retry:
                log.error(
                    f'Max Retry(s) reached for {smart_keyword}, unable to resolve via Open Calais- OC issue!'
                )
                return None
            elif 'which is not currently supported' in result.text or 'Unrecognized-Language' in result.text:
                # Retrying cannot help here: the service rejects the language.
                log.error(
                    f'Unsupported language for {keyword}, unable to resolve via Open Calais- OC Language !'
                )
                return default_not_found_result(
                    resolution_algorithm=ResolutionAlgo.OPEN_CALAIS,
                    keyword=keyword)
            else:
                log.info(f'Retry {retry_count}/{max_retry}...')
                time.sleep(1)
                # Keyword arguments, same as the first attempt.
                result = requests.post(url=url,
                                       headers=__get_headers(),
                                       data=payload)

        result = json.loads(result.text)
        companies = [
            v for v in result.values()
            if v.get('_type') == 'Company' and v.get('resolutions') is not None
        ]
        # One entry per instance of each company (so a company mentioned
        # several times appears several times), as before.
        vals = [
            _get_val(keyword, r) for r in companies for _ in r['instances']
        ]

        log.info(f'Got a result from Open Calais {vals}')
        if not vals:
            log.error(
                f'No result in Open Calais for {smart_keyword}- No confident match'
            )
            return default_not_found_result(
                resolution_algorithm=ResolutionAlgo.OPEN_CALAIS,
                keyword=keyword)

        return vals
    except Exception:
        # Best-effort: log and return None, but let KeyboardInterrupt and
        # SystemExit propagate (a bare ``except:`` would swallow them too).
        log.error(traceback.format_exc())
        log.error(
            f'OPEN-CALAIS: Unable to successfully issue request to OC for entity resolution for {smart_keyword}'
        )
        return None