def check_ip(ip_info, port_info, scheme):
    check_url = "https://bck.hermes.com/product-page?locale=us_en&productsku=H056289CC18"
    ip_url = "%s://%s:%s" % (scheme, ip_info, port_info)
    manager = ProxyManager(ip_url,
                           timeout=10,
                           cert_reqs='CERT_REQUIRED',
                           ca_certs=certifi.where())
    headers = util.make_headers(
        accept_encoding='gzip, deflate',
        keep_alive=True,
        user_agent=
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0"
    )
    headers['Accept-Language'] = "en-US,en;q=0.5"
    headers['Connection'] = 'keep-alive'
    headers[
        'Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    try:
        response = manager.request('GET',
                                   check_url,
                                   preload_content=False,
                                   headers=headers)
        res = response.data
        print(res)
        json.loads(res)
        return True
    except Exception as ex:
        return False
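A minimal, hypothetical driver for check_ip(); the imports are the ones the snippet relies on, and the proxy address is a placeholder.

import json
import certifi
from urllib3 import ProxyManager, util

# Returns True only if the proxy can fetch the page and the body parses as JSON.
if check_ip("203.0.113.10", 8080, "http"):
    print("proxy looks usable")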
Example #2
def configure_http_pool():

    global gl_http_pool

    if gl_args.mode == 'auto-scan' or gl_args.mode == 'file-scan':
        timeout = Timeout(connect=1.0, read=3.0)
    else:
        timeout = Timeout(connect=gl_args.timeout, read=6.0)

    if gl_args.proxy:
        # when using proxy, protocol should be informed
        if 'http' not in gl_args.host or 'http' not in gl_args.proxy:
            print_and_flush(RED + " * When using proxy, you must specify the http or https protocol"
                        " (eg. http://%s).\n\n" %(gl_args.host if 'http' not in gl_args.host else gl_args.proxy) +ENDC)
            logging.critical('Protocol not specified')
            exit(1)

        try:
            if gl_args.proxy_cred:
                headers = make_headers(proxy_basic_auth=gl_args.proxy_cred)
                gl_http_pool = ProxyManager(proxy_url=gl_args.proxy, proxy_headers=headers, timeout=timeout, cert_reqs='CERT_NONE')
            else:
                gl_http_pool = ProxyManager(proxy_url=gl_args.proxy, timeout=timeout, cert_reqs='CERT_NONE')
        except:
            print_and_flush(RED + " * An error occurred while setting the proxy. Please see log for details..\n\n" +ENDC)
            logging.critical('Error while setting the proxy', exc_info=traceback)
            exit(1)
    else:
        gl_http_pool = PoolManager(timeout=timeout, cert_reqs='CERT_NONE')
Example #3
def get_web_page(url: str, proxies: list = None):

    headers = {
        'User-agent':
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582",
    }

    if proxies:
        import numpy as np
        resp = None
        status = 0
        i = 0
        # Try random proxies until one returns HTTP 200, with at most 10 attempts.
        while status != 200 and i < 10:
            i += 1
            try:
                # Each entry in `proxies` is assumed to be a full proxy URL (scheme included).
                proxy_url = np.random.choice(proxies)
                print(f"Proxy: {proxy_url}")
                http = ProxyManager(proxy_url=proxy_url,
                                    headers=headers,
                                    cert_reqs='CERT_NONE',
                                    assert_hostname=False)
                resp = http.request('GET', url)
                status = resp.status
                print(status)
            except Exception:
                continue
        if resp is None:
            raise ConnectionError("all proxy attempts failed")

    else:
        http = PoolManager(headers=headers,
                           cert_reqs='CERT_NONE',
                           assert_hostname=False)
        resp = http.request('GET', url)

    return resp.status, resp.data.decode('utf-8')
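A hypothetical call, assuming each entry in proxies is a full proxy URL with its scheme; the URLs below are placeholders.

status, html = get_web_page("https://example.com",
                            proxies=["http://203.0.113.10:3128"])
print(status, len(html))

Example #4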
class NCBI_Authentication():
    def __init__(self):
        self.authenticate()

    def authenticate(self):
        self.base_url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        self.my_query = "PD-1%20ab%20agonist%5BTitle%2FAbstract%5D)%20AND%20(%222000%2F01%2F01%22%5BDate%20-%20Publication%5D%20%3A%20%223000%22%5BDate%20-%20Publication%5D"
        self.database = "pubmed"
        self.second_url = "esearch.fcgi?db={db}&term={query}&usehistory=y"
        self.final_url = self.base_url + self.second_url.format(
            db=self.database, query=self.my_query)
        self.http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        self.firstResponse = self.http.request('GET', self.final_url)

        self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        self.my_query = "id=29554659"
        self.database = "pubmed"
        self.second_url = "elink.fcgi?dbfrom=gene&db={db}&{query}"
        self.final_url = self.base_url + self.second_url.format(
            db=self.database, query=self.my_query)
        self.http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        self.secondResponse = self.http.request('GET', self.final_url)

    def get_response(self):
        return self.firstResponse, self.secondResponse
Example #5
 def __init__(self):
     user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
     self.headers = {'User-Agent': user_agent}
     self.ip_url = 'http://icanhazip.com/'
     self.logger = logging.getLogger('gkp')
     retries = Retry(connect=5, read=5, redirect=5)
     self.agent = ProxyManager('http://localhost:8118/',
                               retries=retries,
                               timeout=Timeout(total=60.0))
Example #6
 def request(self):
     QtWidgets.qApp.processEvents()
     self.proxi()
     print(self.stroka2)
     self.prm = ProxyManager(str(self.stroka2))
     print(self.stroka2)
     try:
         QtWidgets.qApp.processEvents()
         r = self.prm.request('GET', 'https://www.yandex.ru/')
     except:
         return False
     return True
Example #7
    def request(self):
        req = self._request

        if req.proxy:
            if req.proxy_userpwd:
                headers = make_headers(proxy_basic_auth=req.proxy_userpwd)
            else:
                headers = None
            proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
            try:
                pool = ProxyManager(proxy_url, proxy_headers=headers)
            except ProxySchemeUnknown:
                raise GrabMisuseError('Urllib3 transport does '
                                      'not support %s proxies' %
                                      req.proxy_type)
        else:
            pool = self.pool
        try:
            retry = Retry(redirect=False, connect=False, read=False)
            # The read timeout is not total response time timeout
            # It is the timeout on read of next data chunk from the server
            # Total response timeout is handled by Grab
            timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
            #req_headers = dict((make_unicode(x), make_unicode(y))
            #                   for (x, y) in req.headers.items())
            if six.PY3:
                req_url = make_unicode(req.url)
                req_method = make_unicode(req.method)
            else:
                req_url = make_str(req.url)
                req_method = req.method
            req.op_started = time.time()
            res = pool.urlopen(req_method,
                               req_url,
                               body=req.data,
                               timeout=timeout,
                               retries=retry,
                               headers=req.headers,
                               preload_content=False)
        except exceptions.ReadTimeoutError as ex:
            raise error.GrabTimeoutError('Read timeout')
        except exceptions.ConnectTimeoutError as ex:
            raise error.GrabConnectionError('Could not create connection')
        except exceptions.ProtocolError as ex:
            raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])

        # WTF?
        self.request_head = b''
        self.request_body = b''
        self.request_log = b''

        self._response = res
Example #8
def run_proxy(url):
	global lock
	#print(lock.locked())
	if(prox == ''):
		print('No proxys available.')
		return run(url)	
	print('Proxy: ' + prox)
	http = ProxyManager(prox)	
	try:
		data = {'attribute': 'value'}
		encoded_data = json.dumps(data).encode('utf-8')
		req = http.request(
		'POST',
		url,
		timeout = 3,
		body=encoded_data,
		headers={'Content-Type': 'application/json'})  # the body is JSON-encoded
		print(req.status)
		if(req.status == 404):
			print('Item Does not exist.')
			#return run(url)
			return
		if(req.status == 501):
			print('Proxy at api call limit')
			get_new_proxy()
			return run_proxy(url)
		if(req.status == 407):
			print('Authentication required')
			get_new_proxy()
			return run_proxy(url)
		if(req.status != 200):
			print('Unknown Status Code')
			print(req.status)
			get_new_proxy()
			return run_proxy(url)
	except:
		print('Request timed out.')
		get_new_proxy()
		return run(url)
	
	
	data = json.loads(req.data)
	req.release_conn()
	
	data = data['item']
	id = str(data['id'])
	print('ID: ' + id)
	file = open('ItemIds','a')
	file.write(id  + '\n')
	file.close()
Example #9
 def get_pool(self, req):
     if req['proxy']:
         if req['proxy_auth']:
             proxy_headers = make_headers(
                 proxy_basic_auth=req['proxy_auth'])
         else:
             proxy_headers = None
         proxy_url = '%s://%s' % (req['proxy_type'], req['proxy'])
         pool_key = (req['proxy_type'], req['proxy'], bool(req['verify']))
         if pool_key not in self.pools:
             if req['proxy_type'] == 'socks5':
                 opts = {
                     #num_pools=1000,
                     #maxsize=10,
                 }
                 if req['verify']:
                     pool = SOCKSProxyManager(proxy_url,
                                              cert_reqs='CERT_REQUIRED',
                                              ca_certs=certifi.where(),
                                              **opts)
                 else:
                     pool = SOCKSProxyManager(proxy_url, **opts)
             elif req['proxy_type'] == 'http':
                 opts = {
                     #num_pools=1000,
                     #maxsize=10,
                 }
                 if req['verify']:
                     pool = ProxyManager(
                         proxy_url,
                         proxy_headers=proxy_headers,
                         cert_reqs='CERT_REQUIRED',
                         ca_certs=certifi.where(),
                         **opts,
                     )
                 else:
                     pool = ProxyManager(proxy_url,
                                         proxy_headers=proxy_headers,
                                         **opts)
             else:
                 raise IowebConfigError(
                     'Invalid value of request option `proxy_type`: %s' %
                     req['proxy_type'])
             self.pools[pool_key] = pool
         else:
             pool = self.pools[pool_key]
     else:
         pool = self.pools[(None, None, bool(req['verify']))]
     return pool
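The req mapping is assumed to expose the option keys read above; a hypothetical request dict that selects an authenticated HTTP proxy pool (key names come from the code, values are placeholders, and `transport` stands in for the object that owns get_pool):

req = {
    'proxy': '203.0.113.10:3128',   # host:port; the scheme is added from proxy_type
    'proxy_type': 'http',           # 'http' or 'socks5'
    'proxy_auth': 'user:password',  # or None when the proxy needs no credentials
    'verify': True,                 # True -> CERT_REQUIRED with certifi's CA bundle
}
pool = transport.get_pool(req)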
Example #10
    def __proxy_pool(self):
        """
        Create Proxy connection pool
        :raise ProxyRequestError
        :return: urllib3.HTTPConnectionPool
        """

        try:

            self.__server = self.__cfg.proxy if True is self.__cfg.is_standalone_proxy else self.__get_random_proxy()

            if self.__get_proxy_type(self.__server) == 'socks':

                disable_warnings(InsecureRequestWarning)

                if not hasattr(self, '__pm'):

                    package_module = importlib.import_module('urllib3.contrib.socks')
                    self.__pm = getattr(package_module, 'SOCKSProxyManager')

                pool = self.__pm(self.__server,
                                 num_pools=self.__cfg.threads,
                                 timeout=Timeout(self.__cfg.timeout,
                                 read=self.__cfg.timeout),
                                 block=True)
            else:
                pool = ProxyManager(self.__server,
                                    num_pools=self.__cfg.threads,
                                    timeout=Timeout(self.__cfg.timeout, read=self.__cfg.timeout),
                                    block=True)
            return pool
        except (DependencyWarning, ProxySchemeUnknown, ImportError) as error:
            raise ProxyRequestError(error)
Example #11
def check_stock_proxy_manager(url, proxy=None, count=0):
    if proxy is None:
        manager = PoolManager(timeout=5,
                              cert_reqs='CERT_REQUIRED',
                              ca_certs=certifi.where())
    else:
        proxy_url = "%s://%s:%s" % (proxy[0], proxy[1], proxy[2])
        manager = ProxyManager(proxy_url,
                               timeout=5,
                               cert_reqs='CERT_REQUIRED',
                               ca_certs=certifi.where())
    headers = util.make_headers(accept_encoding='gzip, deflate',
                                keep_alive=True,
                                user_agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0")
    headers['Accept-Language'] = "en-US,en;q=0.5"
    headers['Connection'] = 'keep-alive'
    headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    try:
        response = manager.request('GET',
                                   url,
                                   preload_content=False,
                                   headers=headers)
        content = json.loads(response.data)
        print("%s - Connect Success!" % count)
        return content['hasStock']
    except Exception as ex:
        print("%s - Connect Error!" % count)
        return False
Example #12
 def __init__(self):
     user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
     self.headers = {'User-Agent': user_agent}
     self.ip_url = 'http://icanhazip.com/'
     retries = Retry(connect=5, read=25, redirect=5)
     self.agent = ProxyManager(
         'http://localhost:8118/', retries=retries, timeout=Timeout(total=60.0))
Example #13
    def _init_connection(self):
        """Function for initiating connection with remote server"""
        cert_reqs = 'CERT_NONE'
        if self._connection_properties.get('ca_cert_data'):
            LOGGER.info('Using CA cert to confirm identity.')
            cert_reqs = 'CERT_REQUIRED'
            self._connection_properties.update(
                self._connection_properties.pop('ca_cert_data'))

        if self.proxy:
            if self.proxy.startswith('socks'):
                LOGGER.info("Initializing a SOCKS proxy.")
                http = SOCKSProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6, \
                                                                **self._connection_properties)
            else:
                LOGGER.info("Initializing a HTTP proxy.")
                http = ProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6, \
                                    **self._connection_properties)
        else:
            LOGGER.info("Initializing no proxy.")
            try:
                self._connection_properties.pop('ca_cert_data')
            except KeyError:
                pass
            http = PoolManager(cert_reqs=cert_reqs,
                               maxsize=6,
                               **self._connection_properties)

        self._conn = http.request
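Example #14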
    def _scrape_market(self, app_id):
        scrape_url = APPLE_APP_URL.format(app_id=app_id)
        header = {'content-type': 'text/html',
                  'User-Agent': user_agents[random.randint(0, len(user_agents)-1)]}
        try:
            response = self.connection_pool.request('GET', scrape_url, timeout=60, retries=2, headers=header)
            if response:
                content = response.data
                if len(content) > REJECT_PAGE_SIZE:
                    if len(content) > NORMAL_APP_PAGE_SIZE:
                        self.proxy_service.manage(self.proxy, False)
                        print('Succeed scrape app', app_id)
                        logger.info('Succeed scrape app {}'.format(app_id))
                        return content
                    else:
                        print('Invalid app', app_id)
                        logger.info('Invalid app {}'.format(app_id))
                else:
                    logger.info('Reject visit app {}, use proxy {}'.format(app_id, self.proxy))
                    raise Exception('Reject visit app {}'.format(app_id))
            else:
                raise Exception('Response is None')

        except Exception as ex:
            self.proxy_service.manage(self.proxy, True)
            self.proxy = self.proxy_service.get_proxy('https')
            self.connection_pool = ProxyManager(self.proxy['https']) if self.proxy else PoolManager()
            raise ex
Example #15
 def get_pool(self, req, use_cache=True):
     if req['proxy']:
         if req['proxy_type'] == 'socks5' and req['proxy_auth']:
             proxy_url = '%s://%s@%s' % (req['proxy_type'],
                                         req['proxy_auth'], req['proxy'])
         else:
             proxy_url = '%s://%s' % (req['proxy_type'], req['proxy'])
         pool_key = (req['proxy_type'], req['proxy'], bool(req['verify']))
         if not use_cache or pool_key not in self.pools:
             if req['proxy_type'] == 'socks5':
                 if req['verify']:
                     pool = SOCKSProxyManager(
                         proxy_url,
                         cert_reqs='CERT_REQUIRED',
                         ca_certs=certifi.where(),
                     )
                 else:
                     pool = SOCKSProxyManager(proxy_url)
             elif req['proxy_type'] == 'http':
                 if req['proxy_auth']:
                     proxy_headers = make_headers(
                         proxy_basic_auth=req['proxy_auth'])
                 else:
                     proxy_headers = None
                 if req['verify']:
                     pool = ProxyManager(
                         proxy_url,
                         proxy_headers=proxy_headers,
                         cert_reqs='CERT_REQUIRED',
                         ca_certs=certifi.where(),
                     )
                 else:
                     pool = ProxyManager(
                         proxy_url,
                         proxy_headers=proxy_headers,
                     )
             else:
                 raise error.IowebConfigError(
                     'Invalid value of request option `proxy_type`: %s' %
                     req['proxy_type'])
             if use_cache:
                 self.pools[pool_key] = pool
         else:
             pool = self.pools[pool_key]
     else:
         pool = self.pools[(None, None, bool(req['verify']))]
     return pool
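Example #16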
    def get_uids(self, term):

        base_url = "https://www.ncbi.nlm.nih.gov/medgen/?term="
        term = term.replace(" ", "+")
        final_url = base_url + term
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        response = http.request('GET', final_url)
        soup = BeautifulSoup(response.data, 'lxml')

        pattern = "<dd>[0-9]*</dd>"
        p = re.compile(pattern)
        ids = p.findall(str(soup))
        ids = [
            id.replace("<dd>", "").replace("</dd>", "").strip() for id in ids
        ]
        return ids
Example #17
    def request(self):
        req = self._request

        if req.proxy:
            if req.proxy_userpwd:
                headers = make_headers(proxy_basic_auth=req.proxy_userpwd)
            else:
                headers = None
            proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
            try:
                pool = ProxyManager(proxy_url, proxy_headers=headers)
            except ProxySchemeUnknown:
                raise GrabMisuseError('Urllib3 transport does '
                                      'not support %s proxies' % req.proxy_type)
        else:
            pool = self.pool
        try:
            retry = Retry(redirect=False, connect=False, read=False)
            timeout = Timeout(connect=req.connect_timeout,
                              read=req.timeout)
            #req_headers = dict((make_unicode(x), make_unicode(y))
            #                   for (x, y) in req.headers.items())
            if six.PY3:
                req_url = make_unicode(req.url)
                req_method = make_unicode(req.method)
            else:
                req_url = make_str(req.url)
                req_method = req.method
            res = pool.urlopen(req_method,
                               req_url,
                               body=req.data, timeout=timeout,
                               retries=retry, headers=req.headers,
                               preload_content=False)
        except exceptions.ConnectTimeoutError as ex:
            raise error.GrabConnectionError('Could not create connection')
        except exceptions.ProtocolError as ex:
            raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])

        # WTF?
        self.request_head = ''
        self.request_body = ''
        self.request_log = ''

        self._response = res
Example #18
    def request(self):
        req = self._request

        if req.proxy:
            if req.proxy_userpwd:
                auth = "%s@" % req.proxy_userpwd
            else:
                auth = ""
            proxy_url = "%s://%s%s" % (req.proxy_type, auth, req.proxy)
            pool = ProxyManager(proxy_url)
        else:
            pool = self.pool
        try:
            retry = Retry(redirect=False, connect=False, read=False)
            timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
            # req_headers = dict((make_unicode(x), make_unicode(y))
            #                   for (x, y) in req.headers.items())
            if six.PY3:
                req_url = make_unicode(req.url)
                req_method = make_unicode(req.method)
            else:
                req_url = make_str(req.url)
                req_method = req.method
            res = pool.urlopen(
                req_method,
                req_url,
                body=req.data,
                timeout=timeout,
                retries=retry,
                headers=req.headers,
                preload_content=False,
            )
        except exceptions.ConnectTimeoutError as ex:
            raise error.GrabConnectionError("Could not create connection")
        except exceptions.ProtocolError as ex:
            raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])

        # WTF?
        self.request_head = ""
        self.request_body = ""
        self.request_log = ""

        self._response = res
Example #19
    def _check(self, protocol, proxy_url_set):
        valid_proxy_url_set = set()
        for url in proxy_url_set:
            header = {'content-type': 'text/html',
                      'User-Agent': user_agents[random.randint(0, len(user_agents)-1)]}
            proxy = {protocol: url}
            connection_pool = ProxyManager(url)
            try:
                response = connection_pool.request('GET', CHECK_URL[protocol], timeout=60, headers=header)
                if response.status == 200:
                    valid_proxy_url_set.add(url)
                    print('Valid proxy url', url)
                else:
                    print('Invalid ', url)
            except Exception as ex:
                print(ex)
                print('Invalid ', url)

        return valid_proxy_url_set
Example #20
    def request(self):
        req = self._request

        if req.proxy:
            if req.proxy_userpwd:
                headers = make_headers(proxy_basic_auth=req.proxy_userpwd)
            else:
                headers = None
            proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
            if req.proxy_type == 'socks5':
                pool = SOCKSProxyManager(proxy_url)  # , proxy_headers=headers)
            else:
                pool = ProxyManager(proxy_url, proxy_headers=headers)
        else:
            pool = self.pool
        try:
            retry = Retry(redirect=False, connect=False, read=False)
            # The read timeout is not total response time timeout
            # It is the timeout on read of next data chunk from the server
            # Total response timeout is handled by Grab
            timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
            #req_headers = dict((make_unicode(x), make_unicode(y))
            #                   for (x, y) in req.headers.items())
            if six.PY3:
                req_url = make_unicode(req.url)
                req_method = make_unicode(req.method)
            else:
                req_url = make_str(req.url)
                req_method = req.method
            req.op_started = time.time()
            res = pool.urlopen(req_method,
                               req_url,
                               body=req.data,
                               timeout=timeout,
                               retries=retry,
                               headers=req.headers,
                               preload_content=False)
        except exceptions.ReadTimeoutError as ex:
            raise error.GrabTimeoutError('ReadTimeoutError', ex)
        except exceptions.ConnectTimeoutError as ex:
            raise error.GrabConnectionError('ConnectTimeoutError', ex)
        except exceptions.ProtocolError as ex:
            # TODO:
            # the code
            # raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])
            # fails
            # with error TypeError: 'OSError' object is not subscriptable
            raise error.GrabConnectionError('ProtocolError', ex)

        # WTF?
        self.request_head = b''
        self.request_body = b''
        self.request_log = b''

        self._response = res
Example #21
class TorUtility():

    def __init__(self):
        user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
        self.headers = {'User-Agent': user_agent}
        self.ip_url = 'http://icanhazip.com/'
        retries = Retry(connect=5, read=25, redirect=5)
        self.agent = ProxyManager(
            'http://localhost:8118/', retries=retries, timeout=Timeout(total=60.0))

    def renewTorIdentity(self, passAuth):
        try:
            s = socket.socket()
            s.connect(('localhost', 9051))
            s.send('AUTHENTICATE "{0}"\r\n'.format(passAuth).encode())
            resp = s.recv(1024).decode()

            if resp.startswith('250'):
                s.send(b"signal NEWNYM\r\n")
                resp = s.recv(1024).decode()

                if resp.startswith('250'):
                    logger.info("Identity renewed")
                else:
                    logger.info("response 2:%s" % resp)

            else:
                logger.info("response 1:%s" % resp)

        except Exception as e:
            logger.error("Can't renew identity: %s" % e)

    def renew_connection(self):
        with Controller.from_port(port=9051) as controller:
            controller.authenticate('natalie')
            controller.signal(Signal.NEWNYM)

        logger.info('*' * 50)
        logger.info('\t' * 6 + 'Renew TOR IP: %s' %
                         self.request(self.ip_url))
        logger.info('*' * 50)

    def request(self, url):
        r = self.agent.request('GET', url)
        if r.status == 200:
            return r.data
        elif r.status == 403:
            self.renew_connection()
        else:
            logger.error('status %s' % r.status)
        return ''

    def current_ip(self):
        return self.request(self.ip_url)
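A hypothetical driver for the class above, assuming Privoxy listens on localhost:8118 and the Tor control port is reachable on 9051 with the password used in renew_connection():

tor = TorUtility()
print(tor.current_ip())   # exit-node IP as reported by icanhazip.com
tor.renew_connection()    # ask Tor for a new circuit, then log the new IP
print(tor.current_ip())

Example #22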
    def get_uids(self, term):

        # Base Query and More Proxy Management #
        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        term = self.preprocess(term).replace(" ", "+")
        second_url = "esearch.fcgi?db={db}&term={query}&retmax=100&format=json"
        final_url = base_url + second_url.format(db=self.ontology, query=term)
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        t.sleep(1)

        # Response data #
        response = http.request('GET', final_url)
        json_data = json.loads(response.data)

        # Updates number of search results #
        self.get_counts(int(json_data['esearchresult']['count']))

        # Returns ID List #
        return json_data['esearchresult']['idlist']
Example #23
    def request(self):
        req = self._request

        if req.proxy:
            if req.proxy_userpwd:
                auth = '%s@' % req.proxy_userpwd
            else:
                auth = ''
            proxy_url = '%s://%s%s' % (req.proxy_type, auth, req.proxy)
            pool = ProxyManager(proxy_url)
        else:
            pool = self.pool
        try:
            retry = Retry(redirect=False, connect=False, read=False)
            timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
            #req_headers = dict((make_unicode(x), make_unicode(y))
            #                   for (x, y) in req.headers.items())
            if six.PY3:
                req_url = make_unicode(req.url)
                req_method = make_unicode(req.method)
            else:
                req_url = make_str(req.url)
                req_method = req.method
            res = pool.urlopen(req_method,
                               req_url,
                               body=req.data,
                               timeout=timeout,
                               retries=retry,
                               headers=req.headers,
                               preload_content=False)
        except exceptions.ConnectTimeoutError as ex:
            raise error.GrabConnectionError('Could not create connection')
        except exceptions.ProtocolError as ex:
            raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])

        # WTF?
        self.request_head = ''
        self.request_body = ''
        self.request_log = ''

        self._response = res
Example #24
    def __init_connection(self, url=None, proxy=False):
        """Function for initiating connection with remote server

        :param url: The URL of the remote system
        :type url: str

        """

        self.__url = url if url else self.__url
        http = ProxyManager(self.get_proxy(), cert_reqs='CERT_NONE') if self.get_proxy()\
                         and proxy else urllib3.PoolManager(cert_reqs='CERT_NONE')
        self._conn = http.request
Example #25
def get_internet_item(url, html=True):
    """ get html or data from given url

    :param url: target site url string
    :param html: download html or data boolean
    :return: html string
    """

    if PROXY_MODE == "http_proxy":
        http = ProxyManager(proxy_url=PROXY_URL_PORT)

    elif PROXY_MODE == "auth_proxy":
        auth_proxy_headers = make_headers(proxy_basic_auth=PROXY_BASIC_AUTH)
        http = ProxyManager(proxy_url=PROXY_URL_PORT,
                            proxy_headers=auth_proxy_headers,
                            cert_reqs="CERT_REQUIRED",
                            ca_certs=certifi.where())

    else:
        http = PoolManager(cert_reqs="CERT_REQUIRED",
                           ca_certs=certifi.where())

    r = http.request("GET", url)

    if r.status != 200:
        raise ConnectionError("http request failure")

    if html:
        data = r.data.decode()

    else:
        data = r.data

    return data
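The module-level settings read by get_internet_item() (PROXY_MODE, PROXY_URL_PORT, PROXY_BASIC_AUTH) are defined elsewhere in the original module; a hypothetical configuration and call, with placeholder values:

PROXY_MODE = "auth_proxy"   # "http_proxy", "auth_proxy", or anything else for a direct connection
PROXY_URL_PORT = "http://proxy.example.com:3128"
PROXY_BASIC_AUTH = "user:password"

html = get_internet_item("https://example.com")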
Example #26
class TorUtility():
    def __init__(self):
        user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
        self.headers = {'User-Agent': user_agent}
        self.ip_url = 'http://icanhazip.com/'
        self.logger = logging.getLogger('gkp')
        retries = Retry(connect=5, read=5, redirect=5)
        self.agent = ProxyManager('http://localhost:8118/',
                                  retries=retries,
                                  timeout=Timeout(total=60.0))

    def renewTorIdentity(self, passAuth):
        try:
            s = socket.socket()
            s.connect(('localhost', 9051))
            s.send('AUTHENTICATE "{0}"\r\n'.format(passAuth).encode())
            resp = s.recv(1024).decode()

            if resp.startswith('250'):
                s.send(b"signal NEWNYM\r\n")
                resp = s.recv(1024).decode()

                if resp.startswith('250'):
                    self.logger.info("Identity renewed")
                else:
                    self.logger.info("response 2:%s" % resp)

            else:
                self.logger.info("response 1:%s" % resp)

        except Exception as e:
            self.logger.error("Can't renew identity: %s" % e)

    def renew_connection(self):
        with Controller.from_port(port=9051) as controller:
            controller.authenticate('natalie')
            controller.signal(Signal.NEWNYM)

        self.logger.info('*' * 50)
        self.logger.info('\t' * 6 +
                         'Renew TOR IP: %s' % self.request(self.ip_url))
        self.logger.info('*' * 50)

    def request(self, url):
        r = self.agent.request('GET', url)
        if r.status == 200: return r.data
        elif r.status == 403: self.renew_connection()
        else: self.logger.error('status %s' % r.status)
        return ''

    def current_ip(self):
        return self.request(self.ip_url)
Example #27
def get_http_connector(conf, options):
    """
    Used to create http connector, depends on api_proxy configuration parameter

    :param conf: configuration object
    :param options: additional options

    :return: ProxyManager if api_proxy is set, otherwise PoolManager object
    """
    if conf.api_proxy:
        return ProxyManager(conf.api_proxy, **options)
    else:
        return PoolManager(**options)
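A hypothetical wiring of this helper; SimpleNamespace merely stands in for the real configuration object, and the options are ordinary urllib3 pool keyword arguments:

from types import SimpleNamespace

conf = SimpleNamespace(api_proxy="http://proxy.example.com:3128")
http = get_http_connector(conf, {'timeout': 10.0, 'retries': 3})
resp = http.request('GET', 'https://example.com')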
Example #28
    def urllib3_from_pool(self, http_request):
        """
        Get a u3 pool from url and request
        :param http_request: HttpRequest
        :type http_request: HttpRequest
        :return urllib3.poolmanager.ProxyManager
        :rtype urllib3.poolmanager.ProxyManager
        """

        if not http_request.http_proxy_host:
            SolBase.sleep(0)
            return self._u3_basic_pool

        # Compute key
        key = "{0}#{1}#".format(
            http_request.http_proxy_host,
            http_request.http_proxy_port,
        )

        # Check
        if key in self._u3_proxy_pool:
            SolBase.sleep(0)
            return self._u3_proxy_pool[key]

        # Allocate (in lock)
        with self._u3_proxy_locker:
            # Check maxed
            if len(self._u3_proxy_pool) >= self._u3_proxy_pool_max:
                raise Exception("u3 pool maxed, cur={0}, max={1}".format(
                    len(self._u3_proxy_pool), self._u3_proxy_pool_max
                ))

            # Uri
            proxy_url = "http://{0}:{1}".format(
                http_request.http_proxy_host,
                http_request.http_proxy_port)

            # Ok, allocate
            # Force underlying fifo queue to 1024 via maxsize
            p = ProxyManager(num_pools=1024, maxsize=1024, proxy_url=proxy_url)
            self._u3_proxy_pool[key] = p
            logger.info("Started new pool for key=%s", key)
            SolBase.sleep(0)
            return p
Example #29
class Downloader:
    def __init__(self, proxy_list):
        self.__proxyCounter = 0
        self.__proxyList = proxy_list
        self.__http = ProxyManager("http://" + self.__proxyList[self.__proxyCounter])

    def try_download(self, url, tries=0):
        try:
            r = self.__http.request('GET', url)
        except:
            if tries > 2:
                print("To many tries, updating proxy...")
                self.update_proxy()
                r = self.try_download(url)
            else:
                print("Error while downloading from \'%s\'. Trying again in 3 secs... [%d]" % (url, tries + 1))
                time.sleep(3)
                r = self.try_download(url, tries + 1)
        return r

    def update_proxy(self):
        self.__proxyCounter += 1
        if self.__proxyCounter >= len(self.__proxyList):
            self.__proxyCounter = 0
        self.__http = ProxyManager("http://" + self.__proxyList[self.__proxyCounter])

    def download_to_file(self, url, file_adress, tries=0):
        print("Start downloading from: '{0}'".format(url))
        r = self.try_download(url)
        if r.status == 200:
            print("Downloaded. Saving to '{0}'".format(file_adress))
            f = open(file_adress, 'wb')
            f.write(r.data)
            f.close()
        elif r.status // 100 == 5:
            print("Something wrong with server (%s). Waiting 2 secs and trying again... [%d]" % (r.status, tries + 1))
            time.sleep(2)
            if tries < 5:
                self.download_to_file(url, file_adress, tries + 1)
            else:
                print("Too many tries. Aborting! Try to start update later")
                return -1
        else:
            print("Wrong response status: {0}".format(r.status))
Example #30
    def __init_connection(self, url=None, proxy=False):
        """Function for initiating connection with remote server

        :param url: The URL of the remote system
        :type url: str

        """

        self.__url = url if url else self.__url
        if self.get_proxy() and proxy:
            if self.get_proxy().startswith('socks'):
                LOGGER.info("Initializing a SOCKS proxy.")
                http = SOCKSProxyManager(self.get_proxy(), cert_reqs='CERT_NONE')
            else:
                LOGGER.info("Initializing a HTTP proxy.")
                http = ProxyManager(self.get_proxy(), cert_reqs='CERT_NONE')
        else:
            LOGGER.info("Initializing no proxy.")
            http = urllib3.PoolManager(cert_reqs='CERT_NONE')

        self._conn = http.request
Example #31
    def __init__(self, configuration: Configuration, pools_size: int = 4):
        self.configuration = configuration

        self.header_params: dict = {}
        self.user_agent = 'felix-scholz/website-python-client/1.0.1/python'

        addition_pool_args = {}
        if configuration.assert_hostname is not None:
            addition_pool_args[
                'assert_hostname'] = configuration.assert_hostname

        if configuration.retries is not None:
            addition_pool_args['retries'] = configuration.retries

        if configuration.proxy:
            self.pool_manager = ProxyManager(
                num_pools=pools_size,
                maxsize=configuration.connection_pool_maxsize
                if configuration.connection_pool_maxsize is not None else 4,
                cert_reqs=ssl.CERT_REQUIRED
                if configuration.verify_ssl else ssl.CERT_NONE,
                ca_certs=configuration.ssl_ca_cert
                if configuration.ssl_ca_cert is not None else certifi.where(),
                cert_file=configuration.cert_file,
                key_file=configuration.key_file,
                proxy_url=configuration.proxy,
                proxy_headers=configuration.proxy_headers,
                **addition_pool_args)
        else:
            self.pool_manager = PoolManager(
                num_pools=pools_size,
                maxsize=configuration.connection_pool_maxsize
                if configuration.connection_pool_maxsize is not None else 4,
                cert_reqs=ssl.CERT_REQUIRED
                if configuration.verify_ssl else ssl.CERT_NONE,
                ca_certs=configuration.ssl_ca_cert
                if configuration.ssl_ca_cert is not None else certifi.where(),
                cert_file=configuration.cert_file,
                key_file=configuration.key_file,
                **addition_pool_args)
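The constructor reads a number of attributes off `configuration`; this stand-in lists the fields it assumes (names inferred from the code above, values are placeholders, not the real library's Configuration class):

from types import SimpleNamespace

configuration = SimpleNamespace(
    assert_hostname=None, retries=None,
    verify_ssl=True, ssl_ca_cert=None,
    cert_file=None, key_file=None,
    connection_pool_maxsize=4,
    proxy="http://proxy.example.com:3128", proxy_headers=None,
)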
Example #32
def fetch_title(proxy: urllib3.ProxyManager, title_id) -> dict:
    url = f"https://mangadex.org/api/v2/manga/{title_id}"
    scrape_id = db.run_sql(
        "INSERT INTO scrape (proxy, url) VALUES (?, ?)",
        (proxy.proxy_url, url),
        return_last_insert_rowid=True,
    )

    resp = proxy.request("GET", url)
    assert resp.status in [200, 404], resp.data

    db.run_sql(
        """
        UPDATE scrape
        SET resp_status = ?,
            resp_body = ?,
            ended_at = datetime('now')
        WHERE id = ?;
        """,
        (resp.status, resp.data, scrape_id),
    )
    print("Saved title", title_id, "-", resp.status)
Example #33
    def _init_connection(self):
        """Function for initiating connection with remote server"""
        if self._connection_properties.get('ca_certs', None):
            LOGGER.info('Using CA cert to confirm identity.')
            cert_reqs = 'CERT_REQUIRED'
        else:
            LOGGER.info('Not using CA certificate.')
            cert_reqs = 'CERT_NONE'
        if self.proxy:
            if self.proxy.startswith('socks'):
                LOGGER.info("Initializing a SOCKS proxy.")
                http = SOCKSProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6, \
                                                                    **self._connection_properties)
            else:
                LOGGER.info("Initializing a HTTP proxy.")
                http = ProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6, \
                                    **self._connection_properties)
        else:
            LOGGER.info("Initializing no proxy.")
            http = PoolManager(cert_reqs=cert_reqs,
                               maxsize=6,
                               **self._connection_properties)

        self._conn = http.request
Example #34
	def __init__(self):
		user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
		self.headers={'User-Agent':user_agent}
		self.ip_url = 'http://icanhazip.com/'
		self.logger = logging.getLogger('gkp')
		retries = Retry(connect=5, read=5, redirect=5)
		self.http = ProxyManager('http://localhost:8118/', retries=retries, timeout=Timeout(total=60.0))
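Example #35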
    def get_terms(self,
                  term,
                  id,
                  id_string,
                  number_of_results,
                  is_match=False):

        # Make API call to get xml data #
        term = self.lemmatize(self.preprocess(term))

        # Proxy Code and Base Query #
        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        second_url = "esummary.fcgi?db=medgen&db=medgen&{query}"
        final_url = base_url + second_url.format(db=self.ontology,
                                                 query="id=" + id_string)
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        t.sleep(1)
        response = http.request('GET', final_url)
        soup = BeautifulSoup(response.data, 'lxml')

        # Get the separate hits in lists #
        hits = soup.find_all('documentsummary')

        # Dictionary to store the results #
        results = []

        # Set threshold, take the min of the threshold requested and the total number of search results #
        threshold = min(self.threshold, number_of_results)

        # For every hit (each hit represents data from ONE UID) #
        for hit in hits:

            # Keeps track of meeting the threshold #
            counter = 0

            # Check if return is a disease #
            check = "Blank" if not len(hit.find("semanticid")) else hit.find(
                "semanticid").text.strip()

            # List of acceptable semantic types #
            semantic_types = [
                'T191', 'T047', 'T048', 'T019', 'T190', 'T033', 'T049', 'T046',
                'T184', "Blank"
            ]

            # If term is a disease, execute the following: #
            if check in semantic_types:

                # Get Concept ID #
                concept_id = "Blank" if not len(hit.find(
                    'conceptid')) else hit.find('conceptid').text.strip()

                # Get Title #
                title = hit.find('title').text.strip()

                # Get name tags for looping #
                name_tags = hit.find_all('name')

                # Get definition/description #
                definition = hit.find('definition').text.strip()
                def_score = self.modified_jaccard_similarity(term, definition)

                # Get SAB, CODE, SCUI, SDUI, and Title #
                processed_term = self.stem(term)
                new_title = self.stem(self.lemmatize(self.preprocess(title)))

                # Keeps track of best scores for each uid #
                scores = []

                # Loop through synonyms #
                for data in name_tags:

                    # Get the max syn_score between a synonym and the title #
                    new_text = self.stem(
                        self.lemmatize(self.preprocess(data.text)))
                    syn_score = max(
                        fuzz.ratio(new_text, processed_term),
                        fuzz.ratio(processed_term, new_title)
                    ) if len(new_text.split()) == 1 and len(
                        new_title.split()) == 1 and len(processed_term.split(
                        )) == 1 else self.jaccard_similarity(
                            new_text, processed_term)

                    # If score is 100 or the term is one word, take the syn_score #
                    score = syn_score if len(
                        term.split()) == 1 or syn_score == 100 else max(
                            syn_score, def_score)

                    # Intialize dictionary to add to results #
                    value = dict()
                    code, sab, scui, sdui = None, None, None, None
                    index = hits.index(hit)

                    # Add Basic Data MetaData to Dictionary #
                    value['Disease_Input'] = term
                    value['Ontology'] = self.ontology
                    value['Synonym'] = data.text
                    value['Description'] = definition
                    value['Semantic_Type'] = check
                    value['UID'] = id[index]
                    value['Ontology_ID'] = concept_id
                    value['Final_Score'] = syn_score + def_score
                    value['Synonym_Score'] = syn_score
                    value['Description_Score'] = def_score
                    value['Title'] = title
                    value['Number_of_Results'] = number_of_results
                    value['Holder'] = score

                    # Add extra metadata that may throw errors and add to dictionary #
                    try:
                        code = data['code']
                        value['CODE'] = code
                    except:
                        value['CODE'] = np.nan
                    try:
                        sab = data['sab']
                        value['SAB'] = sab
                    except:
                        value['SAB'] = np.nan
                    try:
                        scui = data['scui']
                        value['SCUI'] = scui
                    except:
                        value['SCUI'] = np.nan
                    try:
                        sdui = data['sdui']
                        value['SDUI'] = sdui
                    except:
                        value['SDUI'] = np.nan

                    scores.append(value)

                # This code takes scores, (as it has metadata for only ONE uid) and finds the best match #
                # Get the best score, if scores has results (it maybe empty) #
                if scores:

                    # Gets the dictionary with the highest score and it's corresponding data #
                    best_score_data = max(scores,
                                          key=lambda x: x['Final_Score'])
                    best_score = best_score_data['Holder']
                    results.append(best_score_data)

                    # If best score is greater than or equal to the threshold, increase counter (a step closer to threshold) #
                    if best_score >= self.score_threshold or threshold == 1:
                        counter += 1

                    # If threshold is met, then return results #
                    if counter == threshold:
                        return results

        return results
Example #36
    def request(self):
        req = self._request

        if req.proxy:
            if req.proxy_userpwd:
                headers = make_headers(proxy_basic_auth=req.proxy_userpwd)
            else:
                headers = None
            proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
            if req.proxy_type == 'socks5':
                pool = SOCKSProxyManager(
                    proxy_url,
                    cert_reqs='CERT_REQUIRED',
                    ca_certs=certifi.where())  # , proxy_headers=headers)
            else:
                pool = ProxyManager(proxy_url,
                                    proxy_headers=headers,
                                    cert_reqs='CERT_REQUIRED',
                                    ca_certs=certifi.where())
        else:
            pool = self.pool
        with self.wrap_transport_error():
            # Retries can be disabled by passing False:
            # http://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#module-urllib3.util.retry
            # Do not use False because of warning:
            # Converted retries value: False -> Retry(total=False,
            # connect=None, read=None, redirect=0, status=None)
            retry = Retry(
                total=False,
                connect=False,
                read=False,
                redirect=0,
                status=None,
            )
            # The read timeout is not total response time timeout
            # It is the timeout on read of next data chunk from the server
            # Total response timeout is handled by Grab
            timeout = Timeout(connect=req.connect_timeout, read=req.timeout)
            #req_headers = dict((make_unicode(x), make_unicode(y))
            #                   for (x, y) in req.headers.items())
            if six.PY3:
                req_url = make_unicode(req.url)
                req_method = make_unicode(req.method)
            else:
                req_url = make_str(req.url)
                req_method = req.method
            req.op_started = time.time()
            try:
                res = pool.urlopen(req_method,
                                   req_url,
                                   body=req.data,
                                   timeout=timeout,
                                   retries=retry,
                                   headers=req.headers,
                                   preload_content=False)
            except UnicodeError as ex:
                raise error.GrabConnectionError('GrabInvalidUrl', ex)
        #except exceptions.ReadTimeoutError as ex:
        #    raise error.GrabTimeoutError('ReadTimeoutError', ex)
        #except exceptions.ConnectTimeoutError as ex:
        #    raise error.GrabConnectionError('ConnectTimeoutError', ex)
        #except exceptions.ProtocolError as ex:
        #    # TODO:
        #    # the code
        #    # raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])
        #    # fails
        #    # with error TypeError: 'OSError' object is not subscriptable
        #    raise error.GrabConnectionError('ProtocolError', ex)
        #except exceptions.SSLError as ex:
        #    raise error.GrabConnectionError('SSLError', ex)

        # WTF?
        self.request_head = b''
        self.request_body = b''
        self.request_log = b''

        self._response = res
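Example #37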
 def __init__(self, error_dict):
     super(AppleDetailSpider, self).__init__(error_dict)
     self.market = 'apple'
     self.proxy = self.proxy_service.get_proxy('https')
     self.connection_pool = ProxyManager(self.proxy['https']) if self.proxy else PoolManager()
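Example #38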
    def get_terms(self, term, id, number_of_results):

        # Make API call to get json_data #
        term = self.lemmatize(self.preprocess(term))

        # It stores a given score result that will be added to scores, then to results #
        json_dict = dict()

        # Base Query and More Proxy Management #
        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        second_url = "esummary.fcgi?db=mesh&db=mesh&{query}&format=json"
        final_url = base_url + second_url.format(db=self.ontology,
                                                 query="id=" + id)
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        t.sleep(1)

        # Response data #
        response = http.request('GET', final_url)
        json_data = json.loads(response.data)
        uids = json_data['result']['uids']

        # Holds a list of dictionaries, will be converted to dataframe #
        results = []

        # Take the minimum of what the threshold is, versus the number of search hits #
        threshold = min(self.threshold, number_of_results)

        # Loop through each uid in the uids list #
        for uid in uids:

            # Keeps track of uids that score at or above the scoring requirement, used for pruning #
            counter = 0

            # This represents json data from the UID that is CURRENTLY being looped through #
            json_section = json_data['result'][uid]

            # Check if ID is a disease #
            check_id = self.filter_by_disease(id, json_section)

            # If the search term is a disease... #
            if check_id:

                # Pure extracted data from json file before processing #
                scope_note = json_section["ds_scopenote"]
                mesh_id = json_section["ds_meshui"]
                mesh_terms = json_section["ds_meshterms"]

                # Intitialize score variables #
                score = None
                syn_score = None
                processed_term = self.stem(term)
                def_score = self.modified_jaccard_similarity(term, scope_note)

                # Keeps track of best scores for each uid #
                scores = []

                # If there's only one search result, take it (regardless of score), and return it #
                # Adding it to just the scores list is fine since it's the only output #
                if threshold == 1:
                    processed_mesh_term = self.stem(
                        self.lemmatize(self.preprocess(mesh_terms[0])))
                    syn_score = fuzz.ratio(
                        processed_mesh_term, processed_term
                    ) if len(processed_term.split()) == 1 and len(
                        processed_mesh_term.split()) == 1 else self.jaccard_similarity(
                            processed_mesh_term, processed_term)
                    score = max(syn_score, def_score)
                    json_dict = {
                        'Ontology': self.ontology,
                        'UID': uid,
                        'Ontology_ID': mesh_id,
                        'Disease_Input': term,
                        "Synonym": mesh_terms[0],
                        "Description": scope_note,
                        'Number_of_Results': number_of_results,
                        'Synonym_Score': syn_score,
                        'Description_Score': def_score,
                        'Final_Score': syn_score + def_score,
                        'Holder': score
                    }
                    scores.append(json_dict)
                    return scores

                else:

                    # Loop through each synonym in mesh_terms for scoring #
                    for mesh_term in mesh_terms:

                        # Prepare synonymn for levenstein distance matching (through fuzzy library) #
                        processed_mesh_term = self.stem(
                            self.lemmatize(self.preprocess(mesh_term)))
                        syn_score = fuzz.ratio(
                            processed_mesh_term, processed_term) if len(
                                processed_term.split()) == 1 and len(
                                    processed_mesh_term.split()
                                ) == 1 else self.jaccard_similarity(
                                    processed_mesh_term, processed_term)

                        # If term is only one word, just take the syn_score as its final score, otherwise take the max #
                        score = syn_score if len(term.split()) == 1 else max(
                            syn_score, def_score)

                        # If the score is >= 60, add it to the scores list #
                        json_dict = {
                            'Ontology': self.ontology,
                            'UID': uid,
                            'Ontology_ID': mesh_id,
                            'Disease_Input': term,
                            "Synonym": mesh_term,
                            "Description": scope_note,
                            'Number_of_Results': number_of_results,
                            'Synonym_Score': syn_score,
                            'Description_Score': def_score,
                            'Final_Score': syn_score + def_score,
                            'Holder': score
                        }
                        scores.append(json_dict)

                # This code takes scores, (as it has metadata for only ONE uid) and finds the best match #
                # Get the best score, if scores has results (it maybe empty) #
                if scores:

                    # Gets the dictionary with the highest score and it's corresponding data #
                    best_score_data = max(scores,
                                          key=lambda x: x['Final_Score'])
                    best_score = best_score_data['Holder']
                    results.append(best_score_data)

                    # If best score is greater than or equal to the threshold, increase counter (a step closer to threshold) #
                    if best_score >= self.score_threshold or threshold == 1:
                        counter += 1

                    # If threshold is met, then return results #
                    if counter == threshold:
                        return results

        return results