Example #1
def get_web_page(url: str, proxies: list = None):

    headers = {
        'User-agent':
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582",
    }

    if proxies:
        import numpy as np
        status = 0
        i = 0
        while status != 200 and i < 10:
            try:
                proxy_url = np.random.choice(proxies)
                print(f"Proxy: {proxy_url}")
                proxy_url = "https://" + proxy_url.strip()
                http = ProxyManager(proxy_url=proxy_url,
                                    headers=headers,
                                    cert_reqs='CERT_NONE',
                                    assert_hostname=False)
                resp = http.request('GET', url)
                status = resp.status
                print(status)
            except Exception as exc:
                print(f"Proxy attempt failed: {exc}")
            i += 1

    else:
        http = PoolManager(headers=headers,
                           cert_reqs='CERT_NONE',
                           assert_hostname=False)
        resp = http.request('GET', url)

    return resp.status, resp.data.decode('utf-8')
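Note: Example #1 omits its imports. A minimal sketch of the underlying urllib3 pattern it relies on; the proxy address below is a placeholder, not one from the example:

# Minimal sketch, assuming urllib3 is installed; "proxy.example.com" is a placeholder.
from urllib3 import PoolManager, ProxyManager

http = PoolManager()                                    # direct connection
# http = ProxyManager("http://proxy.example.com:8080")  # or route requests through a proxy
resp = http.request('GET', 'https://httpbin.org/ip')
print(resp.status, resp.data.decode('utf-8'))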
class NCBI_Authetication():
    def __init__(self):
        self.authenticate()

    def authenticate(self):
        self.base_url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        self.my_query = "PD-1%20ab%20agonist%5BTitle%2FAbstract%5D)%20AND%20(%222000%2F01%2F01%22%5BDate%20-%20Publication%5D%20%3A%20%223000%22%5BDate%20-%20Publication%5D"
        self.database = "pubmed"
        self.second_url = "esearch.fcgi?db={db}&term={query}&usehistory=y"
        self.final_url = self.base_url + self.second_url.format(
            db=self.database, query=self.my_query)
        self.http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        self.firstResponse = self.http.request('GET', self.final_url)

        self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        self.my_query = "id=29554659"
        self.database = "pubmed"
        self.second_url = "elink.fcgi?dbfrom=gene&db={db}&{query}"
        self.final_url = self.base_url + self.second_url.format(
            db=self.database, query=self.my_query)
        self.http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        self.secondResponse = self.http.request('GET', self.final_url)

    def get_response(self):
        return self.firstResponse, self.secondResponse
Example #3
def get_internet_item(url, html=True):
    """ get html or data from given url

    :param url: target site url string
    :param html: download html or data boolean
    :return: html string
    """

    if PROXY_MODE == "http_proxy":
        http = ProxyManager(proxy_url=PROXY_URL_PORT)

    elif PROXY_MODE == "auth_proxy":
        auth_proxy_headers = make_headers(proxy_basic_auth=PROXY_BASIC_AUTH)
        http = ProxyManager(proxy_url=PROXY_URL_PORT,
                            proxy_headers=auth_proxy_headers,
                            cert_reqs="CERT_REQUIRED",
                            ca_certs=certifi.where())

    else:
        http = PoolManager(cert_reqs="CERT_REQUIRED",
                           ca_certs=certifi.where())

    r = http.request("GET", url)

    if r.status != 200:
        raise ConnectionError("http request failure")

    if html:
        data = r.data.decode()

    else:
        data = r.data

    return data
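Note: a hedged usage sketch for get_internet_item(), assuming the module-level constants it reads (PROXY_MODE, PROXY_URL_PORT, PROXY_BASIC_AUTH) are defined roughly as follows; the values are placeholders:

# Hypothetical configuration consumed by get_internet_item().
PROXY_MODE = "auth_proxy"                          # "http_proxy", "auth_proxy", or anything else for a direct connection
PROXY_URL_PORT = "http://proxy.example.com:3128"   # placeholder proxy address
PROXY_BASIC_AUTH = "username:password"             # passed to make_headers(proxy_basic_auth=...)

page_html = get_internet_item("https://example.com", html=True)
raw_bytes = get_internet_item("https://example.com/data.bin", html=False)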
def check_ip(ip_info, port_info, type):
    check_url = "https://bck.hermes.com/product-page?locale=us_en&productsku=H056289CC18"
    ip_url = "%s://%s:%s" % (type, ip_info, port_info)
    manager = ProxyManager(ip_url,
                           timeout=10,
                           cert_reqs='CERT_REQUIRED',
                           ca_certs=certifi.where())
    headers = util.make_headers(
        accept_encoding='gzip, deflate',
        keep_alive=True,
        user_agent=
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0"
    )
    headers['Accept-Language'] = "en-US,en;q=0.5"
    headers['Connection'] = 'keep-alive'
    headers[
        'Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    try:
        response = manager.request('GET',
                                   check_url,
                                   preload_content=False,
                                   headers=headers)
        res = response.data
        print(res)
        json.loads(res)
        return True
    except Exception as ex:
        return False
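Note: the check_ip() helper above returns True only when the proxied response parses as JSON. A short usage sketch with placeholder values:

# Placeholder proxy address for illustration only.
if check_ip("203.0.113.10", "8080", "http"):
    print("proxy is usable")
else:
    print("proxy failed or returned non-JSON content")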
Example #5
class TorUtility():

    def __init__(self):
        user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
        self.headers = {'User-Agent': user_agent}
        self.ip_url = 'http://icanhazip.com/'
        retries = Retry(connect=5, read=25, redirect=5)
        self.agent = ProxyManager(
            'http://localhost:8118/', retries=retries, timeout=Timeout(total=60.0))

    def renewTorIdentity(self, passAuth):
        try:
            s = socket.socket()
            s.connect(('localhost', 9051))
            s.send('AUTHENTICATE "{0}"\r\n'.format(passAuth).encode())
            resp = s.recv(1024)

            if resp.startswith(b'250'):
                s.send(b"signal NEWNYM\r\n")
                resp = s.recv(1024)

                if resp.startswith(b'250'):
                    logger.info("Identity renewed")
                else:
                    logger.info("response 2:%s" % resp)

            else:
                logger.info("response 1:%s" % resp)

        except Exception as e:
            logger.error("Can't renew identity: %s" % e)

    def renew_connection(self):
        with Controller.from_port(port=9051) as controller:
            controller.authenticate('natalie')
            controller.signal(Signal.NEWNYM)

        logger.info('*' * 50)
        logger.info('\t' * 6 + 'Renew TOR IP: %s' %
                         self.request(self.ip_url))
        logger.info('*' * 50)

    def request(self, url):
        r = self.agent.request('GET', url)
        if r.status == 200:
            return r.data
        elif r.status == 403:
            self.renew_connection()
        else:
            logger.error('status %s' % r.status)
        return ''

    def current_ip(self):
        return self.request(self.ip_url)
Example #6
class TorUtility():
    def __init__(self):
        user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
        self.headers = {'User-Agent': user_agent}
        self.ip_url = 'http://icanhazip.com/'
        self.logger = logging.getLogger('gkp')
        retries = Retry(connect=5, read=5, redirect=5)
        self.agent = ProxyManager('http://localhost:8118/',
                                  retries=retries,
                                  timeout=Timeout(total=60.0))

    def renewTorIdentity(self, passAuth):
        try:
            s = socket.socket()
            s.connect(('localhost', 9051))
            s.send('AUTHENTICATE "{0}"\r\n'.format(passAuth).encode())
            resp = s.recv(1024)

            if resp.startswith(b'250'):
                s.send(b"signal NEWNYM\r\n")
                resp = s.recv(1024)

                if resp.startswith(b'250'):
                    self.logger.info("Identity renewed")
                else:
                    self.logger.info("response 2:%s" % resp)

            else:
                self.logger.info("response 1:%s" % resp)

        except Exception as e:
            self.logger.error("Can't renew identity: %s" % e)

    def renew_connection(self):
        with Controller.from_port(port=9051) as controller:
            controller.authenticate('natalie')
            controller.signal(Signal.NEWNYM)

        self.logger.info('*' * 50)
        self.logger.info('\t' * 6 +
                         'Renew TOR IP: %s' % self.request(self.ip_url))
        self.logger.info('*' * 50)

    def request(self, url):
        r = self.agent.request('GET', url)
        if r.status == 200: return r.data
        elif r.status == 403: self.renew_connection()
        else: self.logger.error('status %s' % r.status)
        return ''

    def current_ip(self):
        return self.request(self.ip_url)
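Note: both TorUtility variants above are driven the same way. A hedged usage sketch, assuming a local Tor HTTP proxy on port 8118 and the control port on 9051, as the examples do:

# Requires a running Tor HTTP proxy on 8118; renew_connection() also needs the stem library.
tor = TorUtility()
print('Current exit IP:', tor.current_ip())
tor.renew_connection()          # ask the Tor control port for a new circuit
print('New exit IP:', tor.current_ip())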
Example #7
def run_proxy(url):
	global lock
	#print(lock.locked())
	if(prox == ''):
		print('No proxys available.')
		return run(url)	
	print('Proxy: ' + prox)
	http = ProxyManager(prox)	
	try:
		data = {'attribute': 'value'}
		encoded_data = json.dumps(data).encode('utf-8')
		req = http.request(
			'POST',
			url,
			timeout=3,
			body=encoded_data,
			headers={'Content-Type': 'application/json'})
		print(req.status)
		if(req.status == 404):
			print('Item Does not exist.')
			#return run(url)
			return
		if(req.status == 501):
			print('Proxy at api call limit')
			get_new_proxy()
			return run_proxy(url)
		if(req.status == 407):
			print('Authentication required')
			get_new_proxy()
			return run_proxy(url)
		if(req.status != 200):
			print('Unknown Status Code')
			print(req.status)
			get_new_proxy()
			return run_proxy(url)
	except Exception:
		print('Request timed out.')
		get_new_proxy()
		return run(url)
	
	
	data = json.loads(req.data)
	req.release_conn()
	
	data = data['item']
	id = str(data['id'])
	print('ID: ' + id)
	with open('ItemIds', 'a') as file:
		file.write(id + '\n')
    def get_uids(self, term):

        base_url = "https://www.ncbi.nlm.nih.gov/medgen/?term="
        term = term.replace(" ", "+")
        final_url = base_url + term
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        response = http.request('GET', final_url)
        soup = BeautifulSoup(response.data, 'lxml')

        pattern = "<dd>[0-9]*</dd>"
        p = re.compile(pattern)
        ids = p.findall(str(soup))
        ids = [
            id.replace("<dd>", "").replace("</dd>", "").strip() for id in ids
        ]
        return ids
Example #9
class Downloader:
    def __init__(self, proxy_list):
        self.__proxyCounter = 0
        self.__proxyList = proxy_list
        self.__http = ProxyManager("http://" + self.__proxyList[self.__proxyCounter])

    def try_download(self, url, tries=0):
        try:
            r = self.__http.request('GET', url)
        except Exception:
            if tries > 2:
                print("Too many tries, updating proxy...")
                self.update_proxy()
                r = self.try_download(url)
            else:
                print("Error while downloading from \'%s\'. Trying again in 3 secs... [%d]" % (url, tries + 1))
                time.sleep(3)
                r = self.try_download(url, tries + 1)
        return r

    def update_proxy(self):
        self.__proxyCounter += 1
        if self.__proxyCounter >= len(self.__proxyList):
            self.__proxyCounter = 0
        self.__http = ProxyManager("http://" + self.__proxyList[self.__proxyCounter])

    def download_to_file(self, url, file_adress, tries=0):
        print("Start downloading from: '{0}'".format(url))
        r = self.try_download(url)
        if r.status == 200:
            print("Downloaded. Saving to '{0}'".format(file_adress))
            with open(file_adress, 'wb') as f:
                f.write(r.data)
        elif r.status // 100 == 5:
            print("Something wrong with server (%s). Waiting 2 secs and trying again... [%d]" % (r.status, tries + 1))
            time.sleep(2)
            if tries < 5:
                self.download_to_file(url, file_adress, tries + 1)
            else:
                print("Too many tries. Aborting! Try to start update later")
                return -1
        else:
            print("Wrong response status: {0}".format(r.status))
Example #10
    def _check(self, protocol, proxy_url_set):
        valid_proxy_url_set = set()
        for url in proxy_url_set:
            header = {'content-type': 'text/html',
                      'User-Agent': user_agents[random.randint(0, len(user_agents)-1)]}
            proxy = {protocol: url}
            connection_pool = ProxyManager(url)
            try:
                response = connection_pool.request('GET', CHECK_URL[protocol], timeout=60, headers=header)
                if response.status == 200:
                    valid_proxy_url_set.add(url)
                    print('Valid proxy url', url)
                else:
                    print('Invalid ', url)
            except Exception as ex:
                print(ex)
                print('Invalid ', url)

        return valid_proxy_url_set
    def get_uids(self, term):

        # Base Query and More Proxy Management #
        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        term = self.preprocess(term).replace(" ", "+")
        second_url = "esearch.fcgi?db={db}&term={query}&retmax=100&format=json"
        final_url = base_url + second_url.format(db=self.ontology, query=term)
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        t.sleep(1)

        # Response data #
        response = http.request('GET', final_url)
        json_data = json.loads(response.data)

        # Updates number of search results #
        self.get_counts(int(json_data['esearchresult']['count']))

        # Returns ID List #
        return json_data['esearchresult']['idlist']
Example #12
def fetch_title(proxy: urllib3.ProxyManager, title_id) -> dict:
    url = f"https://mangadex.org/api/v2/manga/{title_id}"
    scrape_id = db.run_sql(
        "INSERT INTO scrape (proxy, url) VALUES (?, ?)",
        (proxy.proxy_url, url),
        return_last_insert_rowid=True,
    )

    resp = proxy.request("GET", url)
    assert resp.status in [200, 404], resp.data

    db.run_sql(
        """
        UPDATE scrape
        SET resp_status = ?,
            resp_body = ?,
            ended_at = datetime('now')
        WHERE id = ?;
        """,
        (resp.status, resp.data, scrape_id),
    )
    print("Saved title", title_id, "-", resp.status)
class AppleDetailSpider(AppDetailSpider):
    def __init__(self, error_dict):
        super(AppleDetailSpider, self).__init__(error_dict)
        self.market = 'apple'
        self.proxy = self.proxy_service.get_proxy('https')
        self.connection_pool = ProxyManager(self.proxy['https']) if self.proxy else PoolManager()

    @retry(2)
    def _scrape_market(self, app_id):
        scrape_url = APPLE_APP_URL.format(app_id=app_id)
        header = {'content-type': 'text/html',
                  'User-Agent': user_agents[random.randint(0, len(user_agents)-1)]}
        try:
            response = self.connection_pool.request('GET', scrape_url, timeout=60, retries=2, headers=header)
            if response:
                content = response.data
                if len(content) > REJECT_PAGE_SIZE:
                    if len(content) > NORMAL_APP_PAGE_SIZE:
                        self.proxy_service.manage(self.proxy, False)
                        print('Succeed scrape app', app_id)
                        logger.info('Succeed scrape app {}'.format(app_id))
                        return content
                    else:
                        print('Invalid app', app_id)
                        logger.info('Invalid app {}'.format(app_id))
                else:
                    logger.info('Reject visit app {}, use proxy {}'.format(app_id, self.proxy))
                    raise Exception('Reject visit app {}'.format(app_id))
            else:
                raise Exception('Response is None')

        except Exception as ex:
            self.proxy_service.manage(self.proxy, True)
            self.proxy = self.proxy_service.get_proxy('https')
            self.connection_pool = ProxyManager(self.proxy['https']) if self.proxy else PoolManager()
            raise ex
Example #14
    def get_ip(self):
        http = ProxyManager('http://127.0.0.1:8118')
        body = http.request('GET', 'http://icanhazip.com')
        return str(body.data, 'utf-8').replace('\n', '')
    def get_terms(self, term, id, number_of_results):

        # Make API call to get json_data #
        term = self.lemmatize(self.preprocess(term))

        # It stores a given score result that will be added to scores, then to results #
        json_dict = dict()

        # Base Query and More Proxy Management #
        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        second_url = "esummary.fcgi?db=mesh&db=mesh&{query}&format=json"
        final_url = base_url + second_url.format(db=self.ontology,
                                                 query="id=" + id)
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        t.sleep(1)

        # Response data #
        response = http.request('GET', final_url)
        json_data = json.loads(response.data)
        uids = json_data['result']['uids']

        # Holds a list of dictionaries, will be converted to dataframe #
        results = []

        # Take the minimum of what the threshold is, versus the number of search hits #
        threshold = min(self.threshold, number_of_results)

        # Loop through each uid in the uids list #
        for uid in uids:

            # Keeps track of uids that score at or above the scoring requirement, used for pruning #
            counter = 0

            # This represents json data from the UID that is CURRENTLY being looped through #
            json_section = json_data['result'][uid]

            # Check if ID is a disease #
            check_id = self.filter_by_disease(id, json_section)

            # If the search term is a disease... #
            if check_id:

                # Pure extracted data from json file before processing #
                scope_note = json_section["ds_scopenote"]
                mesh_id = json_section["ds_meshui"]
                mesh_terms = json_section["ds_meshterms"]

                # Initialize score variables #
                score = None
                syn_score = None
                processed_term = self.stem(term)
                def_score = self.modified_jaccard_similarity(term, scope_note)

                # Keeps track of best scores for each uid #
                scores = []

                # If there's only one search result, take it (regardless of score), and return it #
                # Adding it to just the scores list is fine since it's the only output #
                if threshold == 1:
                    processed_mesh_term = self.stem(
                        self.lemmatize(self.preprocess(mesh_terms[0])))
                    syn_score = fuzz.ratio(
                        processed_mesh_term, processed_term
                    ) if len(processed_term.split()) == 1 and len(
                        processed_mesh_term) == 1 else self.jaccard_similarity(
                            processed_mesh_term, processed_term)
                    score = max(syn_score, def_score)
                    json_dict = {
                        'Ontology': self.ontology,
                        'UID': uid,
                        'Ontology_ID': mesh_id,
                        'Disease_Input': term,
                        "Synonym": mesh_terms[0],
                        "Description": scope_note,
                        'Number_of_Results': number_of_results,
                        'Synonym_Score': syn_score,
                        'Description_Score': def_score,
                        'Final_Score': syn_score + def_score,
                        'Holder': score
                    }
                    scores.append(json_dict)
                    return scores

                else:

                    # Loop through each synonym in mesh_terms for scoring #
                    for mesh_term in mesh_terms:

                        # Prepare synonym for Levenshtein distance matching (through the fuzzy library) #
                        processed_mesh_term = self.stem(
                            self.lemmatize(self.preprocess(mesh_term)))
                        syn_score = fuzz.ratio(
                            processed_mesh_term, processed_term) if len(
                                processed_term.split()) == 1 and len(
                                    processed_mesh_term
                                ) == 1 else self.jaccard_similarity(
                                    processed_mesh_term, processed_term)

                        # If term is only one word, just take the syn_score as its final score, otherwise take the max #
                        score = syn_score if len(term.split()) == 1 else max(
                            syn_score, def_score)

                        # If the score is >= 60, add it to the scores list #
                        json_dict = {
                            'Ontology': self.ontology,
                            'UID': uid,
                            'Ontology_ID': mesh_id,
                            'Disease_Input': term,
                            "Synonym": mesh_term,
                            "Description": scope_note,
                            'Number_of_Results': number_of_results,
                            'Synonym_Score': syn_score,
                            'Description_Score': def_score,
                            'Final_Score': syn_score + def_score,
                            'Holder': score
                        }
                        scores.append(json_dict)

                # This code takes scores (which holds metadata for only ONE uid) and finds the best match #
                # Get the best score, if scores has results (it may be empty) #
                if scores:

                    # Gets the dictionary with the highest score and its corresponding data #
                    best_score_data = max(scores,
                                          key=lambda x: x['Final_Score'])
                    best_score = best_score_data['Holder']
                    results.append(best_score_data)

                    # If best score is greater than or equal to the threshold, increase counter (a step closer to threshold) #
                    if best_score >= self.score_threshold or threshold == 1:
                        counter += 1

                    # If threshold is met, then return results #
                    if counter == threshold:
                        return results

        return results
Example #16
    def update_db(self, year):
        filename = CVE_FEED_FILENAME.replace('$$$$', year) + '.json'
        file_path = path.join(CACHE_PATH, filename)
        meta_filename = CVE_FEED_FILENAME.replace('$$$$', year) + '.meta'
        meta_file_path = path.join(CACHE_PATH, year + '.meta')

        if environ.get('http_proxy') is not None:
            http = ProxyManager(environ.get('http_proxy'), maxsize=10)
        else:
            http = PoolManager()
        disable_warnings(urllib3_exceptions.InsecureRequestWarning)
        r = None
        meta = None
        try:
            r = http.request('GET',
                             CVE_FEED_URL + meta_filename,
                             preload_content=False)
        except Exception as e:
            print("[!] Error obtaining CVE meta data: " + str(e))

        if path.isfile(meta_file_path):
            with open(meta_file_path, 'r') as myfile:
                meta = myfile.read()
            if r is not None and meta is not None and r.data.decode(
                    'utf-8').replace('\r', '') == meta:
                return

        else:
            if r is not None:
                with open(meta_file_path, 'wb') as out_file:
                    copyfileobj(r, out_file)

        try:
            with http.request('GET',
                              CVE_FEED_URL + filename + '.zip',
                              preload_content=False) as r, open(
                                  file_path + '.zip', 'wb') as out_file:
                copyfileobj(r, out_file)
        except Exception as e:
            print("[!] Error downloading CVE feed: " + str(e))
            return
        try:
            archive = ZipFile(file_path + '.zip', 'r')
            xml_data = archive.extract(filename, CACHE_PATH)
        except Exception as e:
            print("[!] Error extracting the CVE archive: " + str(e))
            return

        cve_cache = []
        actions = []
        count = 0

        with open(file_path, encoding='utf-8') as data_file:
            data = json.loads(data_file.read())["CVE_Items"]
        for i in data:
            item = cve_item()
            item.id = i["cve"]["CVE_data_meta"]["ID"]
            for j in i['cve']['references']['reference_data']:
                item.references.append(j)
            item.summary = i['cve']['description']['description_data'][0][
                "value"]
            for j in i['configurations']['nodes']:
                if 'cpe' in j:
                    for k in j['cpe']:
                        item.affected.append({
                            "vuln":
                            k['vulnerable'],
                            "cpe22":
                            k['cpe22Uri'],
                            "cpe23":
                            k['cpe23Uri'],
                            "vStartE":
                            k.get('versionStartExcluding', ''),
                            "vStartI":
                            k.get('versionStartIncluding', ''),
                            "vEndE":
                            k.get('versionEndExcluding', ''),
                            "vEndI":
                            k.get('versionEndIncluding', '')
                        })
                elif 'children' in j:
                    for t in j['children']:
                        if 'cpe' in t:
                            for k in t['cpe']:
                                item.affected.append({
                                    "vuln":
                                    k['vulnerable'],
                                    "cpe22":
                                    k['cpe22Uri'],
                                    "cpe23":
                                    k['cpe23Uri'],
                                    "vStartE":
                                    k.get('versionStartExcluding', ''),
                                    "vStartI":
                                    k.get('versionStartIncluding', ''),
                                    "vEndE":
                                    k.get('versionEndExcluding', ''),
                                    "vEndI":
                                    k.get('versionEndIncluding', '')
                                })
            if 'baseMetricV3' in i['impact']:
                item.cvss['vector_string_v3'] = i['impact']['baseMetricV3'][
                    'cvssV3']['vectorString']
                item.cvss['score_v3'] = i['impact']['baseMetricV3']['cvssV3'][
                    'baseScore']
            if 'baseMetricV2' in i['impact']:
                item.cvss['vector_string_v2'] = i['impact']['baseMetricV2'][
                    'cvssV2']['vectorString']
                item.cvss['score_v2'] = i['impact']['baseMetricV2']['cvssV2'][
                    'baseScore']
            item.published = i['publishedDate']
            item.last_modified = i['lastModifiedDate']
            cve_cache.append(item)
            if USE_ELASTIC_SEARCH:
                actions.append({
                    "_index": "cve-" + year,
                    "_type": "vulns",
                    "_source": {
                        'cve_id': item.id,
                        'summary': item.summary,
                        'published': item.published,
                        'last_modified': item.last_modified,
                        'score_v3': item.cvss.get('score_v3', 0),
                        'score_v2': item.cvss.get('score_v2', 0),
                        'vector_string_v2':
                        item.cvss.get('vector_string_v2', 'NA'),
                        'vector_string_v3':
                        item.cvss.get('vector_string_v3', 'NA'),
                        'affected': item.affected,
                        'cache-index': count,
                    }
                })
                count = count + 1

        if USE_ELASTIC_SEARCH is True:
            try:
                if self.es.indices.exists(index="cve-" + year):
                    self.es.indices.delete(index='cve-' + year,
                                           ignore=[400, 404],
                                           request_timeout=60)
                mappings = {
                    "mappings": {
                        "vulns": {
                            "properties": {
                                "cve_id": {
                                    "type": "keyword"
                                },
                                "score_v2": {
                                    "type": "float"
                                },
                                "score_v3": {
                                    "type": "float"
                                },
                                "affected": {
                                    "type": "nested",
                                    "properties": {
                                        "cpe22": {
                                            "type": "keyword"
                                        },
                                        "cpe23": {
                                            "type": "keyword"
                                        },
                                        "vStartE": {
                                            "type": "keyword"
                                        },
                                        "vStartI": {
                                            "type": "keyword"
                                        },
                                        "vEndE": {
                                            "type": "keyword"
                                        },
                                        "vEndI": {
                                            "type": "keyword"
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                self.es.indices.create(index="cve-" + year,
                                       ignore=400,
                                       body=mappings)
                self.helpers.bulk(self.es, actions, request_timeout=60)
            except Exception as e:
                print("[!] Elasticsearch indexing error: " + str(e))

        try:
            dump(cve_cache, open(path.join(CACHE_PATH, year + '.db'), "wb"),
                 HIGHEST_PROTOCOL)
            remove(file_path + '.zip')
            remove(file_path)
        except PickleError as e:
            print("[!] Error while caching CVE data: " + str(e))
Example #17
File: utils.py Project: replyskumar/vms
    def update_db(self):
        if environ.get('http_proxy') is not None:
            http = ProxyManager(environ.get('http_proxy'), maxsize=10)
        else:
            http = PoolManager()
        disable_warnings(urllib3_exceptions.InsecureRequestWarning)

        r = None
        meta = None

        try:
            r = http.request('GET',
                             FEED_URL + 'official-cpe-dictionary_v2.3.meta',
                             preload_content=False)
        except Exception as e:
            print("[!] Error obtaining CPE dictionary meta data: " + str(e))

        if path.isfile(self.meta_file_path):
            with open(self.meta_file_path, 'r') as myfile:
                meta = myfile.read()
            if r is not None and r.data.decode('utf-8').replace('\r',
                                                                '') == meta:
                return
        else:
            if r is not None:
                with open(self.meta_file_path, 'wb') as out_file:
                    copyfileobj(r, out_file)
        try:
            with http.request(
                    'GET',
                    FEED_URL + 'official-cpe-dictionary_v2.3.xml.zip',
                    preload_content=False) as r, open(self.zipfile_location,
                                                      'wb') as out_file:
                copyfileobj(r, out_file)
        except Exception as e:
            print("[!] Error downloading CPE dictionary: " + str(e))
            return

        try:
            archive = ZipFile(self.zipfile_location, 'r')
            xml_data = archive.extract('official-cpe-dictionary_v2.3.xml',
                                       CACHE_PATH)
        except Exception as e:
            print("[!] Error extracting the CPE archive: " + str(e))
            return

        try:
            root = ET.parse(self.cpe_dictionary_filename).getroot()
        except ET.ParseError as e:
            print("[!] Error while parsing CPE dictionary: " + str(e))
            return

        cpe_dictionary = []
        cpe_names = {}
        actions = []
        count = 0
        for i in list(root)[1:]:
            item = cpe_item()
            item.name = i.attrib['name']
            item.deprecated = i.attrib.get('deprecated') == 'true'

            for j in list(i):
                if 'title' in j.tag:
                    item.title = j.text
                elif 'references' in j.tag:
                    for k in list(j):
                        item.ref[k.attrib['href']] = k.text
                elif 'cpe23-item' in j.tag:
                    item.wfs = j.attrib['name']
            cpe_names[item.name] = item.title
            if USE_ELASTIC_SEARCH:
                actions.append({
                    "_index": "cpe-names",
                    "_type": "names",
                    "_source": {
                        'cpe_id': item.name,
                        'title': item.title,
                        'vendor': item.get_vendor(),
                        'product': item.get_product(),
                        'version': item.get_version(),
                        'wfs': item.wfs,
                        'cache-index': count,
                    }
                })
                count = count + 1
            cpe_dictionary.append(item)

        if USE_ELASTIC_SEARCH:
            try:
                if self.es.indices.exists(index="cpe-names"):
                    self.es.indices.delete(index='cpe-names',
                                           ignore=[400, 404],
                                           request_timeout=60)
                mappings = {
                    "mappings": {
                        "names": {
                            "properties": {
                                "cpe_id": {
                                    "type": "keyword"
                                },
                                "wfs": {
                                    "type": "keyword"
                                },
                                "product": {
                                    "type": "keyword"
                                },
                                "version": {
                                    "type": "keyword"
                                },
                                "vendor": {
                                    "type": "keyword"
                                }
                            }
                        }
                    }
                }
                self.es.indices.create(index="cpe-names",
                                       ignore=400,
                                       body=mappings)
                self.helpers.bulk(self.es, actions, request_timeout=60)
            except Exception as e:
                print("[!] Elasticsearch indexing error: " + str(e))

        try:
            dump(cpe_dictionary, open(self.cpe_cache_filename, "wb"),
                 HIGHEST_PROTOCOL)
            dump(cpe_names, open(self.cpe_names_filename, "wb"),
                 HIGHEST_PROTOCOL)
            remove(self.cpe_dictionary_filename)
            remove(self.zipfile_location)
        except PickleError as e:
            print("[!] Error while caching CPE data: " + str(e))
print("Importation OK")

index = 0
for username in usernames:
    index += 1
    # Build the request URL
    req = 'https://www.instagram.com/' + username + '/?__a=1'

    if len(proxy_list) >= 1:
        # Pick a proxy at random
        proxy_number = random.randint(0, len(proxy_list) - 1)
        http = ProxyManager("http://" + proxy_list[proxy_number] + "/")
    else:
        http = PoolManager()

    webpage = http.request('GET', req)

    try:
        test = json.loads(webpage.data)
        webpage = str(webpage.data)
    except Exception as e:
        if len(proxy_list) >= 1:
            print("Ce proxy est cramé : " + proxy_list[proxy_number])
            proxy_list.remove(proxy_list[proxy_number])
        if len(proxy_list) < 1:
            print("Il n'y a plus de proxy disponible")
            with open(dest_file, "w") as output:
                writer = csv.writer(output, lineterminator='\n')
                for val in infos:
                    writer.writerow([val])
Example #19
def isight_load_data(a_url, a_query, a_headers):
    """

    :param a_url:
    :type a_url:
    :param a_query:
    :type a_query:
    :param a_headers:
    :type a_headers:
    :return:
    :rtype:
    """
    try:
        PySight_settings.logger.debug("param headers: %s %s", a_headers, a_url)
        proxy_request = ProxyManager(str(PySight_settings.proxy_adress))
        url_to_load = PySight_settings.isight_url + a_query
        PySight_settings.logger.debug(url_to_load)
        try:

            r = proxy_request.request('GET',
                                      a_url + a_query,
                                      None,
                                      headers=a_headers)
        except urllib.error.HTTPError as e:
            print(e.code)
            print(e.read())

        PySight_settings.logger.debug("headers %s: ", proxy_request.headers)

        PySight_settings.logger.debug("data %s: ", r.data)

        return_data_cleaned = r.data.replace(b'\n', b'')
        # return_data_cleaned =

        json_return_data_cleaned = json.loads(
            return_data_cleaned.decode('utf8'))
        PySight_settings.logger.debug(json_return_data_cleaned)

        # print json.dumps(theJson,sort_keys=True,indent = 4, separators = (',', ': '))
        PySight_settings.logger.debug("Number of iocs: %s answer is: %s",
                                      len(json_return_data_cleaned['message']),
                                      json_return_data_cleaned)

        if not json_return_data_cleaned['success']:
            PySight_settings.logger.error(
                "Error with iSight connection %s",
                json_return_data_cleaned['message']['description'])
            PySight_settings.logger.error(json_return_data_cleaned)
            return False
        else:
            import time
            timestring = time.strftime("%Y%m%d-%H%M%S")
            f = open("debug/" + timestring, 'w')
            f.write(
                json.dumps(json_return_data_cleaned,
                           sort_keys=True,
                           indent=6,
                           separators=(',', ': ')))
            f.close()

            return json_return_data_cleaned
    except Exception:
        print("Unexpected error: %s" % str(sys.exc_info()))
        return False
    def get_terms(self,
                  term,
                  id,
                  id_string,
                  number_of_results,
                  is_match=False):

        # Make API call to get xml data #
        term = self.lemmatize(self.preprocess(term))

        # Proxy Code and Base Query #
        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        second_url = "esummary.fcgi?db=medgen&db=medgen&{query}"
        final_url = base_url + second_url.format(db=self.ontology,
                                                 query="id=" + id_string)
        http = ProxyManager("http://proxy.gtm.lilly.com:9000/")
        t.sleep(1)
        response = http.request('GET', final_url)
        soup = BeautifulSoup(response.data, 'lxml')

        # Get the separate hits in lists #
        hits = soup.find_all('documentsummary')

        # Dictionary to store the results #
        results = []

        # Set threshold, take the min of the threshold requested and the total number of search results #
        threshold = min(self.threshold, number_of_results)

        # For every hit (each hit represents data from ONE UID) #
        for hit in hits:

            # Keeps track of meeting the threshold #
            counter = 0

            # Check if return is a disease #
            check = "Blank" if not len(hit.find("semanticid")) else hit.find(
                "semanticid").text.strip()

            # List of acceptable semantic types #
            semantic_types = [
                'T191', 'T047', 'T048', 'T019', 'T190', 'T033', 'T049', 'T046',
                'T184', "Blank"
            ]

            # If term is a disease, execute the following: #
            if check in semantic_types:

                # Get Concept ID #
                concept_id = "Blank" if not len(hit.find(
                    'conceptid')) else hit.find('conceptid').text.strip()

                # Get Title #
                title = hit.find('title').text.strip()

                # Get name tags for looping #
                name_tags = hit.find_all('name')

                # Get definition/description #
                definition = hit.find('definition').text.strip()
                def_score = self.modified_jaccard_similarity(term, definition)

                # Get SAB, CODE, SCUI, SDUI, and Title #
                processed_term = self.stem(term)
                new_title = self.stem(self.lemmatize(self.preprocess(title)))

                # Keeps track of best scores for each uid #
                scores = []

                # Loop through synonyms #
                for data in name_tags:

                    # Get the max syn_score between a synonym and the title #
                    new_text = self.stem(
                        self.lemmatize(self.preprocess(data.text)))
                    syn_score = max(
                        fuzz.ratio(new_text, processed_term),
                        fuzz.ratio(processed_term, new_title)
                    ) if len(new_text.split()) == 1 and len(
                        new_title.split()) == 1 and len(processed_term.split(
                        )) == 1 else self.jaccard_similarity(
                            new_text, processed_term)

                    # If score is 100 or the term is one word, take the syn_score #
                    score = syn_score if len(
                        term.split()) == 1 or syn_score == 100 else max(
                            syn_score, def_score)

                    # Initialize dictionary to add to results #
                    value = dict()
                    code, sab, scui, sdui = None, None, None, None
                    index = hits.index(hit)

                    # Add Basic Data MetaData to Dictionary #
                    value['Disease_Input'] = term
                    value['Ontology'] = self.ontology
                    value['Synonym'] = data.text
                    value['Description'] = definition
                    value['Semantic_Type'] = check
                    value['UID'] = id[index]
                    value['Ontology_ID'] = concept_id
                    value['Final_Score'] = syn_score + def_score
                    value['Synonym_Score'] = syn_score
                    value['Description_Score'] = def_score
                    value['Title'] = title
                    value['Number_of_Results'] = number_of_results
                    value['Holder'] = score

                    # Add extra metadata that may throw errors and add to dictionary #
                    try:
                        code = data['code']
                        value['CODE'] = code
                    except:
                        value['CODE'] = np.nan
                    try:
                        sab = data['sab']
                        value['SAB'] = sab
                    except:
                        value['SAB'] = np.nan
                    try:
                        scui = data['scui']
                        value['SCUI'] = scui
                    except:
                        value['SCUI'] = np.nan
                    try:
                        sdui = data['sdui']
                        value['SDUI'] = sdui
                    except:
                        value['SDUI'] = np.nan

                    scores.append(value)

                # This code takes scores (which holds metadata for only ONE uid) and finds the best match #
                # Get the best score, if scores has results (it may be empty) #
                if scores:

                    # Gets the dictionary with the highest score and its corresponding data #
                    best_score_data = max(scores,
                                          key=lambda x: x['Final_Score'])
                    best_score = best_score_data['Holder']
                    results.append(best_score_data)

                    # If best score is greater than or equal to the threshold, increase counter (a step closer to threshold) #
                    if best_score >= self.score_threshold or threshold == 1:
                        counter += 1

                    # If threshold is met, then return results #
                    if counter == threshold:
                        return results

        return results
Example #21
class MainWindow(QWebView):
        def __init__(self):
                super().__init__()
                self.f = open('proxi.txt', 'r')
                self.initUI()

        def initUI(self):
             #   self.showFullScreen()
                self.i = 0
                self.prox = 0
                self.a = 0
                self.setWindowTitle('Safari')
            #    QNetworkProxy.setApplicationProxy(QNetworkProxy(QNetworkProxy.HttpProxy, "173.192.21.89", 25))
             #   QNetworkProxy.setApplicationProxy(QNetworkProxy(QNetworkProxy.HttpProxy, "88.159.123.151", 80))
            #    QNetworkProxy.setApplicationProxy(QNetworkProxy(QNetworkProxy.HttpProxy, "138.197.137.90", 3128))
              #  QNetworkProxy.setApplicationProxy(QNetworkProxy(QNetworkProxy.HttpProxy, "94.177.175.232", 3128))
               # self.load(QUrl('https://www.youtube.com/watch?v=-oV8nNeLpjY&list=PLmahvFaEUuOmzu6t8rZ4SUz3ECAC2jQf5'))
               # self.load(QUrl('https://youtu.be/-KqdN1uScHc'))
                #self.load(QUrl('https://vk.com/video447929742_456239017'))
                self.load(QUrl('https://www.youtube.com'))
                self.show()
                self.loadFinished.connect(self.prow)
        def prow(self):
            if self.a == 0:
                self.a = 1
                self.fin()

        def fin(self):
            print('Entering the program loop')
            self.i = 0
            while self.i<500:
                time.sleep(0.1)
                QtWidgets.qApp.processEvents()
                self.i = self.i + 1


            proxies = [
                ("138.197.137.90", 3128), ("35.185.80.76", 3128),
                ("212.237.15.178", 80), ("104.236.27.71", 80),
                ("144.217.100.67", 80), ("94.177.175.232", 80),
                ("149.56.42.236", 80), ("35.185.80.76", 3128),
                ("162.243.140.150", 8000), ("177.67.84.157", 8080),
                ("192.208.184.134", 8080), ("151.80.152.121", 8080),
                ("212.237.15.178", 8080),
            ]
            if 0 <= self.prox < len(proxies):
                host, port = proxies[self.prox]
                QNetworkProxy.setApplicationProxy(
                    QNetworkProxy(QNetworkProxy.HttpProxy, host, port))
            else:
                self.prox = -1
            print('Proxy used -', self.prox)
            print('Loading the site')
            self.a = 0
            self.load(QUrl('https://youtu.be/-KqdN1uScHc'))
            self.prox = self.prox + 1
            print('Loaded (supposedly)')

                #       def mouseReleaseEvent(self, e):
 #           print("xlixk")

                
        def finished(self):
                self.fin = self.fin + 1
                print("Load Finish")
                self.loading()

        def loading(self):
            QtWidgets.qApp.processEvents()
            if (self.fin > 0):
                self.fin = 0
                self.newsite()


        def newsite(self):
            while not self.request():
                QtWidgets.qApp.processEvents()
                print('Proxy bad')
            QtWidgets.qApp.processEvents()
            QNetworkProxy.setApplicationProxy(QNetworkProxy(QNetworkProxy.HttpProxy, self.proxi_ip, self.proxi_port))
            self.load(QUrl('https://vk.com/id447929742?z=video447929742_456239017%2F2819f4855e1e422801%2Fpl_wall_447929742'))
            return True


        def request(self):
            QtWidgets.qApp.processEvents()
            self.proxi()
            print(self.stroka2)
            self.prm = ProxyManager(str(self.stroka2))
            print(self.stroka2)
            try:
                QtWidgets.qApp.processEvents()
                r = self.prm.request('GET', 'https://www.yandex.ru/')
            except:
                return False
            return True

        def proxi(self):
            self.stroka = self.f.readline()
            self.stroka2 = 'http://' + self.stroka
            self.stroka2 = self.stroka2[:len(self.stroka2)-1]
            print(self.stroka)
            QtWidgets.qApp.processEvents()
            self.massiv = self.stroka.split(':')
            self.proxi_ip = str(self.massiv[0])
            self.proxi_port = int((self.massiv[1])[:len(self.massiv[1]) - 1])
            QtWidgets.qApp.processEvents()
            print(self.proxi_ip)
            print(self.proxi_port)
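Note: the request()/proxi() pair in Example #21 boils down to "read host:port lines from proxi.txt and keep the first entry that answers through ProxyManager". A standalone hedged sketch of that pattern:

from urllib3 import ProxyManager

def first_working_proxy(path='proxi.txt', test_url='https://www.yandex.ru/'):
    # Return the first "host:port" entry that answers a GET through ProxyManager, else None.
    with open(path) as f:
        for line in f:
            candidate = line.strip()
            if not candidate:
                continue
            try:
                ProxyManager('http://' + candidate).request('GET', test_url, timeout=10.0)
                return candidate
            except Exception:
                continue
    return None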