Пример #1
0
def startLoop(startNumber, mode, location):
    """Run ``runDownLoad`` for every 10th index, each time through a tested proxy.

    Indices run from ``startNumber`` (inclusive) to ``startNumber + 10000``
    (exclusive) in steps of 10. For each index a proxy is taken from a
    ``ProxyEngine`` and probed against icanhazip.com; proxies are tried one
    after another until a probe succeeds.

    Args:
        startNumber: First index of the range to process.
        mode: Passed through unchanged to ``runDownLoad``.
        location: Passed through unchanged to ``runDownLoad``.
    """
    STOP_NUMBER = startNumber + 10000
    STEP_NUMBER = 10
    # Without a timeout requests waits indefinitely, so one dead proxy
    # would stall the whole loop.
    REQUEST_TIMEOUT = 10  # seconds
    URL_TEST = "http://icanhazip.com"

    proxyPath = 'D:/Proxy/Filter/good_proxy_list.txt'
    myProxy = ProxyEngine(proxyPath)
    myProxy.loadData()

    for i in range(startNumber, STOP_NUMBER, STEP_NUMBER):
        headers = {'User-Agent': getRamdomUserAgent()}
        proxies = {}
        proxyOK = False

        while not proxyOK:
            # presumably a requests-style proxies mapping -- confirm
            # against ProxyEngine.nextProxy
            proxies = myProxy.nextProxy()
            try:
                resTest = requests.get(URL_TEST,
                                       proxies=proxies,
                                       headers=headers,
                                       timeout=REQUEST_TIMEOUT)
                print('using IP:', resTest.text)
                proxyOK = True
            except Exception:
                # Deliberately broad: any failure just means "try the next
                # proxy". Unlike a bare ``except`` this lets
                # KeyboardInterrupt/SystemExit stop the retry loop.
                print('Error with proxy:', proxies)

        if proxies:
            runDownLoad(i, proxies, headers, mode, location)
            time.sleep(15)
Пример #2
0
def main(startNumber, mode, location):
    """Call ``RunCode`` for every 100th index in a 50000-wide window.

    The window starts at ``startNumber`` (inclusive) and ends at
    ``startNumber + 50000`` (exclusive). One proxy configuration from
    ``startPrivateProxy`` is reused for all calls, a new User-Agent header
    is drawn per call, and the loop sleeps 10 seconds after each call.

    Args:
        startNumber: First index of the window.
        mode: Forwarded unchanged to ``RunCode``.
        location: Forwarded unchanged to ``RunCode``.
    """
    window_end = startNumber + 50000
    stride = 100

    private_proxies = startPrivateProxy()

    for index in range(startNumber, window_end, stride):
        request_headers = {'User-Agent': getRamdomUserAgent()}
        RunCode(index, private_proxies, request_headers, mode, location)
        time.sleep(10)
Пример #3
0
def startLoop(startNumber, mode, location):
    """Call ``runDownLoad`` for every 10th index in a 10000-wide window.

    The window starts at ``startNumber`` (inclusive) and ends at
    ``startNumber + 10000`` (exclusive). The proxy configuration is obtained
    once from ``startPrivateProxy``; a new User-Agent header is drawn for
    each call, followed by a 15 second pause.

    Args:
        startNumber: First index of the window.
        mode: Forwarded unchanged to ``runDownLoad``.
        location: Forwarded unchanged to ``runDownLoad``.
    """
    last_exclusive = startNumber + 10000
    stride = 10

    private_proxies = startPrivateProxy()

    for index in range(startNumber, last_exclusive, stride):
        agent = getRamdomUserAgent()
        request_headers = {'User-Agent': agent}
        runDownLoad(index, private_proxies, request_headers, mode, location)
        time.sleep(15)
Пример #4
0
def verifyProxy(proxyList, outPath):
    """Probe each proxy against icanhazip.com and record the ones that answer.

    Every entry of ``proxyList`` is used as both the ``http`` and ``https``
    proxy for one GET request. Entries whose request completes are appended
    to ``outPath`` (one per line); every outcome is also recorded as a
    human-readable line in the returned list.

    Args:
        proxyList: Iterable of proxy addresses; each is converted with
            ``str`` when written or logged.
        outPath: File that working proxies are appended to (UTF-8).

    Returns:
        list[str]: Status messages, starting with a "Start verifying" line
        and ending with a "Finish verifying" line, both stamped with
        ``sysHand.getDateStamp()``.
    """
    # Without a timeout requests waits indefinitely on an unresponsive proxy.
    REQUEST_TIMEOUT = 10  # seconds
    urlTest = "http://icanhazip.com"

    statList = ['Start verifying proxy at ' + sysHand.getDateStamp()]

    # ``with`` guarantees the file is closed even if something outside the
    # per-item try block (e.g. the user-agent helper) raises.
    with open(outPath, 'a', encoding='utf-8') as file:
        for item in proxyList:
            headers = {'User-Agent': getRamdomUserAgent()}
            proxies = {'http': item, 'https': item}

            print('Verifying:', item)
            print('using agent:', headers['User-Agent'])
            print('using proxy:', proxies)
            try:
                resTest = requests.get(urlTest,
                                       proxies=proxies,
                                       headers=headers,
                                       timeout=REQUEST_TIMEOUT)
                print('using IP:', resTest.text)
                file.write(str(item) + '\n')
                statList.append('Sucessfully verified IP ' + str(item))
                time.sleep(2)

            # requests wraps urllib3 failures in its own exception classes,
            # so the requests-level types are what actually reach this code;
            # the urllib3 types are kept for backward compatibility.
            # Timeout must come before ConnectionError because
            # ConnectTimeout derives from both.
            except (requests.exceptions.Timeout,
                    urllib3.exceptions.ConnectTimeoutError):
                statList.append('Connection Time Out Error with IP ' + str(item))
                print('Connection Time Out Error with IP ' + str(item))
            except (requests.exceptions.ConnectionError,
                    urllib3.exceptions.ConnectionError):
                print('Connection Error with IP ' + str(item))
                statList.append('Connection Error with IP ' + str(item))
            except urllib3.exceptions.MaxRetryError:
                print('Max Retry Error with IP ' + str(item))
                statList.append('Max Retry Error with IP ' + str(item))

            except Exception as e:
                # Best-effort: one bad proxy must not abort the whole run.
                statList.append('Error verifying IP ' + str(item))
                print(e)

    statList.append('Finish verifying proxy at ' + sysHand.getDateStamp())

    return statList
Пример #5
0
def main(dirOut):
    """Fetch, process and save the Lexico word list(s) for the letter "z".

    For each list index in ``range(2, 3, 1)`` the page is retrieved with
    ``getLexico``, turned into word data by ``processLexicoList`` and written
    with ``writeListToFile`` to ``<dirOut>/list<WORD><index>.txt``. The loop
    sleeps 10 seconds after each list.

    Args:
        dirOut: Directory prefix for the output files; concatenated as a
            string with the file name.
    """
    letter = "z"
    first_index, end_index, stride = 2, 3, 1

    private_proxies = startPrivateProxy()

    for list_no in range(first_index, end_index, stride):
        print('processing list ', letter, list_no)

        request_headers = {'User-Agent': getRamdomUserAgent()}
        index_text = str(list_no)
        # Target path is built before the download, as in the original flow.
        target_path = dirOut + '/list' + letter + index_text + ".txt"

        page_html = getLexico(letter, index_text, private_proxies, request_headers)
        writeListToFile(processLexicoList(page_html, letter), target_path)
        time.sleep(10)
Пример #6
0
def startLoop(startNumber, mode, location):
    """Screen raw proxies and log those that can reach dictionary.cambridge.org.

    For every 10th index from ``startNumber`` (inclusive) to
    ``startNumber + 10000`` (exclusive):

    1. Take proxies from a ``ProxyEngine`` until one completes a GET request
       to icanhazip.com.
    2. Use that proxy to request the Cambridge dictionary page of the next
       word from a ``DictEngine``.
    3. If the response status is 200, append the proxy's ``http`` address to
       the good-proxy list file.
    4. Sleep 3 seconds.

    Args:
        startNumber: First index of the range; also passed to ``DictEngine``.
        mode: Accepted for signature compatibility; not used by this variant.
        location: Accepted for signature compatibility; not used by this
            variant.
    """
    STOP_NUMBER = startNumber + 10000
    STEP_NUMBER = 10
    # Raw proxy lists contain many dead hosts; without a timeout a single
    # unresponsive proxy would block the loop indefinitely.
    REQUEST_TIMEOUT = 10  # seconds

    URL_TEST = "http://icanhazip.com"
    URL_DICT = 'https://dictionary.cambridge.org/dictionary/english/'
    pathLog = 'D:/Proxy/Filter/good_proxy_list.txt'

    # Path to the proxy list being screened.
    pathList = 'D:/Proxy/List/raw_list_general.txt'
    myProxy = ProxyEngine(pathList)
    myProxy.loadData()

    pathDict = 'E:/FULLTEXT/DICTIONARY/SPECIALTY/British_standards.txt'
    myDict = DictEngine(pathDict, startNumber)
    myDict.loadData()

    for i in range(startNumber, STOP_NUMBER, STEP_NUMBER):
        headers = {'User-Agent': getRamdomUserAgent()}
        proxies = {}
        proxyOK = False

        while not proxyOK:
            try:
                proxies = myProxy.nextProxy()
                print('Attemping on', proxies['http'], '...')
                requests.get(URL_TEST,
                             proxies=proxies,
                             headers=headers,
                             timeout=REQUEST_TIMEOUT)
                proxyOK = True
            except Exception:
                # Best-effort screening: any failure means "try the next one".
                print('Test failed on icanhazip.com')

        if proxies:
            print(proxies)
            nextWord = myDict.nextWord()
            print('nextWord', nextWord)

            url = URL_DICT + nextWord

            try:
                response = requests.get(url,
                                        proxies=proxies,
                                        headers=headers,
                                        timeout=REQUEST_TIMEOUT)
                if response.status_code == 200:
                    goodIP = proxies['http']
                    print('Discoved good proxy:', goodIP)
                    with open(pathLog, "a") as myfile:
                        myfile.write(goodIP + '\n')
            except Exception:
                print('Test failed on dictionary.cambridge.org')

            time.sleep(3)
Пример #7
0
    if (response.status_code == DATA_STATUS_OK):
        if (response.content):
            try:
                soup = BeautifulSoup(response.content, 'lxml')
                #statusMessage = "Successfully get the word: " + word

                #print(data)
                return (str(soup))
            except:
                #statusMessage = "An exception occurred while getting " + word
                return (None)


if __name__ == "__main__":
    # Script entry point: download one Lexico list page through the private
    # proxy, save it as HTML when content came back, then open the output
    # directory.

    dirOut = "E:/FULLTEXT/LEXICO/LIST/HTML"
    WORD, INDEX = "0", "3"
    pathOut = dirOut + '/list' + WORD + INDEX + ".html"

    proxies = startPrivateProxy()

    user_agent = getRamdomUserAgent()
    headers = {'User-Agent': user_agent}

    htmlContent = getLexico(WORD, INDEX, proxies, headers)

    # Nothing is written when getLexico returned no content.
    if htmlContent:
        with open(pathOut, "w", encoding='utf-8') as file:
            file.write(htmlContent)

    # The directory is opened regardless of whether a file was written.
    openDir(dirOut)