def read_records_to_list(txt_file, encoding='utf-8', file_not_found_error=False, exit_all=True):
    """Read a text file into a list of lines with newline characters removed.

    Args:
        txt_file: Path of the file to read.
        encoding: Encoding used to open the file (e.g. 'utf-8' or 'latin-1').
        file_not_found_error: When True a missing file is reported through
            ``Progress.exit_app``; when False a missing file is ignored.
        exit_all: Forwarded to ``Progress.exit_app`` when an error is reported.

    Returns:
        The list of lines read so far. Empty when the file does not exist.
    """
    records = []
    message = ''
    error_text = ''
    error_def = False
    try:
        # The context manager closes the handle even when decoding a line
        # fails midway through the file.
        with open(txt_file, 'r', encoding=encoding) as file:  # latin-1 | utf-8
            for line in file:
                records.append(line.replace('\n', ''))
    except FileNotFoundError:
        if file_not_found_error:
            error_def = True
            message = "--> File coulnd't be found in folder. --> '%s'" % txt_file
            error_text = None
    except Exception as e:
        error_def = True
        message = '--> An error occurred while reading file. --> "%s"' % txt_file
        error_text = e
    if error_def:
        Progress.exit_app(message=message, e=error_text, exit_all=exit_all)
    return records
def check_run(program_code, program='', reload_time=30, sound_error=True):
    """Block until the API reports that ``program_code`` is switched on.

    Polls ``connect_api(code=program_code, program=program)`` in a loop. The
    loop ends only when that call returns a value equal to ``True``; any other
    value, or any exception, is treated as "do not run" and starts a
    ``reload_time``-second countdown before the next poll.

    Args:
        program_code: Key forwarded to ``connect_api`` as ``code``.
        program: Program name forwarded to ``connect_api``.
        reload_time: Seconds to count down between two polls.
        sound_error: When True, ``Progress.sound_notify()`` is called after a
            failed poll.

    Returns:
        None. The function returns only once the API answered ``True``.

    NOTE(review): block nesting below was reconstructed from a
    whitespace-collapsed source. Confirm whether the separator prints belong
    inside ``if message_pasted`` and whether the sound fires on every retry.
    """
    message_pasted = False
    length_of_last_message = 0
    length_of_last_message_MAX = 0
    while True:
        try:
            run = connect_api(
                code=program_code, program=program
            )  # Mostly returns True or False Boolean up to what you set on API
            if run != True:
                # Continue only if "run" equals True; everything else is a failure.
                run = None
        except Exception as e:
            run = None
        if run:
            if message_pasted:
                # A countdown line was printed with "\r" earlier. Pad the new
                # message with spaces so it fully overwrites the longest
                # countdown text (needed for clean output in a pyinstaller
                # build console).
                message = 'Program is working now.'
                length_of_new_message = len(message)
                if length_of_last_message - length_of_new_message > 0:
                    message = message + ' ' * (length_of_last_message_MAX - length_of_new_message)
                print("\r%s" % message, flush=True, end="")
                print()
                print('-' * 40)
                print()
            # Continue to run main program (RETURN DEF)
            break
        else:
            if not message_pasted:
                # Print the error header only once per call.
                print()
                print('-' * 10)
                print()
            if sound_error:
                Progress.sound_notify()
            message_pasted = True
            now = time.time()
            time.sleep(0.01)
            message = ''
            # Countdown: rewrite the same console line once per second.
            while time.time() - now < reload_time:
                remaining_time = int(reload_time - (time.time() - now))
                net_time_string = Progress.time_definition(remaining_time)
                message = '--> An error occurred while running program. Trying again in %s.' % net_time_string
                print("\r%s" % message, flush=True, end="")
                time.sleep(1)
            # Remember the longest countdown text so it can be blanked out later.
            length_of_last_message = len(message)
            if length_of_last_message_MAX < length_of_last_message:
                length_of_last_message_MAX = length_of_last_message
def create_folder(folder_name, path='./', exit_all=True):
    """Create ``folder_name`` inside ``path`` unless it already exists.

    The folder name is first passed through ``windows_folder_name``.

    Args:
        folder_name: Name of the folder to create.
        path: Parent directory. It no longer needs a trailing separator.
        exit_all: Forwarded to ``Progress.exit_app`` when creation fails.

    Returns:
        True when the folder exists after the call, False when creation
        failed (and ``Progress.exit_app`` returned).
    """
    folder_name = windows_folder_name(folder_name)
    # os.path.join inserts a separator only when needed, so both 'data' and
    # 'data/' work as parent path; plain concatenation produced 'dataName'.
    path = os.path.join(path, folder_name)
    try:
        if not os.path.exists(path):
            os.mkdir(path)
        return True
    except Exception as e:
        message = '--> An error occurred while creating folder. Please try again with running program as administrator or create folder by yourself.\n' \
                  'Folder Name: %s' % folder_name
        Progress.exit_app(message=message, e=e, exit_all=exit_all)
        return False
def save_records_list(txt_file, records_list, overwrite=False, exit_all=True):
    """Write every item of ``records_list`` to ``txt_file``, one per line.

    Args:
        txt_file: Path of the target file (UTF-8).
        records_list: Iterable of records; each is formatted with ``'%s\\n'``.
        overwrite: True truncates the file first, False appends to it.
        exit_all: Forwarded to ``Progress.exit_app`` when writing fails.
    """
    mode = 'w' if overwrite else 'a'
    try:
        # The context manager closes the handle even when a write fails.
        with open(txt_file, mode, encoding='utf-8') as file:
            for record in records_list:
                file.write('%s\n' % record)
    except Exception as e:
        message = '--> An error occurred while creating file. --> "%s"' % txt_file
        Progress.exit_app(message=message, e=e, exit_all=exit_all)
def turn_off_all_alerts(browser, accept=True, show_error=False, sound_for_error=False, exit_all=False):
    """Accept or dismiss the alert currently reachable via ``browser.switch_to.alert``.

    Args:
        browser: Object exposing ``switch_to.alert`` (a Selenium driver in practice).
        accept: True calls ``alert.accept()``, False calls ``alert.dismiss()``.
        show_error: When True a failure is passed to ``Progress.exit_app``.
        sound_for_error: When True a failure triggers ``Progress.sound_notify()``.
        exit_all: Forwarded to ``Progress.exit_app``.
    """
    # options.add_argument("--disable-popup-blocking")  # Selenium browser argument to block everything
    try:
        pending = browser.switch_to.alert
        respond = pending.accept if accept else pending.dismiss
        respond()
    except Exception as err:
        if sound_for_error:
            Progress.sound_notify()
        if show_error:
            Progress.exit_app(e=err, exit_all=exit_all)
def save_dict_with_pprint_pformat(file, dict_as_string, exit_all=False):
    """Save an already formatted dictionary string to a ``.txt`` file.

    Nothing happens unless ``dict_as_string`` is a ``str``. The target name
    is forced to end in ``.txt`` and the text is written line by line through
    ``save_records_list`` in overwrite mode.

    Args:
        file: Target path; any other extension is replaced by ``.txt``.
        dict_as_string: Text to store (e.g. the output of ``pprint.pformat``).
        exit_all: Forwarded to ``save_records_list`` and ``Progress.exit_app``.
    """
    if not isinstance(dict_as_string, str):
        return
    try:
        stem, suffix = os.path.splitext(file)
        if suffix != '.txt':
            file = stem + '.txt'
        save_records_list(file, dict_as_string.split('\n'), overwrite=True, exit_all=exit_all)
    except Exception as err:
        Progress.exit_app(
            message='--> An error occurred while creating file. --> "%s"' % file,
            e=err,
            exit_all=exit_all,
        )
def scroll_div(num):
    """Scroll the dialog list until ``num`` entries are loaded or progress stalls.

    Uses the ``browser`` object from the enclosing scope. Each second the
    scroll container is scrolled to its bottom; the loop stops when the
    container height stopped growing and at least ``num`` matching links are
    present, or after more than 15 consecutive rounds without growth.

    Args:
        num: Number of entries expected to be loaded.
    """
    xpath_scroll_elem = '//div[contains(@role, "dialog")]//div/ul/..'
    class_scroll = browser.find_element_by_xpath(
        xpath_scroll_elem).get_attribute('class')
    # A class attribute may hold several space separated names. ".a b" would
    # be a descendant selector, so chain them as ".a.b" instead.
    selector = '.' + '.'.join((class_scroll or '').split())
    jscommand = """
    followers = document.querySelector("%s");
    followers.scrollTo(0, followers.scrollHeight);
    var lenOfPage=followers.scrollHeight;
    return lenOfPage;
    """ % selector
    lenOfPage = browser.execute_script(jscommand)
    match = False
    now = time.time()
    time.sleep(0.01)
    count = 0  # consecutive rounds in which the container did not grow
    fetched = False
    while not match:
        lastCount = lenOfPage
        time.sleep(1)
        lenOfPage = browser.execute_script(jscommand)
        num_current_follows = len(
            browser.find_elements_by_xpath(
                "//a[contains(@class,'notranslate')]"))
        if lastCount == lenOfPage:
            if num > num_current_follows:
                fetched = False
            else:
                match = True
        else:
            fetched = True
        if fetched:
            count = 0
        else:
            count += 1
        Progress.progress(count=num_current_follows,
                          total=num,
                          now=now,
                          message='Fetching...')
        if count > 15:
            print(
                '\n--> It has been too long time that program could not fetch new data. Now will ignore and continue to process.'
            )
            match = True
def save_records_data(txt_file, val_list, message='File updating...', exit_all=True):
    """Append one record (a row of column values) to a plain text file.

    Layout written: every value is followed by a line of 20 dashes, and the
    whole record is terminated by a line of 40 dashes. This is the layout
    that ``read_records_data_to_dict`` parses.

    Args:
        txt_file: Path of the file to append to (UTF-8).
        val_list: Iterable of column values; each is written via ``str()``.
        message: Text printed before writing starts.
        exit_all: Forwarded to ``Progress.exit_app`` when saving fails.
    """
    try:
        print(message)
        # The context manager closes the handle even when a write fails.
        with open(txt_file, 'a', encoding='utf-8') as file:
            for val in val_list:
                file.write(str(val))
                file.write('\n')
                file.write('-' * 20)
                file.write('\n')
            file.write('-' * 40)
            file.write('\n')
        print('File saved.')
    except Exception as e:
        message = '--> An error occurred while saving file.'
        Progress.exit_app(message=message, e=e, exit_all=exit_all)
def dump_data(file, data, exit_all=False):
    """Pickle a ``dict`` or ``list`` to ``<file>.pickle``.

    Args:
        file: Target path; any extension other than ``.pickle`` is replaced.
        data: Object to store. Only ``dict`` and ``list`` are accepted; any
            other type is reported through ``Progress.exit_app``.
        exit_all: Forwarded to ``Progress.exit_app``.

    NOTE(review): lists are stored as ``.pickle`` here, while
    ``read_dumped_data(data_type=list)`` looks for a ``.npy`` file. Confirm
    which format is intended for lists.
    """
    try:
        if isinstance(data, (dict, list)):
            file_name, file_extension = os.path.splitext(file)
            if file_extension != '.pickle':
                file = file_name + '.pickle'
            # The context manager closes the handle even when pickling fails.
            with open(file, "wb") as pickle_out:
                pickle.dump(data, pickle_out)
        else:
            message = "--> Data type is not acceptable. Data type only can be a 'list' or 'dict'."
            Progress.exit_app(message=message, exit_all=exit_all)
    except Exception as e:
        message = '--> An error occurred while creating file. --> "%s"' % file
        Progress.exit_app(message=message, e=e, exit_all=exit_all)
def timestamp_def(seperate=False, exit_all=True, alternative='timestamp_error', with_space=False):
    """Build a timestamp from the current local time.

    Args:
        seperate: True returns ``(date, hour)``; False returns one string.
        exit_all: Forwarded to ``Progress.exit_app`` if formatting fails.
        alternative: Value used for date, hour and the combined string when
            formatting fails.
        with_space: True joins date and hour with ``' - '``, False with ``'-'``.

    Returns:
        ``'YYYY.MM.DD-HH.MM.SS'`` (or the ``' - '`` variant), or the tuple
        ``('YYYY.MM.DD', 'HH.MM.SS')`` when ``seperate`` is True.
    """
    try:
        moment = datetime.now()
        date, hour = moment.strftime("%Y.%m.%d"), moment.strftime("%H.%M.%S")
        joiner = ' - ' if with_space else '-'
        now = joiner.join((date, hour))
    except Exception as err:
        Progress.exit_app(message="--> An error occurred while creating timestamp.",
                          e=err,
                          exit_all=exit_all)
        now = date = hour = alternative
    return (date, hour) if seperate else now
def read_dumped_data(file, data_type=dict, file_not_found_error=False, exit_all=True):
    """Load previously dumped data from a ``.pickle`` or ``.npy`` file.

    Args:
        file: Path of the dump; the extension is replaced to match ``data_type``.
        data_type: ``dict`` reads ``<file>.pickle`` with ``pickle.load``;
            ``list`` reads ``<file>.npy`` with ``np.load(...).tolist()``.
            Any other value is reported through ``Progress.exit_app``.
        file_not_found_error: When True a missing file is reported through
            ``Progress.exit_app``; otherwise it is ignored.
        exit_all: Forwarded to ``Progress.exit_app``.

    Returns:
        The loaded object, or None when the file is missing, the type is
        unsupported, or loading failed.
    """
    result = None
    try:
        if data_type == dict:
            extension = '.pickle'
        elif data_type == list:
            extension = '.npy'
        else:
            message = "--> File extension is not acceptable. File extension only can be '.pickle' or '.npy'."
            Progress.exit_app(message=message, exit_all=exit_all)
            return result
        file_name, file_extension = os.path.splitext(file)
        if file_extension != extension:
            file = file_name + extension
        if not os.path.exists(file):
            if file_not_found_error:
                message = "! ! File couldn't be found in folder. --> '%s'" % (
                    file)
                Progress.exit_app(message=message, exit_all=exit_all)
            return None
        if data_type == dict:
            # SECURITY: pickle.load can execute arbitrary code; only read
            # files this program wrote itself.
            # The context manager closes the handle the original left open.
            with open(file, "rb") as pickle_in:
                result = pickle.load(pickle_in)
        else:
            result = np.load(file).tolist()
    except Exception as e:
        message = '--> An error occurred while reading file. --> "%s"' % file
        Progress.exit_app(e=e, message=message, exit_all=exit_all)
    return result
def captcha_solve(browser, cost_file='costs.txt', ANTICAPTCHA_KEY=None, save_cost=True, captcha_sound=True, domain=None):
    """Detect a reCAPTCHA on the current page and solve it through anti-captcha.

    Args:
        browser: Selenium driver showing the page to inspect.
        cost_file: Text file whose first line holds the accumulated cost.
        ANTICAPTCHA_KEY: API key; read from the ``ANTICAPTCHA_KEY`` environment
            variable when empty.
        save_cost: When True a non-zero cost is added to ``cost_file``.
        captcha_sound: When True ``Progress.sound_notify_times`` is called
            once a captcha is found.
        domain: Page URL sent to the solver; defaults to ``browser.current_url``.

    Returns:
        True when a ``g-recaptcha-response`` element was found on the page
        (whether or not solving succeeded), otherwise False.
    """
    from urllib.parse import parse_qs, urlsplit

    if not ANTICAPTCHA_KEY:
        ANTICAPTCHA_KEY = os.getenv('ANTICAPTCHA_KEY')
    if not domain:
        domain = browser.current_url
    xpath = '//*[@id = "g-recaptcha-response"]'
    try:
        browser.find_element_by_xpath(xpath)
        # Captcha found in page
        exist_captcha = True
    except Exception:
        # Captcha could not be found in page
        exist_captcha = False
    if not exist_captcha:
        return exist_captcha

    user_answer = None
    cost = 0
    start_time = time.time()
    if captcha_sound:
        Progress.sound_notify_times(times=1)
    print('--> reCAPTCHA solving. It might take some time, please wait...')
    key = ''
    error_captcha = True
    try:
        SITE_KEY = None
        try:
            # Try the classic captcha box first.
            xpath = '//*[contains(@class,"g-recaptcha")]'
            captcha_box = browser.find_element_by_xpath(xpath)
            SITE_KEY = captcha_box.get_attribute('data-sitekey')
            if not SITE_KEY:
                raise Exception('data-sitekey attribute is empty')
        except Exception:
            # Classic box not usable: read the site key from the "k" query
            # parameter of the reCAPTCHA iframe. Parsing the query string
            # also finds "k" when it is the first parameter.
            xpath = '//iframe[contains(@role, "presentation")]'
            captcha_box = browser.find_element_by_xpath(xpath)
            captcha_src = captcha_box.get_attribute('src')
            query = parse_qs(urlsplit(captcha_src or '').query)
            SITE_KEY = (query.get('k') or [None])[0]
        if not SITE_KEY:
            raise Exception('reCAPTCHA site key could not be found')
        user_answer = NoCaptchaTaskProxyless.NoCaptchaTaskProxyless(
            anticaptcha_key=ANTICAPTCHA_KEY).captcha_handler(
                websiteURL=domain, websiteKey=SITE_KEY)
        if 'errorDescription' in user_answer:
            raise Exception('anti-captcha returned an error')
        key = user_answer['solution']['gRecaptchaResponse']
        try:
            cost = float(user_answer['cost'])
        except (KeyError, TypeError, ValueError):
            cost = 0
        # Code worked until here so there is no error.
        error_captcha = False
    except Exception as e:
        error_captcha = True
        message = '--> An error occurred while solving reCAPTCHA. Processing is in progress.'
        # user_answer is still None when the failure happened before the
        # solver was called; guard the membership test to avoid a TypeError
        # inside this handler.
        if user_answer and 'errorDescription' in user_answer:
            message_from_system = user_answer['errorDescription']
            message = message + '\n' + str(message_from_system)
        Progress.exit_app(e=e, message=message, exit_all=False)

    if not error_captcha:
        if 'endTime' in user_answer and 'createTime' in user_answer:
            end_time = user_answer['endTime']
            create_time = user_answer['createTime']
            pass_time = end_time - create_time
        else:
            pass_time = time.time() - start_time
        print('\nCalculation time: %s' % Progress.time_definition(pass_time))
        print('reCAPTCHA solved. Price: $%s' % cost)
        if save_cost:
            if cost != 0 and isinstance(cost, (int, float)):
                read_record = File.read_records_to_list(txt_file=cost_file, file_not_found_error=False, exit_all=False)
                try:
                    balance = float(read_record[0])
                except (IndexError, TypeError, ValueError):
                    balance = 0
                balance += cost
                File.save_records_list(txt_file=cost_file, records_list=[balance], overwrite=True, exit_all=False)
        # Add the solution to the page (best effort: a failure here is ignored).
        try:
            browser.execute_script('document.getElementById("g-recaptcha-response").innerHTML = "%s"' % key)
        except Exception:
            pass
    return exist_captcha
def read_records_data_to_dict(txt_file, show_progress=True, file_not_found_error=True, exit_all=True):
    """Parse a text file written in the dashed record layout into a dict.

    A line of 40 dashes ends a record and a line of 20 dashes ends a column
    value. Consecutive lines that belong to one value are concatenated
    (without a separator).

    Args:
        txt_file: Path of the file to read (UTF-8).
        show_progress: When True ``Progress.progress`` is called per line.
        file_not_found_error: When True a missing file is reported through
            ``Progress.exit_app``.
        exit_all: Forwarded to ``Progress.exit_app``.

    Returns:
        ``{record_number: [value, ...]}`` with record numbers starting at 1;
        records without values are dropped. Empty when the file is missing.
    """
    read_dict = dict()
    try:
        # First pass: count lines for the progress display.
        try:
            with open(txt_file, 'r', encoding='utf-8') as file:
                total = sum(1 for _ in file)
        except FileNotFoundError:
            if file_not_found_error:
                message = "--> File coulnd't be found in folder. --> '%s'" % txt_file
                Progress.exit_app(message=message, exit_all=exit_all)
            return read_dict

        count = 0
        now = time.time()
        time.sleep(0.01)
        new_line = True  # True when the next text line starts a new value
        key = 1
        with open(txt_file, 'r', encoding='utf-8') as file:
            for line in file:
                if key not in read_dict:
                    read_dict[key] = list()
                # Drop only the line terminator. Slicing off the last
                # character unconditionally corrupted a final line that has
                # no trailing newline.
                if line.endswith('\n'):
                    line = line[:-1]
                if show_progress:
                    count += 1
                    Progress.progress(
                        count=count,
                        total=total,
                        now=now,
                        message='Reading records...',
                    )
                if line == '-' * 40:
                    key += 1
                    new_line = True
                    continue
                if line == '-' * 20:
                    new_line = True
                    continue
                if new_line:
                    read_dict[key].append(line)
                else:
                    read_dict[key][-1] = read_dict[key][-1] + line
                new_line = False
        if show_progress:
            print()
        for key in list(read_dict):
            if not read_dict[key]:
                del read_dict[key]
    except Exception as e:
        if show_progress:
            print()
        message = "--> An error occurred while reading file -> '%s'" % txt_file
        Progress.exit_app(e=e, message=message, exit_all=exit_all)
    return read_dict
def internet_connection(timeout=4, reload_time=30, wait_for_network=True, sound_error=True):
    """Check internet access by requesting a randomly chosen well-known site.

    Args:
        timeout: Timeout in seconds for each probe request.
        reload_time: Seconds to count down before probing again.
        wait_for_network: True blocks (probing every ``reload_time`` seconds)
            until a probe succeeds. False probes once and returns the result.
        sound_error: When True ``Progress.sound_notify()`` is called after a
            failed probe.

    Returns:
        With ``wait_for_network=False``: True or False for the single probe.
        With ``wait_for_network=True``: None, once a probe succeeded.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'
    header = {"User-Agent": user_agent}
    url_list = [
        # 'https://api.myip.com/',
        # 'https://api.ipify.org/',
        'https://www.yahoo.com/',
        'https://www.bing.com/',
        'https://www.google.com/',
        'https://www.amazon.com/',
        'https://www.amazon.com.tr/',
        'https://www.microsoft.com/',
        'https://www.apple.com/',
    ]
    message_pasted = False
    length_of_last_message = 0
    length_of_last_message_MAX = 0
    while True:
        url = random.choice(url_list)
        try:
            response = requests.get(url, timeout=timeout, headers=header)
            network = response.status_code == 200
        except Exception:
            # Any request failure is treated as "no internet connection".
            network = False
        if not wait_for_network:
            # Caller asked not to wait: report the probe result right away.
            # The previous condition (`not wait_for_network and network`)
            # kept blocking while offline and could never return False.
            return network
        if network:
            if message_pasted:
                # A countdown line was printed with "\r" earlier. Pad the new
                # message with spaces so it overwrites the longest countdown
                # text (needed for clean output in a pyinstaller console).
                message = 'Connection established.'
                length_of_new_message = len(message)
                if length_of_last_message - length_of_new_message > 0:
                    message = message + ' ' * (length_of_last_message_MAX - length_of_new_message)
                print("\r%s" % message, flush=True, end="")
                print()
                print()
                print('-' * 40)
                print()
            # Continue to run main program (RETURN DEF)
            break
        else:
            if not message_pasted:
                # Print the error header only once per call.
                print()
                print('-' * 10)
                print()
            if sound_error:
                Progress.sound_notify()
            message_pasted = True
            print(url)  # DEBUG
            now = time.time()
            time.sleep(0.01)
            message = ''
            # Countdown: rewrite the same console line once per second.
            while time.time() - now < reload_time:
                remaining_time = int(reload_time - (time.time() - now))
                net_time_string = Progress.time_definition(remaining_time)
                message = '--> Error on internet connection. Trying again in %s.' % net_time_string
                print("\r%s" % message, flush=True, end="")
                time.sleep(1)
            # Remember the longest countdown text so it can be blanked out later.
            length_of_last_message = len(message)
            if length_of_last_message_MAX < length_of_last_message:
                length_of_last_message_MAX = length_of_last_message
# In first loop, it will raise Exception and will create Browser first time. browser.current_url except Exception as e: options = webdriver.ChromeOptions() options.add_argument('user-agent={%s}' % user_agent) options.add_argument('--blink-settings=imagesEnabled=false' ) # Remove images from pages to open fast browser = webdriver.Chrome(options=options, executable_path=driver) count_reflesh = 0 while not login_succesful: count_reflesh += 1 if count_reflesh > 3: browser.quit() message = 'Program shutting down because of errors.' Progress.exit_app(message=message, exit_all=True) # Go to the link and check the xpath given if element present on the page. url = 'https://www.instagram.com/accounts/login/' xpath = '//div[@id = "react-root"]' Selenium.check_page(browser, url, xpath, 10) try: WebDriverWait(browser, 5).until( EC.presence_of_element_located((By.NAME, "username"))) except: message = 'Login form could not be found on page. Will load again.' Progress.exit_app(message=message, exit_all=False) continue element_username = browser.find_element_by_name("username")
def connect_api(https=True, domain=None, endpoint='api/external_program/', code='all', program='', inform_user_periodically=False, show_error=False, sound_error=False, exit_all=False, extra_data=None):
    """Query the settings API and return the converted answer.

    The request is attempted up to three times.

    Args:
        https: True uses ``https://``, False ``http://``.
        domain: Host name; read from the ``domain`` environment variable when empty.
        endpoint: Path appended to the domain.
        code: Setting key to request; ``'all'`` requests every setting.
        program: Program name sent along with ``code``.
        inform_user_periodically: When True a notice is printed on every
            second failed attempt.
        show_error: When True the final failure is reported with a message
            through ``Progress.exit_app``.
        sound_error: When True ``Progress.sound_notify()`` is called on the
            final failure.
        exit_all: Forwarded to ``Progress.exit_app``.
        extra_data: Optional mapping merged into the request data for the
            default endpoint.

    Returns:
        For ``code == 'all'`` on the default endpoint: a dict mapping each
        setting's ``'ayar'`` to its converted ``'parametre'``; for
        ``code == 'all'`` on another endpoint: the decoded JSON response.
        Otherwise the response converted by ``String.from_string_to_type``.
        None when all three attempts failed.
    """
    if not domain:
        domain = os.getenv("domain")
    if extra_data is None:
        # Avoid sharing one mutable default dict between calls.
        extra_data = {}
    start = time.time()
    time.sleep(0.01)
    x = 0  # number of attempts made so far
    db_settings_dict = {}
    url_first = 'https' if https else 'http'
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'
    while True:
        try:
            x += 1
            url = '%s://%s/%s' % (url_first, domain, endpoint)
            # Define, if needed (User-Agent, Accept, Referer etc.)
            headers = {
                "User-Agent": user_agent,
            }
            if endpoint == 'api/external_program/':
                data = {
                    'key': code,
                    'program': program,
                }
                data.update(extra_data)
            else:
                data = {}
            response = requests.request("GET",
                                        url,
                                        headers=headers,
                                        data=data,
                                        timeout=10)
            response.encoding = 'UTF-8'
            response = response.json()
            # The API returns dictionaries with 'ayar' (setting) and
            # 'parametre' (parameter) keys.
            if code == 'all':
                if endpoint == 'api/external_program/':
                    for setting in response:
                        parameter = setting['parametre']
                        # Call lower(); the original passed the bound method
                        # object itself to the converter.
                        parameter = String.from_string_to_type(
                            parameter.lower(), 'try_all')
                        db_settings_dict[setting['ayar']] = parameter
                else:
                    db_settings_dict = response
                return db_settings_dict
            else:
                if len(response) and response:
                    response = String.from_string_to_type(response, 'try_all')
                return response
        except Exception as e:
            if inform_user_periodically:
                if x % 2 == 0:
                    message = '\nAn error occurred while running, trying again...'
                    print()
                    print('-' * 40)
                    print(message)
                    print()
            if x >= 3:
                # Give up after the third failed attempt.
                end = time.time()
                passed_time = end - start
                message = 'An error occurred while running program. Please try again.\n' \
                          '(Trying time: %s)' % Progress.time_definition(passed_time)
                if sound_error:
                    Progress.sound_notify()
                if show_error:
                    Progress.exit_app(message=message, exit_all=exit_all)
                    print()
                else:
                    if exit_all:
                        Progress.exit_app(exit_all=exit_all)
                break
def get_proxy_orbit(selenium=True, get_random=True, count_loop=1, save_false_proxies=True, error_file='Recorded FALSE Orbit Proxies.txt', save_ok_proxies=True, ok_file='Recorded OK Orbit Proxies.txt', number_of_min_saved_proxies=7, number_of_save_proxies=15, run_test=True, test_header=None, test_url=None, test_timeout=1, sound_error=True, allow_print=True, no_proxy=True, for_https=True, API_KEY=''):
    """Pick a working proxy from the Proxy Orbit API / the local proxy files.

    Proxies are cached in ``ok_file``; proxies that failed are looked up in
    ``error_file``. When fewer than ``number_of_min_saved_proxies`` are
    cached (and ``save_ok_proxies`` is True) new ones are requested from
    Proxy Orbit. One proxy is then chosen, optionally tested against a URL,
    and returned. The loop repeats until a proxy passes.

    The function can be driven either by ``count_loop`` or by ``get_random``:
    ``count_loop`` lets a caller walk through the saved proxies one by one
    (pass back the returned counter); with ``get_random=True`` a proxy is
    chosen randomly and ``count_loop`` is replaced by a random number.

    Args:
        selenium: True returns the proxy as a Chrome command line argument
            string, False as a ``requests``-style proxies dict.
        get_random: See above.
        count_loop: See above.
        save_false_proxies: When True, proxies found in ``error_file`` are
            skipped and failing proxies are passed to
            ``File.write_ok_and_false_proxy``.
        error_file: File listing proxies that did not work.
        save_ok_proxies: When True the candidate list is saved to ``ok_file``.
        ok_file: File listing candidate proxies.
        number_of_min_saved_proxies: Below this many cached proxies new ones
            are requested.
        number_of_save_proxies: Number requested from the API and maximum
            kept in ``ok_file`` (raised to 2 when smaller).
        run_test: When True the chosen proxy is tested with a request.
        test_header: Headers for the test request (default: a Chrome user agent).
        test_url: URL for the test request (default: https://api.myip.com/).
        test_timeout: Timeout of the test request in seconds.
        sound_error: Forwarded to ``internet_connection``.
        allow_print: When True progress messages are printed.
        no_proxy: When True, every call where the incremented ``count_loop``
            is a multiple of 10 returns "no proxy" settings.
        for_https: True selects/tests the proxy for https, False for http.
        API_KEY: Proxy Orbit API token.

    Returns:
        ``(proxy_decide, curl)`` when ``get_random`` is True, otherwise
        ``(count_loop, proxy_decide, curl)``.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm the placement of the save/test statements.
    NOTE(review): ``url.lower()`` lower-cases the whole test URL including
    its path, and the API request has no timeout — confirm both are intended.
    """
    if allow_print:
        print()
        print('--> Proxy scraper is started.')
        print()
    if get_random:
        count_loop = random.randint(1, 101)
    if number_of_save_proxies < 2:
        number_of_save_proxies = 2  # Proxy Orbit sends a list only when count is more than 1
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'
    header = {"User-Agent": user_agent}
    url = ''
    if run_test:
        if not test_header:
            test_header = header
        if not test_url:
            url = 'https://api.myip.com/'
            # url = 'https://api.ipify.org/'
        else:
            url = test_url
        # Force the test URL scheme to match the kind of proxy requested.
        if for_https:
            if url.lower().startswith('http://'):
                url = url.lower().replace('http://', 'https://', 1)
        else:
            if url.lower().startswith('https://'):
                url = url.lower().replace('https://', 'http://', 1)
    proxy_decide = ''
    again = True
    curl = ''
    while again:
        url_proxy = "http://api.proxyorbit.com/v1/"
        url_proxy += '?ssl=%s' % str(for_https).lower()
        url_proxy += '&protocols=http'
        url_proxy += '&count=%s' % number_of_save_proxies
        url_proxy += '&token=%s' % API_KEY
        check_internet = True  # used to check the internet connection without proxy only once.
        count_loop += 1
        again = False  # will leave the while loop unless again is set to True
        if count_loop % 10 == 0 and no_proxy:
            # Every 10th count returns NON-PROXY settings.
            if allow_print:
                print('Default proxy settings setted.')
            if selenium:
                proxy_decide = '--no-proxy-server'
            else:
                proxy_decide = {}
        else:
            # Normal path: choose a proxy from the saved / freshly fetched list.
            error_ip_list = File.read_records_to_list(
                error_file, file_not_found_error=False, exit_all=False)
            ok_ip_list = File.read_records_to_list(ok_file,
                                                   file_not_found_error=False,
                                                   exit_all=False)
            ok_ip_save_list = []
            for i in ok_ip_list:
                ok_ip_save_list.append(i)
            if ((len(ok_ip_list) < number_of_min_saved_proxies)
                    and save_ok_proxies):
                # The file holds fewer proxies than the required minimum,
                # so fetch new proxies.
                check_internet = False  # connection is checked here, so it is not checked again below.
                internet_connection(timeout=4,
                                    reload_time=30,
                                    wait_for_network=True,
                                    sound_error=sound_error)
                try:
                    message = 'Proxies are grabbed.'
                    logging.log(logging.INFO,
                                message + ' | URL: %s' % url_proxy)
                    if allow_print:
                        print(message)
                        print()
                    resp = requests.get(url_proxy)
                    resp = resp.json()
                except:
                    message = 'Error occurred while crawling new proxies.'
                    logging.log(logging.ERROR, message)
                    if allow_print:
                        print('\n--> ' + message)
                    again = True
                    count_loop -= 1
                    Progress.count_down(30)
                    # continue to loop until the new proxies are received.
                    continue
                curls = []
                for pr in resp:
                    curls.append(pr['curl'])
                count_ip = 0
                for curl in curls:
                    if save_false_proxies:
                        # skip proxies already recorded as not working.
                        if curl not in error_ip_list:
                            if save_ok_proxies:
                                if curl not in ok_ip_save_list:
                                    count_ip += 1
                                    ok_ip_save_list.append(curl)
                            else:
                                count_ip += 1
                                ok_ip_save_list.append(curl)
                    else:
                        if save_ok_proxies:
                            # add the new proxy to the list that is saved to the proxy file below.
                            if curl not in ok_ip_save_list:
                                count_ip += 1
                                ok_ip_save_list.append(curl)
                if save_ok_proxies:
                    # Keep at most number_of_save_proxies entries (random subset).
                    if len(ok_ip_save_list) > number_of_save_proxies:
                        ok_ip_save_list = random.sample(
                            ok_ip_save_list, number_of_save_proxies)
                    File.save_records_list(ok_file,
                                           ok_ip_save_list,
                                           overwrite=True,
                                           exit_all=False)
            if not len(ok_ip_save_list):
                again = True
                message = "Proxy couldn't get. Trying again..."
                Progress.exit_app(message=message, exit_all=False)
                continue
            if get_random:
                curl = random.choice(ok_ip_save_list)
            else:
                # Index computed so that proxies are taken from LAST to FIRST.
                remaining = count_loop % len(ok_ip_save_list)
                remaining = len(ok_ip_save_list) - remaining
                if remaining >= len(ok_ip_save_list):
                    remaining = 0
                curl = ok_ip_save_list[remaining]
            curl = curl.replace(' ', '')
            curl = curl.replace('\n', '')
            proxy_decide = {}
            if for_https:
                proxy_decide['https'] = curl
            else:
                proxy_decide['http'] = curl
            if save_false_proxies:
                if curl in error_ip_list:
                    # Known bad proxy: record it and pick another one.
                    File.write_ok_and_false_proxy(curl,
                                                  error_file=error_file,
                                                  ok_file=ok_file)
                    again = True
                    continue
            if run_test:
                try:
                    if check_internet:
                        internet_connection(timeout=test_timeout,
                                            reload_time=30,
                                            wait_for_network=True,
                                            sound_error=sound_error)
                    response = requests.get(url,
                                            proxies=proxy_decide,
                                            timeout=test_timeout,
                                            stream=True,
                                            headers=test_header)
                    if test_url:
                        # a custom test URL was given: only check the status code.
                        if response.status_code != 200:
                            raise Exception
                    else:
                        response = response.json()
                        if response == {}:
                            raise Exception
                    if save_ok_proxies:
                        File.save_records_list(ok_file,
                                               ok_ip_save_list,
                                               overwrite=True,
                                               exit_all=False)
                except Exception as e:
                    message = "Proxy doesn't work. Next proxy is testing...\n" \
                              "Proxy: %s" % (curl)
                    if not get_random:
                        message += '\tProxy Number: %s' % remaining
                    if allow_print:
                        print(message)
                        print()
                    again = True
                    if save_false_proxies:
                        File.write_ok_and_false_proxy(curl,
                                                      error_file=error_file,
                                                      ok_file=ok_file)
                    continue
            # Strip the scheme: Chrome expects host:port only.
            ip_port = curl.split('//')[-1]
            if selenium:
                proxy_decide = '--proxy-server=%s' % (ip_port)
            else:
                # proxy_decide defined above.
                pass
            if allow_print:
                if get_random:
                    print("Proxy activated.\nProxy: %s" % (curl))
                else:
                    print("Proxy activated. Proxy Number: %s.\nProxy: %s" %
                          (count_loop, curl))
    if allow_print:
        print()
    if get_random:
        return proxy_decide, curl
    else:
        return count_loop, proxy_decide, curl
def get_proxy(selenium=True, get_random=True, count_loop=1, save_false_proxies=True, error_file='Recorded FALSE Proxies.txt', save_ok_proxies=True, ok_file='Recorded OK Proxies.txt', number_of_min_saved_proxies=20, number_of_save_proxies=40, run_test=True, test_header=None, test_url=None, test_timeout=1, sound_error=True, allow_print=True, no_proxy=True, for_https=True):
    """Pick a working proxy scraped from public proxy list sites.

    Proxies are cached in ``ok_file`` as ``ip:port,TYPE[,TYPE]`` records
    (TYPE is ``HTTP`` and/or ``HTTPS``); proxies that failed are looked up in
    ``error_file``. When too few proxies are cached, one of the list sites in
    ``url_proxies`` is scraped. One record is then chosen, optionally tested
    against a URL, and returned. The loop repeats until a proxy passes.

    The function can be driven either by ``count_loop`` or by ``get_random``:
    ``count_loop`` lets a caller walk through the saved proxies one by one
    (pass back the returned counter); with ``get_random=True`` a proxy is
    chosen randomly and ``count_loop`` is replaced by a random number.

    Args:
        selenium: True returns the proxy as a Chrome command line argument
            string, False as a ``requests``-style proxies dict.
        get_random: See above.
        count_loop: See above.
        save_false_proxies: When True, proxies found in ``error_file`` are
            skipped and failing proxies are passed to
            ``File.write_ok_and_false_proxy``.
        error_file: File listing proxies that did not work.
        save_ok_proxies: When True the candidate list is saved to ``ok_file``.
        ok_file: File listing candidate proxies.
        number_of_min_saved_proxies: Below this many cached proxies a site
            is scraped.
        number_of_save_proxies: Size of the random subset kept in ``ok_file``
            (HTTPS records outside the subset are added back).
        run_test: When True the chosen proxy is tested with a request.
        test_header: Headers for the test request (default: a Chrome user agent).
        test_url: URL for the test request (default: https://api.myip.com/).
        test_timeout: Timeout of the test request in seconds.
        sound_error: Forwarded to ``internet_connection``.
        allow_print: When True progress messages are printed.
        no_proxy: When True, every call where the incremented ``count_loop``
            is a multiple of 10 returns "no proxy" settings.
        for_https: True requires an HTTPS-capable record, False an HTTP one.

    Returns:
        ``(proxy_decide, record_ip_type)`` when ``get_random`` is True,
        otherwise ``(count_loop, proxy_decide, record_ip_type)``.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; in particular confirm whether the HTTP/HTTPS count check runs on
    every pass (as written here) or only after scraping.
    NOTE(review): the loop variable ``type`` shadows the builtin, and ``url``
    is only bound when ``run_test`` is True.
    """
    if get_random:
        count_loop = random.randint(1, 101)
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'
    header = {"User-Agent": user_agent}
    if run_test:
        if not test_header:
            test_header = header
        if not test_url:
            url = 'https://api.myip.com/'
            # url = 'https://api.ipify.org/'
        else:
            url = test_url
        # Force the test URL scheme to match the kind of proxy requested.
        if for_https:
            if url.lower().startswith('http://'):
                url = url.lower().replace('http://', 'https://', 1)
        else:
            if url.lower().startswith('https://'):
                url = url.lower().replace('https://', 'http://', 1)
    proxy_decide = ''
    url_proxies = [
        'https://hidemy.name/tr/proxy-list/?type=s#list',
        'https://www.us-proxy.org/',
        'https://www.sslproxies.org/',
    ]
    again = True
    force_scrap = False  # True forces scraping on the next pass
    while again:
        random_proxy = random.randint(0, len(url_proxies) - 1)
        url_proxy = url_proxies[random_proxy]
        check_internet = True  # used to check the internet connection without proxy only once.
        count_loop += 1
        again = False  # will leave the while loop unless again is set to True
        if count_loop % 10 == 0 and no_proxy:
            # Every 10th count returns NON-PROXY settings.
            if allow_print:
                print('Default proxy settings setted.')
            if selenium:
                proxy_decide = '--no-proxy-server'
            else:
                proxy_decide = {}
            record_ip_type = ''
        else:
            # Normal path: choose a proxy from the saved / freshly scraped list.
            error_ip_list = File.read_records_to_list(
                error_file, file_not_found_error=False, exit_all=False)
            ok_ip_list = File.read_records_to_list(ok_file,
                                                   file_not_found_error=False,
                                                   exit_all=False)
            ok_ip_save_list = []
            for i in ok_ip_list:
                ok_ip_save_list.append(i)
            if ((len(ok_ip_list) < number_of_min_saved_proxies)
                    and save_ok_proxies) or force_scrap:
                # The file holds fewer proxies than the required minimum
                # (or scraping was forced), so crawl new proxies.
                check_internet = False  # connection is checked here, so it is not checked again below.
                internet_connection(timeout=4,
                                    reload_time=30,
                                    wait_for_network=True,
                                    sound_error=sound_error)
                try:
                    print('Proxies are grabbed:')
                    print(url_proxy)
                    page = requests.get(
                        url_proxy,
                        headers=header,
                        timeout=4,
                    )
                except:
                    message = 'Error occurred while crawling new proxies.'
                    logging.log(logging.ERROR, message)
                    if allow_print:
                        print('\n--> ' + message)
                    again = True
                    count_loop -= 1
                    # continue to loop until the new proxies are received.
                    continue
                tree = html.fromstring(page.content)
                # Each site needs its own XPath for the ip / port / type columns.
                if 'hidemy.name' in url_proxy:
                    ips = tree.xpath(
                        '//div[@class = "table_block"]/table//tbody/tr/td[1]'
                    )  # list of all ips
                    ports = tree.xpath(
                        '//div[@class = "table_block"]/table//tbody/tr/td[2]'
                    )  # list of all ports
                    types = tree.xpath(
                        '//div[@class = "table_block"]/table//tbody/tr/td[5]'
                    )  # list of all types
                elif 'us-proxy' in url_proxy:
                    ips = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[1]'
                    )  # list of all ips
                    ports = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[2]'
                    )  # list of all ports
                    types = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[7]'
                    )  # list of all types
                elif 'sslproxies' in url_proxy:
                    ips = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[1]'
                    )  # list of all ips
                    ports = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[2]'
                    )  # list of all ports
                    types = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[7]'
                    )  # list of all types
                count_ip = 0
                if len(ips) == len(ports) == len(types):
                    pass
                else:
                    # Columns do not line up: the page could not be parsed.
                    again = True
                    count_loop -= 1
                    # continue to loop until the new proxies are received.
                    continue
                for ip, port, type in zip(ips, ports, types):
                    # if ip.text and port.text and type.text:
                    #     pass
                    # else:
                    #     continue
                    # Build the ",HTTP[,HTTPS]" suffix from the site's type column.
                    add_type = ''
                    if 'hidemy.name' in url_proxy:
                        if 'http' in type.text.lower():
                            if add_type:
                                add_type += ','
                            add_type += 'HTTP'
                        if 'https' in type.text.lower():
                            if add_type:
                                add_type += ','
                            add_type += 'HTTPS'
                    elif 'us-proxy' in url_proxy:
                        add_type += 'HTTP'
                        if 'yes' in type.text.lower():
                            add_type += ',HTTPS'
                    elif 'sslproxies' in url_proxy:
                        add_type += 'HTTP'
                        if 'yes' in type.text.lower():
                            add_type += ',HTTPS'
                    if add_type:
                        add_type = ',' + add_type
                    else:
                        continue  # NO HTTP OR HTTPS PROXY (Such as Socks4, Socks5 proxy)
                    ip_from_page = ip.text.replace(' ', '')
                    add_ip_type = '%s:%s%s' % (ip_from_page, port.text,
                                               add_type)
                    # Skip rows whose first column is not a valid IP address.
                    try:
                        ipaddress.ip_address(ip_from_page)
                    except:
                        continue
                    if save_false_proxies:
                        # skip proxies already recorded as not working.
                        if add_ip_type not in error_ip_list:
                            if save_ok_proxies:
                                if add_ip_type not in ok_ip_save_list:
                                    count_ip += 1
                                    ok_ip_save_list.append(add_ip_type)
                            else:
                                count_ip += 1
                                ok_ip_save_list.append(add_ip_type)
                    else:
                        if save_ok_proxies:
                            # add the new proxy to the list that is saved to the proxy file below.
                            if add_ip_type not in ok_ip_save_list:
                                count_ip += 1
                                ok_ip_save_list.append(add_ip_type)
                if save_ok_proxies:
                    if len(ok_ip_save_list) > number_of_save_proxies:
                        # Keep a random subset, then add back every HTTPS
                        # record that the sample dropped.
                        ok_ip_save_list_new = random.sample(
                            ok_ip_save_list, number_of_save_proxies)
                        for elem in ok_ip_save_list:
                            elem_types = elem.split(',')[1:]
                            if 'HTTPS' in elem_types:
                                if elem not in ok_ip_save_list_new:
                                    ok_ip_save_list_new.append(elem)
                        ok_ip_save_list = ok_ip_save_list_new
                    File.save_records_list(ok_file,
                                           ok_ip_save_list,
                                           overwrite=True,
                                           exit_all=False)
            # Require more than 3 HTTP and more than 3 HTTPS records;
            # otherwise force another scrape on the next pass.
            http_count = 0
            https_count = 0
            for elem in ok_ip_save_list:
                elem_types = elem.split(',')[1:]
                if 'HTTP' in elem_types:
                    http_count += 1
                if 'HTTPS' in elem_types:
                    https_count += 1
            if https_count <= 3 or http_count <= 3:
                again = True
                force_scrap = True
                count_loop -= 1
                continue
            else:
                force_scrap = False
            if not len(ok_ip_save_list):
                again = True
                message = "Proxy couldn't get. Trying again..."
                Progress.exit_app(message=message, exit_all=False)
                continue
            if get_random:
                record_ip_type = random.choice(ok_ip_save_list)
            else:
                # Index computed so that proxies are taken from LAST to FIRST.
                remaining = count_loop % len(ok_ip_save_list)
                remaining = len(ok_ip_save_list) - remaining
                if remaining >= len(ok_ip_save_list):
                    remaining = 0
                record_ip_type = ok_ip_save_list[remaining]
            record_ip_type = record_ip_type.replace(' ', '')
            record_ip_type = record_ip_type.replace('\n', '')
            # Record layout: "ip:port,TYPE[,TYPE]".
            record_ip = record_ip_type.split(',', 1)[0]
            record_types = record_ip_type.split(',')[1:]
            record_ip_list = record_ip.split(':')
            if len(record_ip_list) != 2:
                # Malformed record (not exactly "ip:port").
                message = "Proxy doesn't work. Next proxy is testing...\n" \
                          "IP-Port: %s" % record_ip
                if not get_random:
                    message += "\tProxy Number: %s" % (remaining)
                if allow_print:
                    print(message)
                    print()
                again = True
                if save_false_proxies:
                    File.write_ok_and_false_proxy(record_ip_type,
                                                  error_file=error_file,
                                                  ok_file=ok_file)
                continue
            # Skip records that do not offer the requested protocol.
            if for_https:
                if 'HTTPS' not in record_types:
                    again = True
                    continue
            else:
                if 'HTTP' not in record_types:
                    again = True
                    continue
            ip = record_ip_list[0]
            port = record_ip_list[1]
            proxy_decide = {}
            for type in record_types:
                proxy_decide[type.lower()] = "http://%s" % record_ip
            if save_false_proxies:
                if record_ip_type in error_ip_list:
                    # Known bad proxy: record it and pick another one.
                    File.write_ok_and_false_proxy(record_ip_type,
                                                  error_file=error_file,
                                                  ok_file=ok_file)
                    again = True
                    continue
            if run_test:
                try:
                    if check_internet:
                        internet_connection(timeout=test_timeout,
                                            reload_time=30,
                                            wait_for_network=True,
                                            sound_error=sound_error)
                    response = requests.get(url,
                                            proxies=proxy_decide,
                                            timeout=test_timeout,
                                            stream=True,
                                            headers=test_header)
                    if test_url:
                        # a custom test URL was given: only check the status code.
                        if response.status_code != 200:
                            raise Exception
                    else:
                        response = response.json()
                        if response == {}:
                            raise Exception
                    if save_ok_proxies:
                        File.save_records_list(ok_file,
                                               ok_ip_save_list,
                                               overwrite=True,
                                               exit_all=False)
                except Exception as e:
                    message = "Proxy doesn't work. Next proxy is testing...\n" \
                              "IP: %s\tPort: %s" % (ip, port)
                    if not get_random:
                        message += '\tProxy Number: %s' % remaining
                    if allow_print:
                        print(message)
                        print()
                    again = True
                    if save_false_proxies:
                        File.write_ok_and_false_proxy(record_ip_type,
                                                      error_file=error_file,
                                                      ok_file=ok_file)
                    continue
            if selenium:
                proxy_decide = '--proxy-server=%s:%s' % (ip, port)
            else:
                # proxy_decide defined above.
                pass
            if allow_print:
                if get_random:
                    print("Proxy activated.\nIP: %s\tPort: %s" % (ip, port))
                else:
                    print(
                        "Proxy activated. Proxy Number: %s.\nIP: %s\tPort: %s"
                        % (count_loop, ip, port))
    if allow_print:
        print()
    if get_random:
        return proxy_decide, record_ip_type
    else:
        return count_loop, proxy_decide, record_ip_type
def send_email(message, subject, recipients, attach_file_name=None, attach_file_text=None, login_mail=None,
               pwd=None, sender='Email Sender', sound_error=True, show_error=True, exit_all=False,
               debug_mode=0):
    """Send a plain-text email, optionally with one attachment, via smtp.gmail.com:587.

    Args:
        message: Body text of the email.
        subject: Subject line.
        recipients: Iterable of recipient addresses. A single address given
            as a plain string is accepted as well.
        attach_file_name: File name shown for the attachment. The attachment
            is only added when both this and ``attach_file_text`` are truthy.
        attach_file_text: Content of the attachment (``str`` or ``bytes``).
        login_mail: SMTP login; falls back to the ``login_mail`` environment
            variable when empty.
        pwd: SMTP password; falls back to the ``pwd`` environment variable
            when empty.
        sender: Value used for the ``From`` header and the envelope sender.
        sound_error: Call ``Progress.sound_notify()`` when sending fails.
        show_error: Pass a descriptive message to ``Progress.exit_app`` on
            failure (otherwise only the exception is passed).
        exit_all: Forwarded to ``Progress.exit_app``.
        debug_mode: Forwarded to ``SMTP.set_debuglevel`` (1 prints the SMTP
            conversation).

    Returns:
        True when the message was handed to the server, False when an error
        occurred (and ``Progress.exit_app`` returned control).
    """
    if not login_mail:
        login_mail = os.getenv('login_mail')
    if not pwd:
        pwd = os.getenv('pwd')

    # A bare string would otherwise be split into single characters by
    # ", ".join() below.
    if isinstance(recipients, str):
        recipients = [recipients]

    try:
        msg = MIMEMultipart()
        msg['Subject'] = subject
        msg['From'] = sender
        msg['To'] = ", ".join(recipients)
        msg.attach(MIMEText(message))

        if attach_file_name and attach_file_text:
            payload = attach_file_text
            if isinstance(payload, str):
                # Encode explicitly so non-ASCII text reaches the recipient
                # as UTF-8 instead of the email package's escape fallback.
                payload = payload.encode('utf-8')
            attach = MIMEBase('application', "octet-stream")
            attach.set_payload(payload)
            encoders.encode_base64(attach)
            attach.add_header('Content-Disposition', 'attachment', filename=attach_file_name)
            msg.attach(attach)

        server = smtplib.SMTP('smtp.gmail.com', 587)
        try:
            server.set_debuglevel(debug_mode)  # Prints all process if debug == 1
            server.ehlo()
            server.starttls()
            server.ehlo()
            server.login(login_mail, pwd)
            server.sendmail(sender, recipients, msg.as_string())
        finally:
            # Release the socket even when login or sending fails.
            server.close()
        return True
    except Exception as e:
        if sound_error:
            Progress.sound_notify()
        if show_error:
            error_message = '--> An error occurred while sending email.'
            Progress.exit_app(e=e, message=error_message, exit_all=exit_all)
        else:
            Progress.exit_app(e=e, exit_all=exit_all)
    return False
def excel_read_to_dict(excel, number_of_sheet=0, exit_all=False):
    """Read one sheet of an Excel file into two dictionaries of row lists.

    Rows whose first cell can be converted to ``int`` are treated as data
    rows; every other row is treated as a header row. Both kinds are numbered
    from 1 in the order they appear.

    Args:
        excel: File name, with or without an ``.xlsx`` / ``.xls`` extension.
            Any other (or missing) extension is replaced by ``.xlsx``. If the
            resulting file does not exist, the other Excel extension is tried.
        number_of_sheet: Zero-based index of the sheet to read.
        exit_all: Forwarded to ``Progress.exit_app`` on errors.

    Returns:
        ``(all_data, headers)``. For a sheet with three data rows:

            all_data = {
                1: ['1st Column Value', '2nd Column Value', ...],
                2: ['1st Column Value', '2nd Column Value', ...],
                3: ['1st Column Value', '2nd Column Value', ...],
            }

        ``headers`` has the same shape for the non-data rows. Both are
        returned empty (or partially filled) when an error was reported and
        ``Progress.exit_app`` returned control.
    """
    all_data = dict()
    headers = dict()
    try:
        # Keep a valid Excel extension as given; otherwise default to .xlsx.
        # The alternate is the other Excel extension, tried as a fallback.
        file_name, file_extension = os.path.splitext(excel)
        if file_extension == '.xlsx':
            alternate_extension = '.xls'
        elif file_extension == '.xls':
            alternate_extension = '.xlsx'
        else:
            excel = file_name + '.xlsx'
            alternate_extension = '.xls'

        # find_file checks the name against the directory with all
        # lower/capital character variants.
        excel = find_file(excel)
        if not os.path.exists(excel):
            excel2 = find_file(file_name + alternate_extension)
            if not os.path.exists(excel2):
                message = "! ! File couldn't be found in folder. --> '%s' or '%s'" % (
                    excel, excel2)
                Progress.exit_app(message=message, exit_all=exit_all)
                return all_data, headers
            excel = excel2

        # NOTE(review): xlrd 2.x reportedly reads only .xls files - confirm
        # the installed xlrd version supports .xlsx before relying on it.
        workbook = xlrd.open_workbook(excel)  # sheet
        sheet = workbook.sheet_by_index(number_of_sheet)  # page
        number_of_column = sheet.ncols
        number_of_row = len(sheet.col(0))

        count = 0
        total = number_of_row
        now = time.time()
        message = 'Reading excel...'
        time.sleep(0.01)

        number_of_data = 0
        number_of_header = 0
        for y in range(number_of_row):
            key = sheet.cell_value(rowx=y, colx=0)
            try:
                key = int(key)
            except (TypeError, ValueError, OverflowError):
                pass

            row_values = [
                String.float_to_integer(sheet.cell_value(rowx=y, colx=x), force_number=False)
                for x in range(number_of_column)
            ]

            # Only rows with an integer in the first cell (the ID column) are
            # data; this keeps header rows out of all_data.
            if isinstance(key, int):
                number_of_data += 1
                all_data[number_of_data] = row_values
            else:
                number_of_header += 1
                headers[number_of_header] = row_values

            count += 1
            Progress.progress(
                count=count,
                total=total,
                now=now,
                message=message,
            )
    except PermissionError:
        message = "--> '%s' can't access to this file.\nIt is probably because the file is open. If this excel is open, please close it and re-run program." % excel
        Progress.exit_app(message=message, exit_all=exit_all)
    except Exception as e:
        message = "--> An error occurred while reading file... '%s'" % excel
        Progress.exit_app(e=e, message=message, exit_all=exit_all)

    print('\nNumber of item: %s' % len(all_data))
    return all_data, headers
def excel_create(
        excel,
        all_data,
        headers=None,
        sizes=None,
        locations=None,
        page_name='Page1',
        exit_all=False,
):
    """Write ``all_data`` to a new ``.xlsx`` workbook with a styled header row.

    The sheet gets an automatic ``ID`` column first, then one column per
    header. A ``dict`` inside a row is not written as a cell: each of its
    keys becomes an extra column (added after the regular headers) and its
    value is written into that column.

    The caller's ``headers``, ``sizes``, ``locations`` and the rows inside
    ``all_data`` are not modified; padding is done on copies.

    Args:
        excel: Output file name; its extension is always replaced by
            ``.xlsx``.
        all_data: Mapping whose values are the rows (sequences of cells).
        headers: Column titles. Missing titles are filled with
            ``'Header 1'``, ``'Header 2'``, ...
        sizes: Column widths. Missing entries default to 20 (25 for every
            column when no sizes are given at all).
        locations: Per-column alignment: ``'center'``, ``'right'`` or
            anything else for left. Missing entries default to ``'left'``.
        page_name: Worksheet name.
        exit_all: Forwarded to ``Progress.exit_app`` on errors.

    Returns:
        None.
    """
    # The extension is always normalized to .xlsx, whatever was given.
    file_name, _ = os.path.splitext(excel)
    excel = file_name + '.xlsx'

    try:
        message = "'%s' --> Creating..." % excel
        total = len(all_data)
        print(message)
        if not total:
            message = '\n--> No data.'
            print(message)
            return

        # Copies: everything below grows these lists.
        headers = list(headers) if headers else []
        sizes = list(sizes) if sizes else []
        locations = list(locations) if locations else []
        rows = [list(val) for val in all_data.values()]

        # Widest row, not counting a trailing attribute dict.
        length_max = 0
        for cells in rows:
            width = len(cells)
            if cells and isinstance(cells[-1], dict):
                width -= 1
            length_max = max(length_max, width)

        # If the widest row is longer than HEADERS, add "Header %i" for the rest.
        i = 0
        while len(headers) < length_max:
            i += 1
            headers.append('Header %s' % i)

        # If HEADERS is longer than a row, add empty cells at the end of the row.
        for cells in rows:
            cells.extend([''] * (len(headers) - len(cells)))

        if sizes:
            sizes.extend([20] * (len(headers) - len(sizes)))
        else:
            sizes = [25] * len(headers)

        if locations:
            locations.extend(['left'] * (len(headers) - len(locations)))
        else:
            locations = ['left'] * len(headers)

        # Every distinct key of an attribute dict gets its own column after
        # the regular ones. Column 0 is the ID column, so the header at list
        # index i lives in sheet column i + 1 == len(headers) right after
        # it was appended.
        attrs_loc = dict()
        for cells in rows:
            for elem in cells:
                if isinstance(elem, dict):
                    for name in elem:
                        if name not in attrs_loc:
                            headers.append(name)
                            sizes.append(20)
                            locations.append('left')
                            attrs_loc[name] = len(headers)

        workbook = xlsxwriter.Workbook(excel)
        worksheet = workbook.add_worksheet(page_name)
        worksheet.freeze_panes(1, 0)

        cell_format_header = workbook.add_format({'border': 1})
        cell_format_header.set_pattern(1)
        cell_format_header.set_bg_color('orange')
        cell_format_header.set_align('center')
        cell_format_header.set_align('vcenter')
        cell_format_header.set_bold()

        cell_format_center_regular = workbook.add_format({'border': 1})
        cell_format_center_regular.set_align('center')
        cell_format_center_regular.set_align('vcenter')

        cell_format_regular = workbook.add_format({'border': 1})
        cell_format_regular.set_align('left')
        cell_format_regular.set_align('vcenter')

        cell_format_right_regular = workbook.add_format({'border': 1})
        cell_format_right_regular.set_align('right')
        cell_format_right_regular.set_align('vcenter')

        cell_format_copyr = workbook.add_format({'border': 1})
        cell_format_copyr.set_pattern(1)
        cell_format_copyr.set_bg_color('FABF8F')
        cell_format_copyr.set_align('center')
        cell_format_copyr.set_align('vcenter')
        cell_format_copyr.set_bold()

        # Header row: ID, the column titles, then the credit cell.
        row = 0
        worksheet.write(row, 0, 'ID', cell_format_header)
        worksheet.set_column(0, 0, 8)
        col = 1
        for head, size in zip(headers, sizes):
            worksheet.write(row, col, head, cell_format_header)
            worksheet.set_column(col, col, size)
            col += 1
        worksheet.write(row, col, 'Automated by BerkayMizrak.com', cell_format_copyr)
        worksheet.set_column(col, col, 34)

        row += 1
        count = 0
        now = time.time()
        time.sleep(0.01)
        for id_count, cells in enumerate(rows, start=1):
            col = 0
            worksheet.write(row, col, id_count, cell_format_center_regular)
            for elem in cells:
                if isinstance(elem, dict):
                    for name, attr in elem.items():
                        worksheet.write(row, attrs_loc[name], attr, cell_format_regular)
                    continue

                col += 1
                alignment = locations[col - 1] if col <= len(locations) else 'left'
                if alignment == 'center':
                    worksheet.write(row, col, elem, cell_format_center_regular)
                elif alignment == 'right':
                    worksheet.write(row, col, elem, cell_format_right_regular)
                elif isinstance(elem, float) and not elem.is_integer():
                    # int() would drop the fractional part; keep the value.
                    worksheet.write(row, col, elem, cell_format_regular)
                else:
                    # Left-aligned columns still show whole numbers centered.
                    try:
                        number = int(elem)
                    except (TypeError, ValueError, OverflowError):
                        worksheet.write(row, col, elem, cell_format_regular)
                    else:
                        worksheet.write(row, col, number, cell_format_center_regular)

            row += 1
            count += 1
            Progress.progress(
                count=count,
                total=total,
                now=now,
            )

        print()
        workbook.close()
        message = "'%s' Data Saved to Excel -->> '%s'" % (count, excel)
        print(message)
    except PermissionError:
        message = "--> '%s' can't access to this file.\nIt is probably because the file is open. If this excel is open, please close it and re-run program." % excel
        Progress.exit_app(message=message, exit_all=exit_all)
    except Exception as e:
        message = "--> An error occurred while creating file... '%s'" % excel
        Progress.exit_app(e=e, message=message, exit_all=exit_all)
def create_word(word, my_rows, exit_all=False):
    """Create a Word document from a list of row descriptions.

    Each entry of ``my_rows`` is a dict describing either a paragraph or a
    table:

    * Paragraph (``'table'`` missing or falsy): ``'text'``, optional
      ``'style'`` (name of a document style such as ``'MyHeader1'`` or
      ``'MyHeader2'``, both defined here), ``'location'`` (0: left,
      1: center, 2: right, 3: justify). When no ``'style'`` is given,
      ``'bold'``, ``'italic'``, ``'underline'`` and ``'size'`` (default 12)
      are applied to the text itself.
    * Table (``'table'`` truthy): ``'data'`` is a list of rows (each a list
      of cells); ``'border'`` enables the grid style. The first row is
      rendered bold and centered.

    Args:
        word: Output file name; any extension other than ``.docx`` is
            replaced by ``.docx``.
        my_rows: List of row dicts as described above.
        exit_all: Forwarded to ``Progress.exit_app`` on errors.

    Returns:
        None.
    """
    try:
        # Check and add docx if there is not at the end.
        file_name, file_extension = os.path.splitext(word)
        if file_extension != '.docx':
            word = file_name + '.docx'

        message = "'%s' --> Creating..." % word
        total = len(my_rows)
        print(message)
        if not total:
            message = '\n--> No data.'
            print(message)
            return

        document = Document()

        # Extra python-docx calls that may be useful for design:
        #   document.add_heading('Document Title', 0)
        #   p = document.add_paragraph('A plain paragraph having some ')
        #   p.add_run('bold').bold = True
        #   p.add_run('italic.').italic = True
        #   document.add_heading('Heading, level 1', level=1)
        #   document.add_picture('monty-truth.png', width=Inches(1.25))
        #   document.add_page_break()

        styles = document.styles

        normal_font = styles['Normal'].font
        normal_font.name = 'Arial'
        normal_font.size = Pt(12)

        header1_font = styles.add_style('MyHeader1', WD_STYLE_TYPE.PARAGRAPH).font
        header1_font.bold = True
        header1_font.name = 'Arial'
        header1_font.size = Pt(14)

        header2_font = styles.add_style('MyHeader2', WD_STYLE_TYPE.PARAGRAPH).font
        header2_font.name = 'Arial'
        header2_font.size = Pt(13)

        for row in my_rows:
            if not row.get('table', False):
                text = row.get('text', '')
                style_name = row.get('style', None)
                location = row.get('location', 0)

                paragraph = document.add_paragraph()
                run = paragraph.add_run(text)
                if style_name:
                    paragraph.style = document.styles[style_name]
                else:
                    run.bold = row.get('bold', False)
                    run.italic = row.get('italic', False)
                    run.underline = row.get('underline', False)
                    # Size the run itself; setting it on a style object
                    # would resize every paragraph sharing that style.
                    run.font.size = Pt(row.get('size', 12))
                paragraph.alignment = location  # 0: left, 1: center, 2: right, 3: justify
            else:
                data = row.get('data', [])
                border = row.get('border', False)
                if len(data):
                    table_obj = document.add_table(
                        rows=0,
                        cols=len(data[0]),
                    )
                    if border:
                        # NOTE(review): python-docx's built-in style is
                        # displayed as 'Table Grid'; confirm that the id
                        # 'TableGrid' resolves in the installed version.
                        table_obj.style = 'TableGrid'

                    for count_row, table_row in enumerate(data):
                        row_cells = table_obj.add_row().cells
                        for enum, cell in enumerate(table_row):
                            row_cells[enum].text = str(cell)
                        if count_row == 0:
                            # First row acts as the table header.
                            for count_cell in range(len(table_row)):
                                first_paragraph = row_cells[count_cell].paragraphs[0]
                                first_paragraph.runs[0].font.bold = True
                                first_paragraph.alignment = 1

        print()
        document.save(word)
        message = "Word Created -->> '%s'" % (word)
        print(message)
    except PermissionError:
        message = "--> '%s' can't access to this file.\nIt is probably because the file is open. If this word is open, please close it and re-run program." % word
        Progress.exit_app(message=message, exit_all=exit_all)
    except Exception as e:
        message = "--> An error occurred while creating file... '%s'" % word
        Progress.exit_app(e=e, message=message, exit_all=exit_all)