def get_global_params(present_website: str):
    """Fetch the global runtime configuration and publish it on ``g_var``.

    Requests ``/v1/get/config/`` from ``g_var.INTERFACE_HOST`` for this
    node's ``g_var.UUID``, copies the returned values onto ``g_var`` and then
    starts the database layer through ``MysqlHandler().startDB()``.

    The process exits when the configuration cannot be obtained (4xx answer,
    network error, malformed JSON, missing key, ...), because nothing else
    can run without it.

    Args:
        present_website: Name of the current website. Not used by this
            function; kept so existing callers keep working.
    """
    try:
        headers = {'Connection': 'close'}
        requests.adapters.DEFAULT_RETRIES = g_var.DEFAULT_RETRIES
        url = g_var.INTERFACE_HOST + "/v1/get/config/?UUID=" + g_var.UUID
        print(url)
        with requests.get(url=url, headers=headers,
                          timeout=g_var.TIMEOUT) as r:
            if r.status_code // 100 == 4:
                # The status code needs a placeholder, otherwise the logging
                # module fails to format the record and the message is lost.
                g_var.logger.error("【3001】 获取不到参数 %s", r.status_code)
                sys.exit(0)
            global_config = json.loads(r.text)

        g_var.SQL_CONFIG = {
            "host": global_config["sql_host"],
            "port": int(global_config["sql_port"]),
            "user": global_config["sql_user"],
            "pass": global_config["sql_pass"],
            "database": global_config["sql_database"],
        }
        g_var.TIMEOUT = global_config["timeout"]
        g_var.ERR_COUNT = global_config["err_count"]
        g_var.PROXY_ERR_MAX = global_config["proxy_err_count"]
        g_var.SEND_STATUS_INTERVAL = global_config["send_status"]
        g_var.VERIFY_URL1 = global_config["verify_url1"]
        g_var.VERIFY_KEY1 = global_config["verify_key1"]
        g_var.VERIFY_URL2 = global_config["verify_url2"]
        g_var.VERIFY_KEY2 = global_config["verify_key2"]
        g_var.EMAIL_INTERVAL_TIME = global_config["email_interval_time"]
        g_var.EMAIL_TIME = global_config["email_time"]
        g_var.CPU_MAX = global_config["cpu_max"]
        g_var.RAM_MAX = global_config["ram_max"]
        g_var.THREAD_COUNT = global_config["thread_count"]
        g_var.CAPTCHA_ERR_MAX = global_config["verify_err_count"]
        g_var.RETRY_COUNT_MAX = g_var.ERR_COUNT
        g_var.REMAIN_TASK_COUNT = g_var.ALL_COUNT
        MysqlHandler().startDB()
    except Exception as e:
        # Without a configuration the program cannot start, so stop here.
        # ``Exception`` (not a bare ``except``) lets the SystemExit raised
        # above pass through instead of being caught and logged twice.
        g_var.logger.error("【3001】 获取不到参数")
        g_var.logger.error(e)
        sys.exit()
def __register_one(self):
    """Register one genius.com account and store it in ``genius_com``.

    Returns:
        dict: the submitted register form data extended with ``id`` (row id
            returned by the insert), ``user_id``, ``name`` and
            ``rapgenius_session`` on success.
        0: the database insert failed or raised.
        -1: ``get_authenticity_token_signup`` reported 0, or the register
            request itself returned -1 (the value is passed through).
        -2: the token helper reported -1, headers could not be generated,
            or the response did not contain a user id / session cookie.
    """
    g_var.logger.info("register。。。")

    # Fetch the authenticity_token, _csrf_token and _rapgenius_session values.
    authenticity_token, csrf_token, rapgenius_session = \
        get_authenticity_token_signup()
    if authenticity_token == 0:
        return -1
    elif authenticity_token == -1:
        return -2

    headers = generate_headers(0, csrf_token, rapgenius_session)
    if headers == -1:
        g_var.logger.info("获取headers失败。。。")
        return -2

    registerData = generate_register_data(authenticity_token)
    url_register = 'https://genius.com/account'
    g_var.logger.info("提交注册中。。。")
    html = requestsW.post(url_register, proxies=ip_proxy("ch"),
                          data=registerData, headers=headers,
                          timeout=g_var.TIMEOUT, vpn='ch')
    if html == -1:
        return html

    # Verify that the registration succeeded.
    # NOTE(review): in the reviewed source the two findall statements below
    # were fused into one syntactically invalid line (a masking artifact).
    # The first call's subject (html.text) and the guard that follows are a
    # reconstruction -- confirm against the pristine file.
    user_id_list = re.findall('CURRENT_USER = {"id":(.*?),"login":',
                              html.text)
    session_list = re.findall('_rapgenius_session=(.*?);',
                              html.headers.get('Set-Cookie', ''))
    if not user_id_list or not session_list:
        g_var.logger.info(
            "register response has no user id or session cookie")
        return -2

    # Store the new account.
    # NOTE(review): the SQL is assembled by string concatenation; switch to
    # a parameterized query if MysqlHandler.insert supports one.
    try:
        sql = "INSERT INTO genius_com(username, password, mail, user_id) VALUES('" + registerData['user[login]'] + \
              "', '" + registerData['user[password]'] + "', '" + registerData['user[email]'] + "', '" + user_id_list[0] + "');"
        last_row_id = MysqlHandler().insert(sql)
        if last_row_id != -1:
            registerData["id"] = last_row_id
            registerData["user_id"] = user_id_list[0]
            registerData["name"] = registerData['user[login]']
            registerData["rapgenius_session"] = session_list[0]
            return registerData
        else:
            g_var.ERR_CODE = 2004
            g_var.ERR_MSG = "数据库插入用户注册数据失败..."
            g_var.logger.error("数据库插入用户注册数据失败...")
            return 0
    except Exception as e:
        g_var.logger.info(e)
        g_var.ERR_CODE = 2004
        g_var.ERR_MSG = "数据库插入用户注册数据出现异常..."
        g_var.logger.error("数据库插入用户注册数据出现异常...")
        return 0
def generate_login_data():
    """Return the next usable genius.com account row for logging in.

    ``g_var.USER_ID`` holds the id of the account handed out last. While it
    is still -1 it is loaded once from ``genius_com/config.json``; after
    that every call works on the global only, so the file is not read again.
    The function selects the first row of ``genius_com`` with a larger id
    and ``status = 0``; when there is none it wraps around and searches from
    id 0 again.

    Returns:
        The row returned by ``MysqlHandler().select`` (``g_var.USER_ID`` is
        set to its first column), or ``None`` when no usable account exists.
        In that case error code 2003 is recorded and ``g_var.SPIDER_STATUS``
        is set to 3 to stop the program.
    """

    def select_after(last_id):
        # First usable account whose id is greater than ``last_id``.
        sql = "SELECT * FROM genius_com AS g WHERE g.`id` > " + str(
            last_id) + " and g.`status` = 0 ORDER BY g.`id` LIMIT 0, 1;"
        return MysqlHandler().select(sql)

    # ``with`` releases the lock even if reading the file or querying the
    # database raises; the previous acquire()/release() pair left the lock
    # held forever in that case. Assumes the g_var locks are
    # threading.Lock-like objects (they are used with acquire()/release()).
    if g_var.USER_ID == -1:
        with g_var.login_data_config_lock:
            # Re-check under the lock: another thread may have loaded it.
            if g_var.USER_ID == -1:
                with open(g_var.ENV_DIR + '/genius_com/config.json',
                          encoding='utf-8') as f:
                    data = json.load(f)
                g_var.USER_ID = data["currentId"]

    with g_var.login_data_g_var_lock:
        userInfo = select_after(g_var.USER_ID)
        g_var.logger.info("logindata:" + str(userInfo))
        if userInfo is None:
            # Nothing after the current id: start over from the beginning.
            g_var.USER_ID = 0
            userInfo = select_after(g_var.USER_ID)
            g_var.logger.info(userInfo)
        if userInfo is None:
            # Still nothing: the account pool is empty or fully disabled.
            g_var.logger.error("当前数据库账号池为空,或所有账号状态异常")
            g_var.ERR_CODE = 2003
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|" + "数据库中没有可用用户,请先注册后再启动本程序!"
            # Status 3 stops the program (and the status-reporting thread).
            g_var.SPIDER_STATUS = 3
        else:
            g_var.USER_ID = userInfo[0]
    return userInfo
def __register_one(self, Session, present_website: str):
    """Register one reddit.com account and store it in ``reddit_com``.

    Args:
        Session: HTTP session used for the requests; its ``proxies``
            attribute is passed to every request.
        present_website: Name of the current website (not used here).

    Returns:
        dict: ``{'user_id', 'name', 'password', 'mail'}`` on success.
        -1: on any failure (missing csrf token, captcha not solved,
            registration rejected, database insert failed, request error).
    """
    try:
        headers = {}
        headers['Connection'] = 'close'
        # 'user_agent' is not an HTTP header name; the User-Agent header is
        # what the rest of this code sets as "user-agent".
        headers['user-agent'] = project_util.get_user_agent()
        res = Session.get(
            "https://www.reddit.com/register/?actionSource=header_signup",
            proxies=Session.proxies, headers=headers, timeout=g_var.TIMEOUT)
        re_res = re.search(
            '<input type="hidden" name="csrf_token" value="(.*?)">', res.text)
        # re.search returns None when nothing matches; calling .group() on
        # it raised AttributeError and was misreported as a proxy problem.
        if re_res is None:
            g_var.ERR_CODE = "2001"
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|没有获取到token"
            g_var.logger.error("没有获取到token")
            return -1
        csrf_token = re_res.group(1)

        google_code = project_util.google_captcha(
            requests.session(), "6LeTnxkTAAAAAN9QEuDZRpn90WwKk_R1TRW_g-JC",
            "https://www.reddit.com")
        if not isinstance(google_code, str) or len(google_code) < 5:
            g_var.ERR_CODE = "2010"
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|"
            g_var.logger.error("没有获取到谷歌验证码")
            # Submitting the form without a solved captcha cannot succeed.
            return -1

        user = project_util.generate_random_string(6, 12)
        pwd = project_util.generate_random_string(10, 16)
        email = user + "@hotmail.com"
        data = {
            "csrf_token": csrf_token,
            "g-recaptcha-response": google_code,
            "dest": "https://www.reddit.com",
            "password": pwd,
            "username": user,
            "email": email,
        }
        res = Session.post("https://www.reddit.com/register", headers=headers,
                           proxies=Session.proxies, data=data,
                           timeout=g_var.TIMEOUT)
        # A successful answer looks like {"dest": "https://www.reddit.com"}.
        if self.__dictExistValue(res.json(), "dest"):
            self.captcha_err_count = 0
            sql = "INSERT INTO reddit_com(username, password, mail, status) VALUES('" + user + \
                  "', '" + pwd + "', '" + email + "', '" + str(0) + "');"
            print("正在进入sql:", sql)
            last_row_id = MysqlHandler().insert(sql)
            if last_row_id != -1:
                return {'user_id': last_row_id, 'name': user,
                        'password': pwd, 'mail': email}
            g_var.logger.error("数据库插入失败")
            return -1
        else:
            g_var.logger.info("验证码错误或邮箱名重复!result:%s", res.text)
            self.captcha_err_count = self.captcha_err_count + 1
            return -1
    except Exception as e:
        g_var.ERR_CODE = "2100"
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|" + "ip出现问题 请求失败"
        g_var.logger.info("未知错误:%s", e)
        return -1
def __register_one(self, Session, present_website):
    """Register one indiegogo.com account and store it in ``indiegogo_com``.

    Args:
        Session: HTTP session used to submit the registration.
        present_website: Name of the current website (not used here).

    Returns:
        dict: ``{'id', 'firstname', 'lastname', 'cookie'}`` on success,
            where ``cookie`` is ``str()`` of the response cookie dict.
        0: the answer could not be processed or the insert failed.
        -1: headers or csrf token could not be obtained (or the post
            result compared equal to -1).
        -2: ``get_csrf`` reported -2, register data could not be generated,
            or the site answered with a status other than 200/201.
    """
    g_var.logger.info("register...")

    request_headers = generate_headers(0)
    if request_headers == -1:
        g_var.logger.info("获取注册headers失败...")
        return -1

    token = get_csrf(Session, request_headers)
    if token == -1:
        return -1
    if token == -2:
        return -2

    # The JSON endpoint wants its own accept header and the csrf token.
    del request_headers["Host"]
    del request_headers["Accept"]
    request_headers["accept"] = "application/json"
    request_headers["x-csrf-token"] = token

    registerData = generate_register_data()
    if registerData == -2:
        g_var.logger.info("未生成正确注册数据...")
        return -2

    g_var.logger.info("提交注册中...")
    html = Session.post('https://www.indiegogo.com/accounts.json',
                        json=registerData, headers=request_headers,
                        timeout=g_var.TIMEOUT)
    if html == -1:
        return html

    # Anything but 200/201 means the registration was rejected.
    if html.status_code not in [200, 201]:
        g_var.logger.info(html.status_code)
        return -2

    def report_insert_failure():
        # Shared bookkeeping for both database failure paths.
        g_var.ERR_CODE = 2004
        g_var.ERR_MSG = "数据库插入失败..."
        g_var.logger.error("数据库插入失败")
        return 0

    try:
        cookie = str(html.cookies.get_dict())
        account = json.loads(html.content)['account']
        sql = "".join([
            "INSERT INTO indiegogo_com(id, username, password, mail) VALUES('",
            str(account['id']),
            "', '", account['email'],
            "', '", registerData['account']['password'],
            "', '", account['email'],
            "');",
        ])
        last_row_id = MysqlHandler().insert(sql)
        if last_row_id == -1:
            return report_insert_failure()
        return {
            "id": last_row_id,
            "firstname": account['first_name'],
            "lastname": account['last_name'],
            "cookie": cookie,
        }
    except Exception as e:
        g_var.logger.info(e)
        return report_insert_failure()
def __login(self, VPN, userInfo):
    """Log in to wattpad.com with username and password.

    Args:
        VPN: Proxy region passed to ``ip_proxy`` when a new proxy is needed.
        userInfo: Account row; index 0 is the id, 1 the username and 2 the
            password.

    Returns:
        dict: ``{'id', 'username', 'token'}`` on success.
        1: the site rejected the credentials; the account is marked
            abnormal (``status=1``) so the caller can pick another one.
        -1: headers could not be generated, or every attempt failed (the
            program is then flagged to stop via ``g_var.SPIDER_STATUS``).

    NOTE(review): ``Session`` is not a parameter of this method and is not
    defined in it; it must be provided by the enclosing module -- confirm.
    """
    user_id = userInfo[0]
    loginData = {
        'username': userInfo[1],
        'password': userInfo[2],
    }
    url_login = '******'

    # ``html`` stays None until a request succeeds. The old check
    # ``retry_count == RETRY_COUNT_MAX`` also fired when the very last
    # attempt succeeded and reported that success as a failure.
    html = None
    retry_count = 0
    while retry_count < g_var.RETRY_COUNT_MAX:
        retry_count = retry_count + 1
        try:
            g_var.logger.info("使用账号密码登录...")
            headers = generate_headers(0)
            if headers == -1:
                return -1
            html = Session.post(url_login, headers=headers, data=loginData,
                                timeout=g_var.TIMEOUT)
            self.proxy_err_count = 0
            break
        except Exception:
            # Request failed: count it, wait, switch proxy and try again.
            g_var.logger.error("账号密码登录超时")
            self.proxy_err_count = self.proxy_err_count + 1
            time.sleep(g_var.SLEEP_TIME)
            Session.proxies = ip_proxy(VPN)

    if html is None:
        g_var.SPIDER_STATUS = 3
        g_var.logger.error("连续登录失败!程序停止")
        return -1

    if html.status_code != 200:
        # Login rejected: mark the account as abnormal in the database.
        sql = "UPDATE wattpad_com SET status=1 WHERE id=" + str(
            user_id) + ";"
        MysqlHandler().update(sql)
        return 1  # account abnormal, caller fetches another one

    token_list = re.findall('token=(.*?);', html.headers['Set-Cookie'])
    # Hand the token and username to the next step (posting the link).
    return {
        'id': user_id,
        'username': loginData['username'],
        'token': token_list[0],
    }
def __postMessage(self, loginData):
    """Publish a link on the logged-in genius.com profile and record it.

    Args:
        loginData: Login result; ``user_id``, ``name`` and ``id`` are read
            here and the whole dict is passed to ``generate_headers``.

    Returns:
        dict: ``loginData`` unchanged on success.
        0: the site did not answer 200, or storing the article row failed.
        -1: headers or link data could not be obtained (missing link data
            also sets ``g_var.SPIDER_STATUS = 3``), or the request helper
            returned -1.
    """
    headers = generate_headers(2, loginData=loginData)
    if headers == -1:
        g_var.logger.info("获取headers失败。。。")
        return -1

    data = generate_new_link_data()
    g_var.logger.info(data)
    if data == -1:
        # No link available: stop the program.
        g_var.SPIDER_STATUS = 3
        return -1

    url_postLink = 'https://genius.com/api/users/' + str(
        loginData['user_id']) + '?text_format=html,markdown'
    g_var.logger.info("发送链接中...")
    res = requestsW.put(url_postLink, proxies=ip_proxy("ch"),
                        headers=headers, json=data, timeout=g_var.TIMEOUT,
                        vpn='ch')
    if res == -1:
        return res

    if res.status_code != 200:
        g_var.logger.error("链接发送失败!" + str(res.status_code))
        g_var.ERR_CODE = 5000
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|" + "链接发送失败,未知错误!"
        return 0

    g_var.logger.info("链接发送成功!" + loginData["name"])
    # Store the profile url and the owning account in the article table.
    url = 'https://genius.com/' + loginData["name"]
    sql = "INSERT INTO genius_com_article(url, user_id) VALUES('" + url + "', '" + str(
        loginData['id']) + "');"
    # ``with`` releases the lock even if the insert raises; with the former
    # acquire()/release() pair an exception left the lock held for good.
    # Assumes a threading.Lock-like object.
    with g_var.insert_article_lock:
        last_row_id = MysqlHandler().insert(sql)
    if last_row_id != -1:
        g_var.logger.info("insert article OK")
    else:
        g_var.logger.error("数据库插入连接数据错误!")
        return 0
    return loginData
def __postMessage(self, Session, loginData, personalData, VPN):
    """Publish the profile link on indiegogo.com and record it.

    Args:
        Session: HTTP session used for the PUT request.
        loginData: Login result; ``id``, ``firstname`` and ``cookie`` (the
            ``str()`` of a cookie dict) are read. ``loginData['token']`` is
            overwritten with ``personalData``.
        personalData: Token stored on ``loginData`` before the headers are
            generated.
        VPN: Proxy region (not used here).

    Returns:
        dict: ``loginData`` on success.
        0: the site did not answer 200, or storing the article row failed.
        -1: headers could not be generated (or the put result equals -1).
        -2: the link payload could not be built.
    """
    import ast  # local import: only needed to parse the stored cookie

    g_var.logger.info("send link...")
    loginData['token'] = personalData
    headers = generate_headers(3, loginData)
    if headers == -1:
        g_var.logger.info("获取登录headers失败...")
        return -1

    put_data = get_put_data(loginData)
    if put_data == -2:
        g_var.logger.info("获取链接数据失败...")
        return -2

    g_var.logger.info("正在发送个人链接...")
    url_sendLink = 'https://www.indiegogo.com/private_api/profiles/' + str(
        loginData['id'])
    # The cookie is the repr of a plain dict (str(cookies.get_dict())), so
    # literal_eval parses it exactly like eval did, but cannot execute
    # arbitrary code if the stored value has been tampered with.
    cookies = ast.literal_eval(loginData['cookie'])
    html = Session.put(url_sendLink, json=put_data, cookies=cookies,
                       headers=headers, timeout=g_var.TIMEOUT)
    if html == -1:
        return -1

    if html.status_code != 200:
        g_var.logger.error("链接发送失败!..." + str(html.status_code))
        g_var.ERR_CODE = 5000
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|" + "链接发送失败,未知错误!"
        return 0

    g_var.logger.info("链接发送成功!" + loginData["firstname"])
    # Store the profile url and the owning account in the article table.
    url = 'https://www.indiegogo.com/individuals/' + str(loginData['id'])
    sql = "INSERT INTO indiegogo_com_article(url, user_id) VALUES('" + url + "', '" + str(
        loginData['id']) + "');"
    # ``with`` guarantees the lock is released even if the insert raises.
    # Assumes a threading.Lock-like object.
    with g_var.insert_article_lock:
        last_row_id = MysqlHandler().insert(sql)
    if last_row_id != -1:
        g_var.logger.info("insert article OK")
    else:
        g_var.logger.error("数据库插入链接错误!")
        return 0
    return loginData
def __login(self, Session, VPN, userInfo):
    """Log in to indiegogo.com with e-mail and password.

    Args:
        Session: HTTP session used for the csrf lookup and the login post.
        VPN: Proxy region (not used here).
        userInfo: Account row; index 0 is the id, 1 the first name, 2 the
            last name, 3 the password and 4 the e-mail.

    Returns:
        dict: ``{'id', 'firstname', 'lastname', 'cookie'}`` on success,
            where ``cookie`` is ``str()`` of the response cookie dict.
        1: the answer carries no ``account`` key; the account is marked
            abnormal (``status=1``) so the caller can pick another one.
        -1: headers or csrf token could not be obtained (or the post
            result compared equal to -1).
    """
    g_var.logger.info("login...")

    request_headers = generate_headers(1)
    if request_headers == -1:
        g_var.logger.info("获取登录headers失败...")
        return -1

    token = get_csrf(Session)
    if token == -1:
        g_var.logger.info("获取x-csrf-token失败...")
        return -1
    request_headers["x-csrf-token"] = token

    account_id = userInfo[0]
    credentials = {
        "account": {
            'email': userInfo[4],
            'password': userInfo[3],
        }
    }

    g_var.logger.info("使用账号密码登录...")
    # url_login is not defined in this method; it comes from the module.
    html = Session.post(url_login, headers=request_headers, json=credentials,
                        timeout=g_var.TIMEOUT)
    if html == -1:
        return -1

    answer_keys = json.loads(html.text).keys()
    if 'account' not in answer_keys:
        # Login failed: flag the account as abnormal in the database.
        MysqlHandler().update(
            "UPDATE indiegogo_com SET status=1 WHERE id=" + str(account_id) + ";")
        g_var.ERR_CODE = 2003
        g_var.ERR_MSG = "用户无法使用..."
        return 1  # account abnormal, caller fetches another one

    cookie = str(html.cookies.get_dict())
    g_var.logger.info(cookie)
    return {
        'id': account_id,
        'firstname': userInfo[1],
        'lastname': userInfo[2],
        'cookie': cookie,
    }
def __postMessage(self, loginData):
    """Set the website link on the wattpad.com profile and record it.

    Args:
        loginData: Login result; ``username`` and ``id`` are read here and
            the whole dict is passed to ``generate_headers``.

    Returns:
        dict: ``{'website': link}`` on success.
        0: the site did not answer 200, or storing the article row failed.
        -1: headers or a link could not be obtained (a missing link also
            sets ``g_var.SPIDER_STATUS = 3``), or the request helper
            returned -1.
    """
    headers = generate_headers(1, loginData)
    if headers == -1:
        return -1

    link = get_new_link()
    if link == -1:
        # No link available: stop the program.
        g_var.SPIDER_STATUS = 3
        return -1

    url_putLink = 'https://www.wattpad.com/api/v3/users/' + loginData[
        'username']
    linkData = {
        'website': link,
    }
    g_var.logger.info("发送链接中...")
    html = requestsW.put(url_putLink, proxies=ip_proxy("en"),
                         headers=headers, json=linkData,
                         timeout=g_var.TIMEOUT)
    if html == -1:
        return html

    if html.status_code != 200:
        # status_code is an int; concatenating it directly raised TypeError
        # and hid the real failure.
        g_var.logger.error("链接发送失败!\n" + str(html.status_code))
        g_var.ERR_CODE = 5000
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|" + "链接发送失败,未知错误!"
        return 0

    g_var.logger.info("链接发送成功!" + loginData["username"])
    url = 'https://www.wattpad.com/user/' + loginData["username"]
    # Store the profile url and the owning account in the article table.
    sql = "INSERT INTO wattpad_com_article(url, user_id) VALUES('" + url + "', '" + str(
        loginData['id']) + "');"
    # ``with`` guarantees the lock is released even if the insert raises.
    # Assumes a threading.Lock-like object.
    with g_var.insert_article_lock:
        last_row_id = MysqlHandler().insert(sql)
    if last_row_id != -1:
        g_var.logger.info("insert article OK")
    else:
        g_var.logger.error("数据库插入链接错误!")
        return 0
    return linkData
def login(self, Session, present_website: str, VPN, userInfo):
    """Log in to diigo.com, re-using a stored cookie when there is one.

    1. If ``userInfo`` already carries a cookie, the login data is built
       from it and no request is made.
    2. Otherwise the sign-in form is submitted, and the cookie that comes
       back is stored in the ``present_website`` table.

    Args:
        Session: Session object; only its ``proxies`` attribute is used.
        present_website: Current website name, used as the table name.
        VPN: Domestic or foreign proxy selector (not used here).
        userInfo: Account row: [0] id, [1] username, [2] password,
            [3] email, [4] status, [5] cookie.

    Returns:
        dict: ``{'id', 'username', 'password', 'cookie'}`` on success.
        ``{"error": 1}``: the cookie could not be written to the database.
        -1: no attempt produced a response, or the answer carries no login
            cookie (proxy should be changed).
        -2: the captcha could not be solved, the login token is missing
            from the page, or the site rejected the login.

    Earlier versions returned human-readable strings for the captcha and
    token failures; callers compare the result with integer codes only and
    treated those strings as a successful login.
    """
    user_id = userInfo[0]
    username = userInfo[1]
    password = userInfo[2]

    if userInfo[5] is not None and userInfo[5] != "":
        # userInfo[5] holds the cookie; when present, use it directly.
        g_var.logger.info("返回cookie" + userInfo[5])
        return {
            'id': user_id,
            'username': username,
            'password': password,
            'cookie': str(userInfo[5]),
        }

    sign_in_url = "https://www.diigo.com/sign-in?referInfo=https%3A%2F%2Fwww.diigo.com"
    google_captchas = google_captcha(
        "", "6Ld23sMSAAAAALfyXkI9d0nHmzOH9jZZNuh66nql", sign_in_url)
    if google_captchas == -1:
        g_var.logger.error("谷歌打码失败")
        return -2

    # Stays None until a login post has been accepted; previously ``res``
    # could be unbound (or still the GET response) after the loop.
    login_res = None
    i = 0
    while i < g_var.ERR_COUNT:
        i += 1
        try:
            Session.proxies = ip_proxy()
            res = requests.get(sign_in_url, headers=self.headers,
                               proxies=Session.proxies,
                               timeout=g_var.TIMEOUT)
            token_match = re.search(
                r'name="loginToken" value="(\w{32})"', res.text)
            if not token_match:
                g_var.logger.error("为获取登陆cookie")
                return -2
            login_token = token_match.group(1)
            print(login_token)
            cookies = res.cookies.get_dict()
            data = {
                "referInfo": "https://www.diigo.com",
                "loginToken": login_token,
                "username": username,
                "password": password,
                "g-recaptcha-response": google_captchas,
                "recaptcha": "v2",
            }
            self.headers["X-Requested-With"] = "XMLHttpRequest"
            self.headers["Referer"] = sign_in_url
            self.headers[
                "Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"
            g_var.logger.info("正在登录中")
            res = requests.post("https://www.diigo.com/sign-in",
                                headers=self.headers, data=data,
                                cookies=cookies, proxies=Session.proxies,
                                timeout=g_var.TIMEOUT)
            g_var.logger.info("登录结束")
            g_var.logger.info(res.text)
            if '"status":1' not in res.text:
                return -2
            login_res = res
            break
        except Exception as e:
            # Request failed: the next round picks a new proxy.
            g_var.logger.info("正在换ip" + str(e))

    if login_res is None:
        return -1

    save_cookies = str(login_res.cookies.get_dict())
    if "diigoandlogincookie" not in save_cookies:
        return -1

    sql = "UPDATE %s SET cookie=\"%s\" WHERE id=%s ;" % (
        present_website, save_cookies, user_id)
    g_var.logger.info(sql)
    status = MysqlHandler().update(sql)
    if status == 0:
        g_var.logger.info("cookie失效,清除cookie update OK")
        return {
            'id': user_id,
            'username': username,
            'password': password,
            'cookie': save_cookies,
        }
    g_var.logger.error("数据库清除cookie错误!")
    return {"error": 1}
def registerAndSendProfile(self, present_website, VPN):
    """Register accounts and publish a profile link until the quota is met.

    Each round runs three retry loops of at most ``g_var.RETRY_COUNT_MAX``
    attempts: register, log in, send profile. A step that reports a proxy
    error (-1) or uses up all its attempts stops the whole program
    (``g_var.SPIDER_STATUS = 3``).

    Args:
        present_website: Current website name (also used as table name).
        VPN: Proxy region passed on to ``self.login``.

    Returns:
        -1 if marking a failed account in the database fails, otherwise
        ``None`` once the loop ends.

    Fixes relative to the previous version: the UPDATE statement lacked the
    spaces around the table name; it referenced an undefined ``userInfo``;
    and a step that ended (success or deliberate break) on its very last
    attempt was mistaken for "all attempts failed".

    NOTE(review): the reviewed source had lost its indentation. The sleep
    after the register branch chain and the final summary log were placed
    by reading order -- confirm against the pristine file.
    """
    while self.success_count < self.assignment_num:
        # Check the current error state at the start of every round.
        if self.__monitor_status() == -1:
            break
        self.now_count = self.now_count + 1

        # 1. Register
        retry_count = 0
        register_signal = 0
        exhausted = False
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count = retry_count + 1
            userData = self.__register_one(present_website)
            if userData == 0:
                g_var.logger.info("注册成功,但数据库存储失败!")
                self.failed_count = self.failed_count + 1
                register_signal = 1
                continue
            elif userData == -1:
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误!"
                g_var.logger.info("代理错误!")
                # Force the loop to end as "exhausted".
                retry_count = g_var.RETRY_COUNT_MAX
            elif userData == -2:
                g_var.logger.info("注册失败,可能密码不符合要求等原因!")
                self.failed_count = self.failed_count + 1
                continue
            else:
                # Registration succeeded.
                self.failed_count = 0
                break
            time.sleep(g_var.SLEEP_TIME)
        else:
            # Runs only when the loop ended without ``break``.
            exhausted = True
        if exhausted:
            # Repeated errors mean something is wrong: stop the program.
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续注册出错,程序停止"
            g_var.logger.error("连续注册失败!程序停止")
            break
        if register_signal == 1:
            continue

        # 2. Log in
        login_signal = 0
        retry_count = 0
        exhausted = False
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count = retry_count + 1
            loginData = self.login(present_website, VPN, userData)
            if loginData == 1:
                # 1 means the login failed: mark the account as abnormal.
                g_var.logger.info('使用当前账号密码登录失败。。。')
                # The registration result may be a dict or a row-like
                # sequence -- presumably keyed by "id"; verify.
                if isinstance(userData, dict):
                    account_id = userData["id"]
                else:
                    account_id = userData[0]
                sql = "UPDATE " + present_website + " SET status=1 WHERE id=" + str(
                    account_id) + ";"
                status = MysqlHandler().update(sql)
                if status == -1:
                    return -1
                self.failed_count = self.failed_count + 1
                login_signal = 1
                break
            elif loginData == 0:
                self.failed_count = self.failed_count + 1
                login_signal = 1
                break
            elif loginData == -1:
                # Proxy problem.
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif loginData == -2:
                g_var.logger.info("登录失败,但可以使用此账户继续尝试,不跳出循环")
                self.failed_count = self.failed_count + 1
                continue
            else:
                self.failed_count = 0
                break
        else:
            exhausted = True
        if exhausted:
            # Repeated errors mean something is wrong: stop the program.
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续登录出错,程序停止"
            g_var.logger.error("login:连续登录失败!程序停止")
            break
        if login_signal == 1:
            continue

        # 3. Send the profile
        retry_count = 0
        exhausted = False
        while retry_count < g_var.RETRY_COUNT_MAX:
            time.sleep(g_var.SLEEP_TIME)
            retry_count = retry_count + 1
            status = self.__send_profile(loginData)
            if status == 0:
                # Profile published.
                self.success_count = self.success_count + 1
                self.failed_count = 0
                break
            elif status == -1:
                # Proxy problem.
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif status == -2:
                self.failed_count = self.failed_count + 1
                break
            elif status == 1:
                g_var.logger.info(
                    "获取authenticity_token和session_id值失败;链接发布成功,但数据存储失败!")
                self.failed_count = self.failed_count + 1
                continue
        else:
            exhausted = True
        if exhausted:
            # Repeated errors mean something is wrong: stop the program.
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续发链接出错,程序停止"
            g_var.logger.error("连续发链接出错,程序停止")
            break

    g_var.logger.info("成功发送" + str(self.success_count) + "则个人简介。")
def __login(self, Session, VPN, userInfo):
    """Log in to genius.com with username and password.

    Args:
        Session: HTTP session used for the token lookup and the login post.
        VPN: Proxy region (not used here).
        userInfo: Account row; index 0 is the id, 1 the login name and 2
            the password.

    Returns:
        dict: ``{'id', 'user_id', 'name', 'authenticity_token',
            'rapgenius_session'}`` on success.
        1: no session cookie came back; the account is marked abnormal
            (``status=1``) so the caller can pick another one.
        -1: tokens or headers could not be obtained, every attempt failed
            (``g_var.SPIDER_STATUS`` is set to 3), or the site answered
            with its bot-detection page.
    """
    g_var.logger.info('login。。。。。。')

    # Fetch the authenticity_token, _csrf_token and _rapgenius_session values.
    authenticity_token, csrf_token, rapgenius_session = \
        get_authenticity_token_login(Session)
    if authenticity_token == -1:
        g_var.logger.info(
            '登陆账号前未获取到authenticity_token值或_csrf_token值或_rapgenius_session值。。。'
        )
        return -1

    headers = generate_headers(1, csrf_token, rapgenius_session)
    if headers == -1:
        g_var.logger.info("获取headers失败。。。")
        return -1

    loginData = {
        'authenticity_token': authenticity_token,
        'user_session[login]': userInfo[1],
        'user_session[password]': userInfo[2],
        # The literal used to list this key twice ('0', then '1'); a dict
        # keeps only the last value, so '1' is what was always sent.
        'user_session[remember_me]': '1',
    }
    url_login = '******'

    # ``html`` stays None until a request succeeds. The old check
    # ``retry_count == RETRY_COUNT_MAX`` also fired when the very last
    # attempt succeeded and reported that success as a failure.
    html = None
    retry_count = 0
    while retry_count < g_var.RETRY_COUNT_MAX:
        retry_count = retry_count + 1
        try:
            g_var.logger.info("使用账号密码登录...")
            html = Session.post(url=url_login, headers=headers,
                                data=loginData, timeout=g_var.TIMEOUT)
            break
        except Exception as e:
            g_var.logger.error(e)
            g_var.logger.error("账号密码登录超时")
            g_var.ERR_CODE = "5000"
            g_var.ERR_MSG = "登录出错|_|" + str(e)

    if html is None:
        g_var.SPIDER_STATUS = 3
        g_var.logger.error("连续登录失败!程序停止")
        return -1

    prove = "Looks like the site is more popular than we thought! We're going to send you on your way in just a sec."
    if prove in html.text:
        g_var.ERR_CODE = "2001"
        g_var.ERR_MSG = '代理异常。。。'
        g_var.logger.info("使用账号密码登录被识别为机器人登录,需要更换代理...")
        return -1

    # NOTE(review): in the reviewed source the two findall statements below
    # were fused into one syntactically invalid line (a masking artifact).
    # The first call's subject (html.text) is a reconstruction -- confirm
    # against the pristine file.
    user_id_list = re.findall('CURRENT_USER = {"id":(.*?),"login":',
                              html.text)
    session_list = re.findall('_rapgenius_session=(.*?);',
                              html.headers.get('Set-Cookie', ''))
    g_var.logger.info(session_list)
    if not session_list:
        # Login failed: mark the account as abnormal. The statement used
        # ``str(id)`` -- the builtin function -- instead of the account id.
        sql = "UPDATE genius_com SET status=1 WHERE id=" + str(
            userInfo[0]) + ";"
        MysqlHandler().update(sql)
        return 1  # account abnormal, caller fetches another one

    # Login data for a username/password login.
    return {
        'id': userInfo[0],
        'user_id': user_id_list[0],
        'name': loginData['user_session[login]'],
        'authenticity_token': authenticity_token,
        'rapgenius_session': session_list[0],
    }
def __postMessage(self, Session, loginData: dict) -> dict:
    """Submit a new article to the logged-in user's reddit profile.

    Args:
        Session: HTTP session; its ``proxies`` are used for every request.
        loginData: ``{'id', 'name'}`` after a password login (length 2) or
            ``{'id', 'name', 'cookie'}`` for a cookie login; in the latter
            case the stored cookie is sent with each request.

    Returns:
        ``{"ok": 0}`` when the post was created,
        ``{"error": -1}`` when no article or no access token is available
        (a missing article also sets ``g_var.SPIDER_STATUS = 3``),
        ``{"error": 1}`` when the submission was rejected or a request
        failed.
    """
    import ast  # local import: only needed to parse the stored cookie

    try:
        headers = {"user-agent": project_util.get_user_agent(),
                   "content-type": "application/json; charset=UTF-8"}
        article = project_util.get_new_article()
        # Check for the error marker before indexing into the article; the
        # old order raised on article[1] when no article was available.
        if article == {"error": -1}:
            # No article available: stop the program.
            g_var.SPIDER_STATUS = 3
            return {"error": -1}
        articleR = self.__article_sumbit(article[1])

        request_kwargs = {
            "headers": headers,
            "proxies": Session.proxies,
            "timeout": g_var.TIMEOUT,
        }
        if len(loginData) == 2:
            print("11111111111", loginData)
        else:
            # literal_eval parses a stored dict repr like eval did, but
            # cannot execute arbitrary code if the value was tampered with.
            # Assumes the cookie column holds the repr of a plain dict --
            # TODO confirm.
            request_kwargs["cookies"] = ast.literal_eval(loginData["cookie"])

        res_accessToken = Session.get(
            "https://www.reddit.com/user/%s/submit" % loginData["name"],
            **request_kwargs)
        re_res = re.search('{"accessToken":"(.{18,64})",',
                           res_accessToken.text)
        # re.search returns None when nothing matches; calling .group() on
        # it raised instead of reaching the intended error branch.
        if re_res is None:
            g_var.logger.error("发送文章token 错误")
            return {"error": -1}
        accessToken = re_res.group(1)

        contentData = {"sr": "u_" + loginData["name"],
                       "api_type": "json",
                       "show_error_list": "true",
                       "title": "woaini" + article[0],
                       "spoiler": "true",
                       "nsfw": "false",
                       "kind": "self",
                       "original_content": "true",
                       "submit_type": "profile",
                       "post_to_twitter": "false",
                       "sendreplies": "true",
                       "richtext_json": articleR,
                       "validate_on_submit": "true"}
        # request_kwargs holds this same dict, so the POST sees the update.
        headers["content-type"] = "application/x-www-form-urlencoded"
        headers["authorization"] = "Bearer " + accessToken
        # The password-login branch applied ``% loginData["name"]`` to this
        # url although it has no placeholder, which raised TypeError.
        res = Session.post(
            "https://oauth.reddit.com/api/submit?resubmit=true&redditWebClient=desktop2x&app=desktop2x-client-production&rtj=only&raw_json=1&gilding_detail=1",
            data=contentData, **request_kwargs)

        if self.__dictExistValue(res.json(), "json", "data", "url"):
            resultUrl = res.json()["json"]["data"]["url"]
            # Store the article url, title and owning account.
            sql = "INSERT INTO reddit_com_article(url, keyword, user_id) VALUES('" + resultUrl + "', '" + article[
                0] + "', '" + str(loginData['id']) + "');"
            # ``with`` releases the lock even if the insert raises.
            # Assumes a threading.Lock-like object.
            with g_var.insert_article_lock:
                MysqlHandler().insert(sql)
            return {"ok": 0}
        else:
            g_var.logger.error("文章发送失败!\n" + res.text)
            g_var.ERR_CODE = 5000
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|" + "文章发送失败,未知错误!"
            return {"error": 1}
    except Exception as e:
        g_var.ERR_CODE = "2100"
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|" + "ip出现问题 请求失败"
        g_var.logger.error("发送文章错误!\n%s", e)
        return {"error": 1}
def __send_profile(self, userData):
    """Publish the profile link on sbnation.com and record it.

    Args:
        userData: Account data; ``username`` and ``id`` are read here and
            the whole dict is passed to ``get_authenticity_token``.

    Returns:
        0 on success.
        1: the token helper reported -2, or storing the article row failed
            or raised.
        -1: headers, tokens or title/link could not be obtained (or the
            request helper returned -1, which is passed through).
        -2: the site did not answer 200.
    """
    g_var.logger.info('send profile......')
    request_headers = generate_headers(1)
    if request_headers == -1:
        return -1

    g_var.logger.info('authenticity_token, session_id...')
    authenticity_token, session_id = get_authenticity_token(userData)
    if authenticity_token == -1 or session_id == -1:
        return -1
    if authenticity_token == -2 or session_id == -2:
        return 1

    profile_base = 'https://www.sbnation.com/users/' + userData['username']
    request_headers['Referer'] = profile_base + '/edit_profile'
    request_headers['Cookie'] = '_session_id=' + session_id

    titleLink = get_new_title_and_link()
    if titleLink == -1:
        return -1

    # Fixed part of the edit-profile form, in the order the site's form
    # submits it.
    form_fields = {
        'utf8': '✓',
        '_method': 'patch',
        'authenticity_token': authenticity_token,
        'profile_image[filename]': ('', '', 'application/octet-stream'),
        'profile_image[filename_cache]': '',
        'network_membership[bio]': '',
        'network_membership[signature]': '',
        'network_membership[public_email]': '',
        'network_membership[website_name]': titleLink[0],
        'network_membership[website_url]': titleLink[1],
        'network_membership[facebook_page_url]': '',
    }
    # One key/value pair per membership item, indexed 0..20, values empty.
    item_names = (
        'MLB', 'NFL', 'NBA', 'NHL', 'NCAAF', 'NCAAB', 'MMA', 'Golf',
        'NASCAR', 'Boxing', 'Soccer', 'MLS', 'EPL',
        'Football League Championship', 'FIFA', 'Bundesliga', 'Serie A',
        'La Liga', 'Cycling', 'Tennis', 'General',
    )
    item_prefix = 'network_membership[network_membership_items_attributes]'
    for position, item_name in enumerate(item_names):
        slot = item_prefix + '[' + str(position) + ']'
        form_fields[slot + '[key]'] = item_name
        form_fields[slot + '[value]'] = ''
    form_fields['commit'] = 'Update'

    boundary = '----WebKitFormBoundary' + generate_random_string(
        16, 16, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    multipart_encoder = MultipartEncoder(fields=form_fields,
                                         boundary=boundary)
    request_headers['Content-Type'] = multipart_encoder.content_type

    g_var.logger.info("发布个人简介的链接...")
    html = requestsW.post(profile_base + '/update_profile',
                          proxies=ip_proxy("en"), data=multipart_encoder,
                          headers=request_headers, timeout=g_var.TIMEOUT)
    if html == -1:
        return html
    if html.status_code != 200:
        g_var.logger.info('链接发布失败。。。')
        g_var.logger.info(html.text)
        return -2

    try:
        url = 'https://www.sbnation.com/users/' + userData['username']
        sql = "INSERT INTO sbnation_com_article(url, user_id) VALUES('" + url + "', '" + str(
            userData["id"]) + "');"
        last_row_id = MysqlHandler().insert(sql)
        g_var.logger.info(last_row_id)
        if last_row_id == -1:
            g_var.ERR_CODE = 2004
            g_var.ERR_MSG = "数据库插入用户注册数据失败..."
            g_var.logger.error("数据库插入用户注册数据失败...")
            return 1
        g_var.logger.info('链接发送成功!' + userData['username'])
        return 0
    except Exception as e:
        g_var.logger.info(e)
        g_var.ERR_CODE = 2004
        g_var.ERR_MSG = "数据库插入用户注册数据异常..."
        g_var.logger.error("数据库插入用户注册数据异常...")
        return 1
get_global_params(present_website) # 检查cpu和内存状态 while psutil.virtual_memory( ).percent > g_var.RAM_MAX or psutil.cpu_percent(None) > g_var.CPU_MAX: g_var.logger.info("cpu或内存不足,挂起" + str(g_var.SEND_STATUS_INTERVAL) + "s") g_var.SPIDER_STATUS = 1 close_signal = send_spider_block_status() if close_signal == 1: quit() time.sleep(g_var.SEND_STATUS_INTERVAL) # 在注册的主线程前,先取出数据库中最后的id存入config.json中,这样下次发文章开始取到的就是最新注册的账号 sql = "select * from " + present_website + " order by id DESC limit 1" userInfo = MysqlHandler().select(sql) if userInfo != None: user_id = int(userInfo[0]) else: user_id = 0 current_id = {"currentId": user_id} with open(g_var.ENV_DIR + '/' + present_website + '/config.json', 'w') as f: json.dump(current_id, f) # 开始执行程序 g_var.SPIDER_STATUS = 2 # 考虑平均分配不是正好分的情况:例如94个任务分配给10个线程,先94/10取整,每个线程9个任务,剩余4个任务给前4个线程每个加1个任务 EACH_THREAD_ASSIGNMENT_NUM = int(g_var.ALL_COUNT /
def __login(self, Session, VPN, userInfo) -> dict:
    """Log a reddit account in, reusing the stored cookie when there is one.

    Args:
        Session: session object used for the HTTP requests. Its ``proxies``
            attribute is read for every request and replaced with a fresh
            ``ip_proxy(VPN)`` value after a failed login attempt.
        VPN: proxy selector forwarded to ``ip_proxy``.
        userInfo: account row; index 0 is the id, 1 the username, 2 the
            password and 5 the stored cookie.

    Returns:
        ``{'id', 'name', 'cookie'}`` when a stored cookie is reused,
        ``{'id', 'name'}`` after a fresh password login, otherwise
        ``{"error": -1}`` (no CSRF token on the page, or every login attempt
        raised) or ``{"error": 1}`` (no ``reddit_session`` cookie returned,
        the cookie could not be saved, or an unexpected exception occurred).
    """
    try:
        stored_cookie = userInfo[5]
        # A stored cookie lets us skip the whole login round-trip.
        if stored_cookie is not None and stored_cookie != "":
            print("正在获取cookie")
            g_var.logger.info("返回cookie" + stored_cookie)
            return {
                'id': userInfo[0],
                'name': userInfo[1],
                'cookie': stored_cookie,
            }

        # No cookie stored: log in with username and password.
        print("用账号密码登录中")
        user_id = userInfo[0]
        username = userInfo[1]
        password = userInfo[2]

        res = Session.get(
            "https://www.reddit.com/register/?actionSource=header_signup",
            proxies=Session.proxies,
            timeout=g_var.TIMEOUT)
        re_res = re.search(
            '<input type="hidden" name="csrf_token" value="(.*?)">', res.text)
        # re.search returns None when the token is absent; calling .group()
        # on it used to raise and be misreported by the outer handler.
        if re_res is None:
            g_var.logger.info("注册未获取到token: %s", re_res)
            return {"error": -1}
        csrf_token = re_res.group(1)

        data = {
            "csrf_token": csrf_token,
            "otp": "",
            "dest": "https://www.reddit.com",
            "password": password,
            "username": username,
        }
        # Never echo the clear-text password to stdout.
        print("正在提交参数", dict(data, password="***"))

        cookie = {}
        logged_in = False
        retry_count = 0
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count += 1
            try:
                g_var.logger.info("使用账号密码登录...")
                res = Session.post("https://www.reddit.com/login",
                                   proxies=Session.proxies,
                                   data=data,
                                   timeout=g_var.TIMEOUT)
                cookie = res.cookies.get_dict()
                print("这里是cookie", cookie)
                self.proxy_err_count = 0
                logged_in = True
                break
            except Exception as e:
                g_var.logger.error("账号密码登录超时:%s", e)
                self.proxy_err_count += 1
                time.sleep(g_var.SLEEP_TIME)
                # Rotate to a fresh proxy before the next attempt.
                Session.proxies = ip_proxy(VPN)

        # Track success explicitly: comparing retry_count with the maximum
        # wrongly treated a success on the final attempt as a failure.
        if not logged_in:
            g_var.SPIDER_STATUS = 3
            g_var.logger.error("连续登录失败!程序停止")
            return {"error": -1}

        if not self.__dictExistValue(cookie, "reddit_session"):
            # Account looks abnormal; the caller should pick another one.
            return {"error": 1}

        print("正在存入cookie")
        # NOTE(review): the statement is assembled by string concatenation;
        # switch to a parameterized query if MysqlHandler supports one.
        sql = "UPDATE reddit_com SET cookie=\"" + str(cookie) + "\" WHERE id=" + str(
            user_id) + ";"
        status = MysqlHandler().update(sql)
        if status != 0:
            g_var.logger.error("数据库更新cookie错误!")
            return {"error": 1}
        g_var.logger.info("update cookie OK")
        return {
            'id': user_id,
            'name': username,
        }
    except Exception as e:
        g_var.ERR_CODE = "2100"
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|" + "ip出现问题 请求失败"
        g_var.logger.error("登录错误:%s", e)
        return {"error": 1}
def start(self, present_website, VPN):
    """Register accounts, log them in and post until the quota is met.

    Each pass of the outer loop runs three phases (register, login, post),
    every one retried up to ``g_var.RETRY_COUNT_MAX`` times. The loop ends
    when ``self.success_count`` reaches ``self.assignment_num``, when
    ``__monitor_status`` returns -1, or when a phase exhausts its retries
    (``g_var.SPIDER_STATUS`` is then set to 3).

    Args:
        present_website: site name, also used as the database table name.
        VPN: proxy selector forwarded to ``get_Session`` and ``login``.

    Returns:
        -1 when flagging a failed account in the database fails; otherwise
        ``None`` once the loop ends.
    """
    while self.success_count < self.assignment_num:
        # Check the current error state on every pass.
        if self.__monitor_status() == -1:
            break
        self.now_count += 1

        Session = get_Session(VPN)
        if Session == -1:
            self.failed_count += 1
            continue

        # 1. Register
        email_and_passwd = get_email(present_website)
        if email_and_passwd == -1:
            self.failed_count += 1
            continue

        retry_count = 0
        register_signal = 0
        registered = False
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count += 1
            registerData = self.__register_one(Session, present_website,
                                               email_and_passwd)
            if registerData == 0:
                g_var.logger.info("注册失败,报错原因需要更换邮箱,跳出本循环")
                self.failed_count += 1
                register_signal = 1
                break
            elif registerData == -1:
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif registerData == -2:
                g_var.logger.info("注册失败,可能是邮箱密码不符合要求等原因,邮箱可以继续使用,不跳出循环")
                self.failed_count += 1
                continue
            else:
                # Registered successfully.
                self.failed_count = 0
                registered = True
                break
            # NOTE(review): the flattened source does not show whether this
            # sleep sat inside the loop; it is kept as the loop's last step.
            time.sleep(g_var.SLEEP_TIME)

        # Success on the final permitted attempt must not count as exhaustion.
        if retry_count == g_var.RETRY_COUNT_MAX and not registered:
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续注册出错,程序停止"
            g_var.logger.error("连续注册失败!程序停止")
            break
        if register_signal == 1:
            continue

        # 2. Log in
        Session = get_Session(VPN)
        if Session == -1:
            self.failed_count += 1
            continue

        # Build a userInfo row in the layout login() expects.
        userInfo: tuple = (registerData['id'], registerData['username'],
                           registerData['password'], registerData['email'],
                           '0', registerData['cookie'])
        login_signal = 0  # 0 = success, 1 = give up on this account
        retry_count = 0
        logged_in = False
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count += 1
            loginData = self.login(Session, present_website, VPN, userInfo)
            if loginData == 1:
                # Credentials rejected: flag the account as abnormal.
                g_var.logger.info('使用当前账号密码登录失败。。。')
                # The keywords need surrounding spaces; the original built
                # "UPDATE<table>SET ...", which is not valid SQL.
                sql = "UPDATE " + present_website + " SET status=1 WHERE id=" + str(
                    userInfo[0]) + ";"
                update_status = MysqlHandler().update(sql)
                if update_status == -1:
                    return -1
                self.failed_count += 1
                login_signal = 1
                break
            elif loginData == 0:
                self.failed_count += 1
                login_signal = 1
                break
            elif loginData == -1:
                # Proxy problem: force the loop to end.
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif loginData == -2:
                g_var.logger.info("登录失败,但可以使用此账户继续尝试,不跳出循环")
                self.failed_count += 1
                continue
            else:
                self.failed_count = 0
                logged_in = True
                break

        if retry_count == g_var.RETRY_COUNT_MAX and not logged_in:
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续登录出错,程序停止"
            g_var.logger.error("连续登录失败!程序停止")
            break
        if login_signal == 1:
            continue

        # 3. Post
        retry_count = 0
        posted = False
        while retry_count < g_var.RETRY_COUNT_MAX:
            time.sleep(g_var.SLEEP_TIME)
            retry_count += 1
            status = self.__postMessage(Session, loginData, present_website)
            if status == 'ok':
                self.success_count += 1
                self.failed_count = 0
                posted = True
                break
            elif status == 1:
                # Cookie no longer valid: clear it so the next run logs in.
                sql = "UPDATE " + present_website + " SET cookie='' WHERE id=" + str(
                    loginData['id']) + ";"
                update_status = MysqlHandler().update(sql)
                if update_status == 0:
                    g_var.logger.info("cookie失效,清除cookie update OK")
                else:
                    g_var.logger.error("数据库清除cookie错误!")
                self.failed_count += 1
                break
            elif status == 0:
                self.failed_count += 1
                break
            elif status == -1:
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif status == -2:
                self.failed_count += 1
                continue

        if retry_count == g_var.RETRY_COUNT_MAX and not posted:
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续发文章出错,程序停止"
            g_var.logger.error("连续发文章出错,程序停止")
            break
    g_var.logger.info("成功注册账户并发送文章" + str(self.success_count) + "篇")
def loginAndPostMessage(self, present_website, VPN: str):
    """Log stored accounts in and post until the quota is met.

    Each pass of the outer loop takes one account from
    ``generate_login_data``, logs it in and posts, retrying each phase up to
    ``g_var.RETRY_COUNT_MAX`` times. The loop ends when
    ``self.success_count`` reaches ``self.assignment_num``, when
    ``__monitor_status`` returns -1, or when a phase exhausts its retries
    (``g_var.SPIDER_STATUS`` is then set to 3).

    Args:
        present_website: site name, also used as the database table name.
        VPN: proxy selector forwarded to ``get_Session`` and ``login``.

    Returns:
        -1 when no account can be fetched or when flagging a failed account
        in the database fails; otherwise ``None`` once the loop ends.
    """
    while self.success_count < self.assignment_num:
        # Check the current error state on every pass.
        if self.__monitor_status() == -1:
            break
        self.now_count += 1

        Session = get_Session(VPN)
        if Session == -1:
            self.failed_count += 1
            continue

        # Fetch an account row from the database.
        userInfo = generate_login_data(present_website)
        g_var.logger.info(userInfo)
        if userInfo is None:
            g_var.ERR_CODE = 2001
            g_var.ERR_MSG = g_var.ERR_MSG + "无法获取proxy!"
            g_var.logger.error("数据库中获取用户失败,本线程停止!")
            return -1

        # 1. Log in
        login_signal = 0
        retry_count = 0
        logged_in = False
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count += 1
            loginData = self.login(Session, present_website, VPN, userInfo)
            if loginData == 1:
                # Credentials rejected: flag the account as abnormal.
                g_var.logger.info('使用当前账号密码登录失败。。。')
                # The keywords need surrounding spaces; the original built
                # "UPDATE<table>SET ...", which is not valid SQL.
                sql = "UPDATE " + present_website + " SET status=1 WHERE id=" + str(
                    userInfo[0]) + ";"
                update_status = MysqlHandler().update(sql)
                if update_status == -1:
                    return -1
                self.failed_count += 1
                login_signal = 1
                break
            elif loginData == 0:
                self.failed_count += 1
                login_signal = 1
                break
            elif loginData == -1:
                # Proxy problem: force the loop to end.
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif loginData == -2:
                g_var.logger.info("登录失败,但可以使用此账户继续尝试,不跳出循环")
                self.failed_count += 1
                continue
            else:
                self.failed_count = 0
                logged_in = True
                break

        # Success on the final permitted attempt must not count as exhaustion.
        if retry_count == g_var.RETRY_COUNT_MAX and not logged_in:
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续登录出错,程序停止"
            g_var.logger.error("login:连续登录失败!程序停止")
            break
        if login_signal == 1:
            continue

        # 2. Post
        retry_count = 0
        posted = False
        while retry_count < g_var.RETRY_COUNT_MAX:
            time.sleep(g_var.SLEEP_TIME)
            retry_count += 1
            status = self.__postMessage(Session, loginData, present_website)
            if status == 'ok':
                self.success_count += 1
                self.failed_count = 0
                posted = True
                break
            elif status == 1:
                # Cookie no longer valid: clear it so the next run logs in.
                sql = "UPDATE " + present_website + " SET cookie='' WHERE id=" + str(
                    loginData['id']) + ";"
                update_status = MysqlHandler().update(sql)
                if update_status == 0:
                    g_var.logger.info("cookie失效,清除cookie update OK")
                else:
                    g_var.logger.error("数据库清除cookie错误!")
                self.failed_count += 1
                break
            elif status == 0:
                self.failed_count += 1
                break
            elif status == -1:
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif status == -2:
                self.failed_count += 1
                continue

        if retry_count == g_var.RETRY_COUNT_MAX and not posted:
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续发文章出错,程序停止"
            g_var.logger.error("连续发文章出错,程序停止")
            break
    g_var.logger.info("成功发送" + str(self.success_count) + "篇文章")
def __register_one(self, Session, present_website: str, email_and_passwd):
    """Register and activate one boredpanda.com account, then store it.

    The flow is: submit the sign-up form, follow the activation link found
    in the mailbox, insert the account row, fill in the profile links, make
    the profile public and record the author page in ``<site>_article``.

    Args:
        Session: session object passed by the caller. NOTE(review): it is
            replaced by a fresh ``requestsW.Session()`` on entry, exactly as
            the original code did.
        present_website: site name, used as the database table name.
        email_and_passwd: ``[0]`` is the mailbox address, ``[1]`` its
            password; the same password is used for the new account.

    Returns:
        On success a dict with ``id``, ``username``, ``password``, ``email``
        and ``cookie`` (the cookie as the string stored in the database).
        On failure a status code: 0 when any step is rejected and the caller
        should move on to another mailbox, -1 when a request reports a proxy
        failure.
    """
    email = email_and_passwd[0]
    emailpwd = email_and_passwd[1]
    ajax_url = "https://www.boredpanda.com/blog/wp-admin/admin-ajax.php"

    Session = requestsW.Session()
    Session.proxies = ip_proxy()
    headers = {
        "User-Agent": get_user_agent(),
        "referer": "https://www.boredpanda.com/add-new-post/",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
    }
    user = email.split("@")[0]
    pwd = emailpwd

    data = {
        "action": "contribution_signup",
        "user_email": email,
        "user_full_name": user,
        "user_pass": pwd,
        "redirect": "https://www.boredpanda.com/add-new-post/"
    }
    res = Session.post(ajax_url, proxies=Session.proxies, data=data)
    # Other code in this file checks requestsW results against -1 before
    # touching the response; do the same here instead of crashing on .text.
    if res == -1:
        return -1
    if 'user_id' not in res.text:
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|发送邮箱,注册失败"
        g_var.logger.info("|_|发送邮箱,注册失败")
        return 0

    # Raw string: same pattern value, without invalid-escape warnings.
    res = EmailVerify(
        username=email,
        password=emailpwd,
        re_text=
        r'Click .{0,50} href="(http://\w{5,15}.ct.sendgrid.net/ls/click\?upn=.{300,600})">here</a>'
    ).execute_Start()
    if res == -1:
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|邮箱激活失败"
        g_var.logger.info("|_|邮箱激活失败")
        return 0

    # Visiting the activation link is expected to set the auth cookie.
    Session.get(res["data"], headers=headers)
    if "boredpanda_auth" not in Session.cookies.get_dict():
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|邮箱获取链接后,请求失败"
        g_var.logger.info("|_|邮箱获取链接后,请求失败")
        return 0

    cookie = str(Session.cookies.get_dict())
    # NOTE(review): values are interpolated straight into the statement;
    # switch to a parameterized query if MysqlHandler supports one.
    sql = """INSERT INTO %s (username, password, mail, status, cookie) VALUES("%s", "%s", "%s", "%s", "%s");""" % (
        present_website, user, pwd, email, 0, cookie)
    g_var.logger.info(sql)
    user_row_id = MysqlHandler().insert(sql)
    if user_row_id == -1:
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|数据库插入失败"
        g_var.logger.info("|_|数据库插入失败")
        return 0

    tlList = get_new_title_and_link()
    title, url = tlList[0], tlList[1]
    data = {
        "action": "save_settings_form",
        "settingsDisplay": title,
        "settingsWebsite": url,
        "settingsFacebook": url,
        "settingsTwitter": url,
        "settingsFlickr": url,
        "settingsSlack": "",
        "settingsBio": title,
        "settingsAdminBox": ""
    }
    res = Session.post(ajax_url,
                       proxies=Session.proxies,
                       headers=headers,
                       data=data)
    if res == -1:
        return -1
    if "success" not in res.text:
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|修改个人链接,请求失败"
        g_var.logger.info("|_|修改个人链接,请求失败")
        return 0

    data = {
        "action": "save_privacy_settings_form",
        "allowContactMe": "true",
        "ninjaPanda": "false",
    }
    res = Session.post(ajax_url,
                       headers=headers,
                       proxies=Session.proxies,
                       data=data)
    if res == -1:
        return -1
    if "success" not in res.text:
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|允许个人资料访问,请求失败"
        g_var.logger.info("|_|允许个人资料访问,请求失败")
        return 0

    sql = "INSERT INTO %s_article(url, keyword, user_id) VALUES('%s', '%s', '%s');" % (
        present_website, "https://www.boredpanda.com/author/%s/" % user,
        title, user_row_id)
    # `with` releases the lock on every path; the original acquired it and
    # never released it, blocking every other thread that inserts articles.
    with g_var.insert_article_lock:
        article_row_id = MysqlHandler().insert(sql)
    if article_row_id == -1:
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|MYSQL插入文章失败"
        g_var.logger.info("|_|MYSQL插入文章失败")
        return 0

    # The original fell off the end here and returned None on success,
    # although the contract is to hand back the registration data.
    return {
        "id": user_row_id,
        "username": user,
        "password": pwd,
        "email": email,
        "cookie": cookie,
    }
def login(self, Session, present_website: str, VPN, userInfo):
    """Log a pastebin.com account in and return its login data.

    When the account row already carries a cookie, no request is made and
    the stored values are returned. Otherwise the login form is posted, the
    session cookies are written back to the account row, and the refreshed
    values are returned.

    Args:
        Session: session object used to post the login form.
        present_website: site name, used as the database table name.
        VPN: proxy selector (not used by this implementation).
        userInfo: account row; [0] id, [1] username, [2] password,
            [3] email, [5] cookie.

    Returns:
        On success a dict with ``id``, ``username``, ``password``, ``email``
        and ``cookie``. On failure a status code:
        0  -- the response lacks the logged-in marker text,
        -1 -- the login request returned -1,
        -2 -- the cookie could not be written to the database.
    """
    account_id, username, password, email = (userInfo[0], userInfo[1],
                                             userInfo[2], userInfo[3])
    stored_cookie = userInfo[5]

    def build_login_data(cookie_value):
        # Key order matches the original literal.
        return {
            'id': account_id,
            'username': username,
            'password': password,
            'email': email,
            'cookie': cookie_value,
        }

    # Fast path: a stored, non-empty cookie means no request is needed.
    if stored_cookie not in (None, ""):
        g_var.logger.info("返回cookie" + stored_cookie)
        return build_login_data(stored_cookie)

    # Slow path: post the login form with username and password.
    form = {
        'submit_hidden': 'submit_hidden',
        'user_name': username,
        'user_password': password,
        'submit': 'Login',
    }
    request_headers = {
        'origin': 'https://pastebin.com',
        'referer': 'https://pastebin.com/login'
    }
    g_var.logger.info("账号登录中...")
    response = Session.post("https://pastebin.com/login",
                            data=form,
                            headers=request_headers,
                            timeout=g_var.TIMEOUT)
    if response == -1:
        g_var.logger.error("代理出错,登录超时")
        return -1

    if "this is your personal Pastebin" not in response.text:
        g_var.logger.info("login fail!")
        g_var.logger.info(response.text)
        return 0

    g_var.logger.info("login success!")
    fresh_cookie = str(Session.cookies.get_dict())
    statement = "".join(("UPDATE ", present_website, " SET cookie=\"",
                         fresh_cookie, "\" WHERE id=", str(account_id), ";"))
    if MysqlHandler().update(statement) != 0:
        g_var.logger.error("数据库更新cookie错误!")
        return -2
    g_var.logger.info("update cookie OK")
    return build_login_data(fresh_cookie)
def __postMessage(self, Session, loginData: dict, present_website):
    """Save one bookmark on diigo.com and record it in the database.

    Args:
        Session: session object; only its ``cookies`` attribute is written.
        loginData: account data; ``id``, ``username`` and ``cookie`` are
            read. ``cookie`` may be a dict or the ``str()`` of one.
        present_website: site name, prefix of the ``<site>_article`` table.

    Returns:
        0 on success, otherwise:
        -1 -- the request failed, the response lists no ``items``, or the
              article row could not be inserted,
        -2 -- the response body is not JSON,
        -4 -- the stored cookie cannot be parsed into a dict.
    """
    # Local import so this block does not depend on the file's import list.
    import ast

    raw_cookie = loginData["cookie"]
    if raw_cookie != "":
        # NOTE(review): kept from the original; this stores the raw cookie
        # value on the session, which is not used again in this method.
        Session.cookies = raw_cookie

    # The cookie comes back from the database as text. Parse it as a literal
    # instead of eval(), which would execute whatever the column contains.
    if isinstance(raw_cookie, dict):
        cookies = raw_cookie
    else:
        try:
            cookies = ast.literal_eval(raw_cookie)
        except (ValueError, SyntaxError, TypeError):
            cookies = None
    if not isinstance(cookies, dict):
        g_var.logger.error("stored cookie could not be parsed")
        return -4

    title_link = get_new_title_and_link()
    postUrl = title_link[1]

    self.headers[
        "Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"
    self.headers["Referer"] = "https://www.diigo.com/user/t3wjogvjklzwh3zi"
    self.headers["Origin"] = "https://www.diigo.com"
    self.headers["X-Requested-With"] = "XMLHttpRequest"

    data = {
        "title": title_link[0],
        "tags": title_link[0],
        "description": title_link[0],
        "unread": False,
        "private": False,
        "url": postUrl,
        "lists": "",
        "groups": "",
    }
    res = requestsW.post("https://www.diigo.com/item/save/bookmark",
                         cookies=cookies,
                         headers=self.headers,
                         data=data)
    if res == -1:
        return res
    g_var.logger.info(res.text)
    g_var.logger.info(loginData)

    try:
        payload = res.json()
    except ValueError:
        # An HTML error page or similar: report it rather than crash.
        g_var.logger.error("bookmark response is not JSON")
        return -2
    if not project_util.dictExistValue(payload, "items"):
        return -1

    res_url = "https://www.diigo.com/user/" + loginData["username"]
    sql = "INSERT INTO %s_article(url, keyword, user_id) VALUES('%s', '%s', '%s');" % (
        present_website, res_url, title_link[0], loginData["id"])
    # `with` releases the lock on the failure path too; the original
    # returned -1 while still holding it.
    with g_var.insert_article_lock:
        last_row_id = MysqlHandler().insert(sql)
    if last_row_id == -1:
        return -1
    return 0
# Registers one pastebin.com account: solves the sign-up captcha, posts the
# sign-up form, follows the activation link read from the mailbox (asking for
# one resend if none arrives), then inserts the account row and returns a
# dict with id, username, password, email and cookie. Status codes returned on
# failure: 0, -1 and -2, as described in the docstring below.
# NOTE(review): this definition is flattened onto a few physical lines and
# contains `******` redaction artifacts right after `"username:"` that swallowed
# the two log statements and the `captcha_url = ` assignment, so it does not
# parse as Python. Restore the body from the upstream file before changing it.
# NOTE(review): the resend request sends HTTP/2 pseudo-headers (':authority',
# ':method', ...) and a hard-coded browser cookie; confirm that is intended.
def __register_one(self, Session, present_website: str, email_and_passwd): """ 注册一个账户,需要实现注册、激活、并将注册数据存入数据库的功能 Args: Session:Session对象 present_website:当前网站名,用于数据库表名 email_and_passwd:邮箱账户和密码,email_and_passwd[0]是邮箱,[1]是密码 Returns: 注册成功返回注册数据字典对象registerData,需要包含id, username, password, email, cookie(在访问激活链接时能取到,\ 取不到返回空) user_id这样获取:(示例) # 将注册的账户写入数据库(sql自己写,这边只是个示例) sql = "INSERT INTO "+present_website+"(username, password, mail, status, cookie) VALUES('" + \ username + "', '" + password + "', '" + email + "', '" + str(0) + cookie + "');" last_row_id = MysqlHandler().insert(sql) if last_row_id != -1: registerData["user_id"] = last_row_id return registerData else: g_var.logger.error("数据库插入用户注册数据失败") return 0 注册失败返回状态码 0:某些报错需要跳出while循环,更换邮箱 -1:连续代理错误或页面发生改变等取不到关键数据等,需要停止程序 -2:注册失败,可能是打码出错等原因,邮箱可以继续使用(邮箱资源成本较高,因此要确保注册成功后再更换邮箱),不跳出循环 """ username = generate_random_string(15, 20) password = generate_random_string(10, 15) g_var.logger.info("username:"******"password:"******"https://pastebin.com/etc/captcha/random.php" captcha_code = identify_captcha_1(Session, captcha_url, present_website) if captcha_code == -1: g_var.logger.info("代理连续错误") return -1 elif captcha_code == -2: g_var.logger.info("识别验证码失败") return -2 g_var.logger.info("captcha_code:" + captcha_code) registerData = { 'user_notifications': '1', 'submit_hidden': 'submit_hidden', 'user_name': username, 'user_email': email_and_passwd[0], 'user_password': password, 'user_terms': 'on', 'captcha_solution': captcha_code, 'submit': 'Create My Account', } headers = { 'content-type': 'application/x-www-form-urlencoded', 'origin': 'https://pastebin.com', 'referer': 'https://pastebin.com/signup', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36', } url = "https://pastebin.com/signup.php" result = Session.post(url, headers=headers, data=registerData, timeout=g_var.TIMEOUT) if result == -1: g_var.logger.error("提交注册信息超时") return -1 success_signal 
= "Please click on the activation link to activate your account." if success_signal in result.text: g_var.logger.info("注册成功!" + result.text) else: email_used_signal = "The email address you picked is already in use" if email_used_signal in result.text: g_var.logger.info("邮箱已经被注册!" + result.text) return 0 # 跳出循环更换邮箱 re_text = '(https://pastebin.com/activate_account.php\?.*?)"' email_verify_obj = EmailVerify(email_and_passwd[0], email_and_passwd[1], re_text) verify_url = account_activation(Session, email_verify_obj) if verify_url == -1: g_var.logger.info("2分钟内未收到激活邮件,激活失败!") resend_url = "https://pastebin.com/resend.php" headers = { ':authority': 'pastebin.com', ':method': 'POST', ':path': '/resend.php', ':scheme': 'https', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'accept-encoding': 'gzip, deflate, br', 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8', 'cache-control': 'max-age=0', 'content-type': 'application/x-www-form-urlencoded', 'cookie': '__cfduid=d4e164af370e44d4de219e208cd6779061589887894; PHPSESSID=8ehi7ipdvopsmtqq9oaf1uk000; _ga=GA1.2.65423653.1589887895; _gid=GA1.2.805292117.1589887895; refit=L3Byb2ZpbGU%3D; _gat_UA-58643-34=1', 'origin': 'https://pastebin.com', 'referer': 'https://pastebin.com/resend.php?e=3', 'sec-fetch-dest': 'document', 'sec-fetch-mode': 'navigate', 'sec-fetch-site': 'same-origin', 'sec-fetch-user': '******', 'upgrade-insecure-requests': '1', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36', } resendData = {} result = Session.post(resend_url, headers=headers, data=resendData, timeout=g_var.TIMEOUT) if result == -1: g_var.logger.error("提交重新激活超时") return -1 verify_url = account_activation(Session, email_verify_obj) if verify_url == -1: g_var.logger.error("又没激活成功") return 0 g_var.logger.info("verify_url" + verify_url) headers = { 'user-agent': 'Mozilla/5.0 (Windows 
NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36', } result = Session.get(url=verify_url, headers=headers, timeout=g_var.TIMEOUT) if result == -1: g_var.logger.error("访问激活链接超时") return -1 # 获取cookie cookie = str(Session.cookies.get_dict()) sql = "INSERT INTO " + present_website + "(username, password, mail, status, cookie) VALUES('" + username + \ "', '" + password + "', '" + email_and_passwd[0] + "', '" + str(0) + "', \"" + cookie + "\");" last_row_id = MysqlHandler().insert(sql) if last_row_id != -1: registerData = dict() registerData["id"] = last_row_id registerData['username'] = username registerData['password'] = password registerData['email'] = email_and_passwd[0] registerData['cookie'] = cookie return registerData else: g_var.logger.error("数据库插入用户注册数据失败") return 0
# Registers one diigo.com account: checks that the e-mail address is free,
# obtains a reCAPTCHA answer, reads the randomised field names from the
# sign-up page, posts the form, follows the verification link read from the
# mailbox and inserts the account row. On success it returns a dict with
# username, password, email, cookie and id; failure paths return 0 or -2
# (or the -1 passed through from the first home-page request).
# NOTE(review): this definition is flattened onto a few physical lines and
# contains a `******` redaction artifact inside the `check_name?username=`
# request that swallowed the rest of that call and the start of the following
# `if`, so it does not parse as Python. Restore it from the upstream file.
# NOTE(review): `if not user_input and email_input and pwd_input` only negates
# the first operand, so a missing email/password field falls through to
# `.group(1)` on None; that branch also returns a string, not a status code.
# NOTE(review): `"captcha error" in res.json()["status"]` runs only after
# `status == -2` matched, i.e. it applies `in` to an int.
# NOTE(review): `emailinfo["data"]` is indexed without first checking whether
# `execute_Start()` returned -1.
def __register_one(self, Session, present_website: str, email_and_passwd): """ 注册一个账户 Args: Session:Session对象 present_website:当前网站名,用于数据库表名 email_and_passwd:邮箱账户和密码,email_and_passwd[0]是邮箱,[1]是密码 Returns: 注册成功返回注册数据字典对象registerData,需要包含user_id, username, password, email user_id这样获取:(示例) # 将注册的账户写入数据库(sql自己写,这边只是个示例) sql = "INSERT INTO "+present_website+"(username, password, mail, status) VALUES('" + name + \ "', '" + psd + "', '" + email_and_passwd[0] + "', '" + str(0) + "');" last_row_id = MysqlHandler().insert(sql) if last_row_id != -1: registerData["id"] = last_row_id return registerData else: g_var.logger.error("数据库插入用户注册数据失败") return 0 注册失败返回状态码 0:更换email 返回0 或其他错误,但是激活失败或插入数据库失败 -1:表示requests请求页面失败,需要更换代理 -2:注册失败,可能是邮箱密码不符合要求、或ip被封等原因,需要排查 """ user = project_util.generate_random_string(12, 16) pwd = project_util.generate_random_string(10, 12) email_list = email_and_passwd if email_list == -1: g_var.SPIDER_STATUS = 2 g_var.ERR_MSG = g_var.ERR_MSG + "|_|NO email" g_var.logger.info("NO email") return 0 verify_email = Session.get( "https://www.diigo.com/user_mana2/check_email?email=" + email_list[0], timeout=g_var.TIMEOUT, headers=self.headers, proxies=Session.proxies) # 验证邮箱是否可用 verify_user = Session.get( "https://www.diigo.com/user_mana2/check_name?username="******"1": g_var.SPIDER_STATUS = 2 g_var.ERR_MSG = g_var.ERR_MSG + "|_|账号密码或邮箱已经被注册" g_var.logger.info("账号密码或邮箱已经被注册") return 0 # time.sleep(3) google_captchas = google_captcha( "", "6Ld23sMSAAAAALfyXkI9d0nHmzOH9jZZNuh66nql", "https://www.diigo.com/sign-up?plan=free") if google_captchas == -1: g_var.SPIDER_STATUS = 2 g_var.ERR_MSG = g_var.ERR_MSG + "|_|谷歌打码失败" g_var.logger.info("谷歌打码失败") return -2 res = requestsW.get("https://www.diigo.com/", headers=self.headers, proxies=Session.proxies) # 打开首页 if res == -1: return res cookies = res.cookies.get_dict() i = 0 while i < g_var.ERR_COUNT: i += 1 try: Session.proxies = ip_proxy() res = requests.get("https://www.diigo.com/sign-up?plan=free", headers=self.headers, 
cookies=cookies, proxies=Session.proxies, verify=False) user_input = re.search('id="username" name="(\w{32})">', res.text) email_input = re.search('id=\'email\' name="(\w{32})">', res.text) pwd_input = re.search('id=\'password\' name="(\w{32})"', res.text) if not user_input and email_input and pwd_input: # TODO 获取不到参数 return "注册无法打开网页" else: user_input = user_input.group(1) email_input = email_input.group(1) pwd_input = pwd_input.group(1) data = { "plan": "free", "g-recaptcha-response": google_captchas, user_input: user, email_input: email_and_passwd[0], pwd_input: pwd, } self.headers["X-Requested-With"] = "XMLHttpRequest" self.headers[ "Referer"] = "https://www.diigo.com/sign-up?plan=free" self.headers[ "Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8" res = requests.post( "https://www.diigo.com/user_mana2/register_2", headers=self.headers, cookies=cookies, data=data, proxies=Session.proxies, verify=False) print(res.json()) if project_util.dictExistValue(res.json(), "status"): if res.json()["status"] == 1: cookies.update(res.cookies.get_dict()) savec = cookies res = requestsW.post( "https://www.diigo.com/user_mana2/resend_verify", cookies=cookies, headers=self.headers, data={"email": email_and_passwd[0]}, proxies=Session.proxies) print("重新发送邮箱:", res.text) emailinfo = EmailVerify( email_and_passwd[0], email_and_passwd[1], 'href="(https://www.diigo.com/user_mana2/register_verify/\w{32})"' ).execute_Start() print("这里是邮箱参数:", emailinfo) if emailinfo["data"] != -1: Session = requestsW.session() res = Session.get(emailinfo["data"], headers=self.headers, proxies=Session.proxies, cookies=cookies) sql = """INSERT INTO %s (username, password, mail, status, cookie) VALUES("%s", "%s", "%s", "%s", "%s");""" % ( "diigo_com", user, pwd, email_and_passwd[0], 0, savec) g_var.logger.info(sql) last_row_id = MysqlHandler().insert(sql) if last_row_id != -1: registerData = { "username": user, "password": pwd, "email": email_and_passwd[0], "cookie": savec, } 
registerData["id"] = int(last_row_id) return registerData return { "user": user, "pwd": pwd, "email": email_and_passwd[0], "cookies": Session.cookies.get_dict() } # if project_util.dictExistValue(res.cookies.get_dict(),"diigoandlogincookie"): # 注册成功并登陆cookie # saveCookie = str(Session.cookies.get_dict()) # # print({"user": user, "pwd": pwd, "email": email_and_passwd[0], "cookies": saveCookie}) # return {"user": user, "pwd": pwd, "email": email_and_passwd[0], "cookies": saveCookie} elif res.json()["status"] == -2: if "captcha error" in res.json()["status"]: g_var.SPIDER_STATUS = 2 g_var.ERR_MSG = g_var.ERR_MSG + "|_|谷歌打码失败" g_var.logger.info("谷歌打码失败") return -2 return -2 except Exception as e: res = requestsW.get("https://www.diigo.com/", headers=self.headers, proxies=Session.proxies, verify=False) # 打开首页 cookies = res.cookies.get_dict() g_var.logger.info(e) g_var.logger.info("正在换ip", e) return 0
def __register_one(self, present_website, email_info):
    """Register one wattpad.com account and confirm it through e-mail.

    Args:
        present_website: site name, forwarded to ``generate_register_data``.
        email_info: mailbox data, forwarded to ``generate_register_data``
            and ``get_verify_url``.

    Returns:
        On success the registration dict extended with ``id`` (inserted row
        id) and ``token`` (session token from the sign-up response).
        On failure a status code:
        0  -- the account row could not be inserted,
        -1 -- a request returned -1,
        -2 -- headers or form data could not be built, the sign-up response
              lacks the greeting, or it carries no session token,
        -3 -- no verification URL was found or the verification request did
              not return HTTP 200.
    """
    g_var.logger.info("register...")
    headers = generate_headers(0)
    if headers == -1:
        g_var.logger.info("获取注册headers失败...")
        return -2

    registerData = generate_register_data(present_website, email_info)
    g_var.logger.info(registerData)
    if registerData == -1:
        g_var.logger.info("未生成正确注册数据...")
        return -2

    url_register = 'https://www.wattpad.com/signup?nextUrl=/home'
    g_var.logger.info("提交注册中...")
    html = requestsW.post(url_register,
                          proxies=ip_proxy("en"),
                          data=registerData,
                          headers=headers,
                          timeout=g_var.TIMEOUT)
    if html == -1:
        return -1

    # The page greets the new user by name when sign-up worked.
    prove_info = 'Hi @' + registerData['username']
    if prove_info not in html.text:
        g_var.logger.info(html.text)
        g_var.logger.info("IP被封等原因...")
        return -2

    # Read the token defensively: a response without Set-Cookie, or without a
    # token in it, used to raise KeyError here or IndexError after the insert.
    token_list = re.findall('token=(.*?);', html.headers.get('Set-Cookie', ''))
    if not token_list:
        g_var.logger.info("no session token in the sign-up response")
        return -2

    # The verification request is a plain GET, so drop the form-post headers
    # (pop() tolerates a header that was never set).
    for form_header in ('Origin', 'Content-Type', 'Referer'):
        headers.pop(form_header, None)

    time.sleep(2)
    verify_url = get_verify_url(email_info)
    if verify_url == -1:
        g_var.logger.info("未读取到邮箱验证的url...")
        return -3

    g_var.logger.info("邮件的url正在验证中...")
    html = requestsW.get(url=verify_url,
                         proxies=ip_proxy("en"),
                         headers=headers,
                         timeout=g_var.TIMEOUT)
    if html == -1:
        return -1
    if html.status_code != 200:
        g_var.ERR_CODE = 3006
        g_var.ERR_MSG = "邮箱验证失败..."
        g_var.logger.error("邮箱验证失败!\n")
        return -3

    # NOTE(review): values are concatenated straight into the statement;
    # switch to a parameterized query if MysqlHandler supports one.
    sql = "INSERT INTO wattpad_com(username, password, mail, status) VALUES('" + registerData['username'] + \
          "', '" + registerData['password'] + "', '" + registerData['email'] + "', '" + str(0) + "');"
    last_row_id = MysqlHandler().insert(sql)
    if last_row_id == -1:
        g_var.ERR_CODE = 2004
        g_var.ERR_MSG = "数据库插入用户注册数据失败..."
        g_var.logger.error("数据库插入用户注册数据失败...")
        return 0

    registerData["id"] = last_row_id
    registerData["token"] = token_list[0]
    return registerData
def loginAndPostMessage(self, present_website, VPN: str):
    """Log stored accounts in and post until the quota is met.

    Each pass of the outer loop takes one account from
    ``generate_login_data``, logs it in and posts, retrying each phase up to
    ``g_var.RETRY_COUNT_MAX`` times. The loop ends when
    ``self.success_count`` reaches ``self.assignment_num``, when
    ``__monitor_status`` returns -1, or when a phase exhausts its retries
    (``g_var.SPIDER_STATUS`` is then set to 3).

    Args:
        present_website: site name, also used as the database table name.
        VPN: proxy selector forwarded to ``get_Session`` and ``login``.

    Returns:
        -1 when no account can be fetched, ``{"error": 1}`` when clearing an
        expired cookie fails, otherwise ``None`` once the loop ends.
    """
    while self.success_count < self.assignment_num:
        # Check the current error state on every pass.
        if self.__monitor_status() == -1:
            break
        self.now_count += 1

        Session = get_Session(VPN)
        if Session == -1:
            self.failed_count += 1
            continue

        # Fetch an account row from the database.
        userInfo = generate_login_data(present_website)
        g_var.logger.info(userInfo)
        if userInfo is None:
            g_var.ERR_CODE = 2001
            g_var.ERR_MSG = g_var.ERR_MSG + "无法获取proxy!"
            g_var.logger.error("数据库中获取用户失败,本线程停止!")
            return -1

        # 1. Log in
        login_signal = 0
        retry_count = 0
        logged_in = False
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count += 1
            loginData = self.login(Session, present_website, VPN, userInfo)
            if loginData == -1:
                # Proxy problem: force the loop to end.
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif loginData == -2:
                # Account problem: give up on this account.
                self.failed_count += 1
                login_signal = 1
                break
            else:
                self.failed_count = 0
                self.proxy_err_count = 0
                logged_in = True
                break

        # Success on the final permitted attempt must not count as exhaustion.
        if retry_count == g_var.RETRY_COUNT_MAX and not logged_in:
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续登录出错,程序停止"
            g_var.logger.error("login:连续登录失败!程序停止")
            break
        if login_signal == 1:
            continue

        # 2. Post
        retry_count = 0
        posted = False
        while retry_count < g_var.RETRY_COUNT_MAX:
            time.sleep(g_var.SLEEP_TIME)
            retry_count += 1
            status = self.__postMessage(Session, loginData, present_website)
            if status == 0:
                self.success_count += 1
                self.failed_count = 0
                self.proxy_err_count = 0
                posted = True
                break
            elif status == -1:
                # Proxy problem: force the loop to end.
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif status == -2:
                # An error that must stop the spider.
                self.failed_count += 1
                g_var.SPIDER_STATUS = 3
                break
            elif status == -3:
                self.failed_count += 1
            elif status == -4:
                # Cookie expired: clear it so the next run logs in again.
                sql = "UPDATE %s SET cookie=null WHERE id=%s ;" % (
                    present_website, loginData["id"])
                g_var.logger.info(sql)
                update_status = MysqlHandler().update(sql)
                if update_status != 0:
                    g_var.logger.error("数据库清除cookie错误!")
                    return {"error": 1}
                break

        if retry_count == g_var.RETRY_COUNT_MAX and not posted:
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续发文章出错,程序停止"
            g_var.logger.error("连续发文章出错,程序停止")
            break
    g_var.logger.info("成功发送" + str(self.success_count) + "篇文章")
def start(self, present_website, VPN):
    """Register an account, log in with it and post an article, repeatedly.

    Every round of the outer loop obtains a session and a mailbox, registers
    an account (with retries), logs in (with retries) and posts one article
    (with retries). The loop ends when ``self.success_count`` reaches
    ``self.assignment_num``, when ``self.__monitor_status()`` returns -1, or
    when one of the retry budgets (``g_var.RETRY_COUNT_MAX`` attempts) is
    used up.

    Args:
        present_website: Site name; passed to the helpers and used as the
            table name in the cookie-reset SQL statement.
        VPN: Selector passed on to ``get_Session``, ``ip_proxy`` and
            ``self.login``.

    Returns:
        ``None`` when the loop ends, ``{"error": 1}`` when clearing the
        cookie in the database fails.
    """
    while self.success_count < self.assignment_num:
        # Check the current error state at the start of every round.
        if self.__monitor_status() == -1:
            break
        self.now_count = self.now_count + 1

        # Set up the session object.
        Session = get_Session(VPN)
        if Session == -1:
            self.failed_count = self.failed_count + 1
            continue

        # 1. Register.
        # Fetch a mailbox first.
        email_and_passwd = get_email(present_website)
        if email_and_passwd == -1:
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|没有邮箱了"
            g_var.logger.error("没有邮箱了")
            # NOTE(review): the original also set retry_count to the maximum
            # here, which had no effect because of this ``continue``; the
            # round simply restarts. Confirm whether running out of
            # mailboxes should stop the program instead.
            continue

        register_exhausted = False
        retry_count = 0
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count = retry_count + 1
            registerData = self.__register_one(Session, present_website,
                                               email_and_passwd)
            if registerData == -1:
                # Proxy problem: use up the remaining attempts so the loop
                # ends without a ``break``.
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif registerData == -2:
                g_var.logger.info("注册失败,可能是邮箱密码不符合要求、或ip被封等原因,请排查!")
                self.proxy_err_count = self.proxy_err_count + 1
                proxies = ip_proxy(VPN)
                if proxies == {"error": -1}:
                    g_var.logger.info("获取代理错误")
                    self.failed_count = self.failed_count + 1
                else:
                    # Only install a real proxy mapping; the error marker
                    # must not replace the session's current proxies.
                    Session.proxies = proxies
            elif registerData == 0:
                # Registered but activation failed: switch to a new mailbox
                # and start counting attempts again.
                email_and_passwd = get_email(present_website)
                if email_and_passwd == -1:
                    retry_count = g_var.RETRY_COUNT_MAX
                    g_var.ERR_MSG = g_var.ERR_MSG + "|_|没有邮箱了"
                    g_var.logger.error("没有邮箱了")
                    continue
                retry_count = 0
            else:
                # Registration succeeded.
                self.failed_count = 0
                break
        else:
            # Reached only when the loop ran out of attempts. Comparing
            # retry_count with the maximum afterwards would also flag a
            # registration that succeeded on the very last attempt.
            register_exhausted = True

        if register_exhausted:
            # Repeated failures indicate a real problem; stop the program.
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续注册出错,程序停止"
            g_var.logger.error("start:连续注册失败!程序停止")
            break

        # 2. Log in.
        Session = get_Session(VPN)
        if Session == -1:
            self.failed_count = self.failed_count + 1
            continue

        # Build the userInfo record from the registration data.
        g_var.logger.info(registerData)
        userInfo = [
            int(registerData['id']),
            registerData['username'],
            registerData['password'],
            registerData['email'],
            0,
            str(registerData['cookie']),
        ]

        login_signal = 0  # 0 = ok, 1 = account problem (skip to next round)
        login_exhausted = False
        retry_count = 0
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count = retry_count + 1
            loginData = self.login(Session, present_website, VPN, userInfo)
            if loginData == -1:
                # Proxy problem: use up the remaining attempts.
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif loginData == -2:
                # Account problem: leave the retry loop.
                self.failed_count = self.failed_count + 1
                login_signal = 1
                break
            else:
                self.failed_count = 0
                self.proxy_err_count = 0
                break
        else:
            login_exhausted = True

        if login_exhausted:
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续登录出错,程序停止"
            g_var.logger.error("start:连续登录失败!程序停止")
            break
        if login_signal == 1:
            continue

        # 3. Post the article.
        post_exhausted = False
        retry_count = 0
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count = retry_count + 1
            status = self.__postMessage(Session, loginData, present_website)
            if status == 0:
                # Article posted.
                self.success_count = self.success_count + 1
                self.failed_count = 0
                self.proxy_err_count = 0
                break
            elif status == -1:
                # Proxy problem: use up the remaining attempts.
                g_var.ERR_MSG = g_var.ERR_MSG + "|_|代理连续错误"
                g_var.logger.info("代理错误")
                retry_count = g_var.RETRY_COUNT_MAX
            elif status == -2:
                # An error that requires the program to stop.
                self.failed_count = self.failed_count + 1
                g_var.SPIDER_STATUS = 3
                break
            elif status == -3:
                # Counted as a failure; try again.
                self.failed_count = self.failed_count + 1
            elif status == -4:
                # Clear the stored cookie of this account.
                sql = "UPDATE %s SET cookie=null WHERE id=%s ;" % (
                    present_website, loginData["id"])
                g_var.logger.info(sql)
                update_status = MysqlHandler().update(sql)
                if update_status != 0:
                    g_var.logger.error("数据库清除cookie错误!")
                    return {"error": 1}
                break
        else:
            post_exhausted = True

        if post_exhausted:
            # Repeated failures indicate a real problem; stop the program.
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|连续发文章出错,程序停止"
            g_var.logger.error("连续发文章出错,程序停止")
            break

    g_var.logger.info("成功注册账户并发送文章" + str(self.success_count) + "篇")
def __register_one(self, present_website):
    """Register one account with random credentials and store it.

    The method builds request headers, obtains a session id and a captcha
    answer, posts the sign-up form and, when the JSON reply reports success,
    inserts the new account into the database.

    Args:
        present_website: Current site name; used as the database table name.

    Returns:
        On success a dict with ``id`` (inserted row id), ``username``,
        ``password`` and ``email``. Otherwise a status code:
        0: storing the account in the database failed or raised;
        -1: headers or session id unavailable, or the sign-up request
            itself returned -1;
        -2: session id helper returned -2, captcha failed, the reply could
            not be parsed, or the reply did not report success.
    """
    g_var.logger.info('register......')
    headers = generate_headers(0)
    if headers == -1:
        return -1

    g_var.logger.info('session_id......')
    session_id = get_session_id()
    if session_id == -1:
        return -1
    elif session_id == -2:
        return -2

    googlekey = '6LefyhkTAAAAANpeEKwwgimNneiKWXRQtEqFZbat'
    captcha_value = google_captcha(
        "", googlekey,
        'https://auth.voxmedia.com/signup?return_to=https://www.sbnation.com/'
    )
    if captcha_value == -1:
        return -2

    headers['cookie'] = session_id
    username = generate_random_string(8, 12)
    password = generate_random_string(10, 14)
    community_id = random.randint(210, 299)
    g_var.logger.info('community_id.....')
    g_var.logger.info(community_id)
    email = username + '@hotmail.com'
    registerData = {
        'g-recaptcha-response': captcha_value,
        'user[username]': username,
        'user[password]': password,
        'user[email]': email,
        'user[newsletter]': 'false',
        'community_id': community_id,
    }

    g_var.logger.info('开始提交注册信息...')
    url_login = '******'
    html = requestsW.post(url_login,
                          proxies=ip_proxy("en"),
                          data=registerData,
                          headers=headers,
                          timeout=g_var.TIMEOUT)
    if html == -1:
        return html

    try:
        g_var.logger.info(html.text)
        res_data = json.loads(html.text)
    except Exception as e:
        g_var.logger.info(e)
        g_var.logger.info('注册失败,返回信息解析失败。。。')
        g_var.logger.info(html.text)
        return -2

    # A reply that is not a JSON object, or that lacks the 'success' key,
    # is treated as a failed registration instead of raising.
    if not isinstance(res_data, dict) or not res_data.get('success'):
        g_var.logger.info('注册失败。。。')
        g_var.logger.info(html.text)
        return -2

    try:
        sql = "INSERT INTO " + present_website + "(username, password, mail) VALUES('" + \
            username + "', '" + password + "', '" + email + "');"
        last_row_id = MysqlHandler().insert(sql)
        g_var.logger.info(last_row_id)
        if last_row_id != -1:
            g_var.logger.info('注册成功!' + username)
            userData = {
                'id': last_row_id,
                'username': username,
                'password': password,
                'email': email,
            }
            return userData
        else:
            g_var.ERR_CODE = 2004
            g_var.ERR_MSG = "数据库插入用户注册数据失败..."
            g_var.logger.error("数据库插入用户注册数据失败...")
            return 0
    except Exception as e:
        g_var.logger.info(e)
        g_var.ERR_CODE = 2004
        g_var.ERR_MSG = "数据库插入用户注册数据异常..."
        g_var.logger.error("数据库插入用户注册数据异常...")
        return 0
# Start the worker threads.
# presumably t_list holds threading.Thread objects - confirm against the
# code that builds it.
for worker in t_list:
    # Thread.setDaemon() is deprecated; setting the attribute is equivalent.
    worker.daemon = True
    worker.start()

# Send the status periodically.
close_send_status_signal = 0
# Per the original note: the reporting loop may only be left once all
# threads have stopped.
wait_signal = 0
while g_var.SPIDER_STATUS != 3 or close_send_status_signal != 1:
    close_send_status_signal, wait_signal = send_spider_status(
        obj_list, t_list)
    time.sleep(g_var.SEND_STATUS_INTERVAL)

if wait_signal == 0:
    # Wait for every thread to finish.
    for worker in t_list:
        worker.join()
    g_var.logger.info("等待所有线程结束")
elif wait_signal == 1:
    # Stop right away without waiting for the other threads.
    g_var.logger.info("不等待其他线程结束,直接停止")

# Before exiting, persist the global g_var.USER_ID to config.json.
current_id = {"currentId": g_var.USER_ID}
with open(g_var.ENV_DIR + '/genius_com/config.json', 'w') as f:
    json.dump(current_id, f)

MysqlHandler().dbclose()  # close the database
g_var.logger.info("主线程结束!共计完成" + str(g_var.SUCCESS_COUNT) + "个\n\n\n\n\n")
def __register_one(self, Session, present_website: str, email_and_passwd):
    """Register one account, activate it via the mailed link and store it.

    Args:
        Session: Session object used for the sign-up POST and the
            activation GET.
        present_website: Current site name; used as the database table name.
        email_and_passwd: Mailbox credentials; ``[0]`` is the address and
            ``[1]`` is the mailbox password.

    Returns:
        On success a dict with ``id`` and ``user_id`` (both the inserted row
        id), ``username``, ``password``, ``email`` and ``cookie``.
        Otherwise a status code:
        0: no activation link was obtained or the database insert failed;
        -1: the sign-up request or the activation request returned -1.
    """
    FullName = generate_random_string(15, 20)
    password = generate_random_string(10, 15)

    # Automatic captcha solving is currently disabled; the token is typed in
    # by hand.
    # googlekey = "6LevIjoUAAAAAEJnsStYfxxf5CEQgST01NxAwH8v"
    # pageurl = "https://www.scoop.it/subscribe?&token=&sn=&showForm=true"
    # recaptcha_value = google_captcha(Session, googlekey, pageurl)
    recaptcha_value = input("请输入验证码:")

    registerData = {
        'jsDetectedTimeZone': 'Asia/Shanghai',
        'pc': '',
        'displayName': FullName,
        'shortName': FullName,
        'email': email_and_passwd[0],
        'password': password,
        'avatar': '',
        'upload-image-original-url': '',
        'job': 'My personal brand or blog',
        'g-recaptcha-response': recaptcha_value,
        'subscribe': ''
    }
    headers = {
        'content-type': 'application/x-www-form-urlencoded',
        'cookie': 'messagesUtk=beaaffd5ab3d4039b7e27015e4203fda;',
        'origin': 'https://www.scoop.it',
        'referer': 'https://www.scoop.it/subscribe?&token=&sn=&showForm=true',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36',
    }
    url = "https://www.scoop.it/subscribe?&token=&sn=&showForm=true"
    result = Session.post(url, headers=headers, data=registerData, timeout=g_var.TIMEOUT)
    if result == -1:
        g_var.logger.error("提交注册信息超时")
        return -1
    g_var.logger.info("提交注册..." + result.text)

    # Raw string: same pattern as before, without the invalid "\?" escape
    # inside a normal string literal.
    re_text = r'(https://www.scoop.it/confirm\?.*?)" '
    email_verify_obj = EmailVerify(email_and_passwd[0], email_and_passwd[1], re_text)
    verify_url = account_activation(Session, email_verify_obj)
    # presumably account_activation signals failure with -1 like the other
    # helpers used here - TODO confirm. Returning 0 asks the caller to go on
    # with a different mailbox.
    if verify_url == -1:
        g_var.logger.error("获取激活链接失败")
        return 0

    result = Session.get(url=verify_url, headers=headers, timeout=g_var.TIMEOUT)
    if result == -1:
        g_var.logger.error("访问激活链接超时")
        return -1

    # Read the cookies set by the activation page.
    # Original note: the cookie may be missing at this point; the site might
    # still require choosing topics of interest first.
    cookie_dict = result.cookies.get_dict()
    g_var.logger.info(cookie_dict)
    cookie = str(cookie_dict)

    # str(dict) contains single quotes, which would end the SQL string
    # literal early; escape backslashes and quotes so the stored value is
    # exactly ``cookie``.
    cookie_sql = cookie.replace("\\", "\\\\").replace("'", "''")
    sql = "INSERT INTO " + present_website + "(username, password, mail, status, cookie) VALUES('" + FullName + \
        "', '" + password + "', '" + email_and_passwd[0] + "', '" + str(0) + "', '" + cookie_sql + "');"
    last_row_id = MysqlHandler().insert(sql)
    if last_row_id != -1:
        # Return every field the caller reads when it builds its login
        # record; "user_id" is kept for existing users of that key.
        return {
            "id": last_row_id,
            "user_id": last_row_id,
            "username": FullName,
            "password": password,
            "email": email_and_passwd[0],
            "cookie": cookie,
        }
    else:
        g_var.logger.error("数据库插入用户注册数据失败")
        return 0