class DBConfig(object):
    """Read and write INI-style settings stored in ``config.ini``.

    The file path is ``os.path.join(ROOT_PATH, "config.ini")``. Every public
    method re-reads the file into the shared ``ConfigParser`` before acting.
    """

    # Placeholder written to the log instead of the real password.
    _MASK = "******"

    def __init__(self):
        self.config = ConfigParser()
        self.name = "config.ini"
        self.sql_path = os.path.join(ROOT_PATH, self.name)
        self.log = LogHandler("db")

    def _reload(self):
        """Merge the current on-disk file into the in-memory parser."""
        self.config.read(self.sql_path, encoding="utf-8")

    def _save(self):
        """Write the in-memory parser back to the config file."""
        with open(self.sql_path, "w", encoding="utf8") as f:
            self.config.write(f)

    def add_db_config(self, dbtype, host, port, user, password, database,
                      charset):
        """
        Add or overwrite a database configuration section.

        :param dbtype: database type, used as the section name
        :param host: host
        :param port: port (int or str; stored as text)
        :param user: user name
        :param password: password
        :param database: database name
        :param charset: character set
        :return: True once the section has been written
        """
        self._reload()
        if dbtype not in self.config:
            self.config.add_section(dbtype)
        settings = (
            ("host", host),
            ("port", port),
            ("user", user),
            ("password", password),
            ("database", database),
            ("charset", charset),
        )
        for option, value in settings:
            # ConfigParser.set() rejects non-string values with TypeError,
            # so an int port must be converted before it is stored.
            self.config.set(dbtype, option, str(value))
        self._save()
        # The password is masked so credentials never reach the log.
        self.log.info(
            "Amend the success , Modifying the data %s" %
            [dbtype, host, port, user, self._MASK, database, charset])
        return True

    def get_db_config(self, dbtyep):
        """
        Return the options of one database section.

        :param dbtyep: database type (section name)
        :return: dict of the section's options with ``port`` converted to
            int, or None when the section does not exist
        """
        self._reload()
        if dbtyep not in self.config:
            self.log.error("Parameter error %s" % dbtyep)
            return None
        option = dict(self.config.items(dbtyep))
        if "port" in option:
            option["port"] = int(option["port"])
        # Log a copy with the password masked; the returned dict is intact.
        loggable = {
            key: (self._MASK if key == "password" else value)
            for key, value in option.items()
        }
        self.log.info("success %s" % loggable)
        return option

    def update_config(self, section, option, value):
        """
        Change the value of an option that already exists.

        :param section: section name
        :param option: key to modify
        :param value: new value (stored as text)
        :return: True on success, None when the section or option is missing
        """
        self._reload()
        if section not in self.config.sections():
            self.log.error("Parameter error %s" % section)
            return None
        # has_option() normalises the option name the same way set() does,
        # so a mixed-case name still matches its stored lower-case key.
        if not self.config.has_option(section, option):
            self.log.error("Parameter error %s" % option)
            return None
        self.config.set(section, option, str(value))
        self.log.info("Need to be modified")
        self._save()
        self.log.info("Amend the success")
        return True

    def add_config(self, section, option, value):
        """
        Set an option, creating the section first when it does not exist.

        :param section: section name
        :param option: key to set
        :param value: value to store (stored as text)
        :return: True once the file has been written
        """
        self._reload()
        if section not in self.config.sections():
            self.config.add_section(section)
        self.config.set(section, option, str(value))
        self._save()
        self.log.info("Amend the success")
        return True
class NetEase(object):
    """Client for the NetEase Cloud Music ``/weapi`` endpoints.

    Each public ``songs_*`` method builds a parameter dict and sends it
    through :meth:`request`, which encrypts the payload and decodes the JSON
    reply.
    """

    def __init__(self):
        """
        Build the default request headers, the HTTP session and the logger.
        """
        self.header = {
            "Accept": "*/*",
            "Accept-Encoding": "gzip,deflate,sdch",
            "Accept-Language": "zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "music.163.com",
            "Referer": "http://music.163.com",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
        }
        self.session = requests.session()
        self.log = LogHandler('NeteaseApi')

    def _raw_request(self, method, url, data=None):
        """
        Send the HTTP request.

        :param method: "POST" or "GET"
        :param url: full url
        :param data: sent as query parameters for GET, as form body for POST
        :return: the response object returned by the session
        :raises ValueError: when ``method`` is neither "GET" nor "POST"
        """
        if method == "GET":
            return self.session.get(url,
                                    params=data,
                                    headers=self.header,
                                    timeout=DEFAULT_TIMEOUT)
        if method == "POST":
            return self.session.post(url,
                                     data=data,
                                     headers=self.header,
                                     timeout=DEFAULT_TIMEOUT)
        # Previously this fell through with an unbound local variable.
        raise ValueError("unsupported HTTP method: {!r}".format(method))

    def _get_form_data(self, encrypt_data):
        """
        Build the encrypted form fields.

        The payload goes through ``netease.aes`` twice: first with
        ``netease.NONCE``, then with a key from ``netease.create_key(16)``.
        That key is passed through ``netease.rsa`` for ``encSecKey``.

        :param encrypt_data: payload to encrypt
        :return: {"params": ..., "encSecKey": ...}
        """
        key = netease.create_key(16)
        return {
            "params": netease.aes(netease.aes(encrypt_data, netease.NONCE),
                                  key),
            "encSecKey": netease.rsa(key, netease.PUBKEY, netease.MODULUS)
        }

    def request(self, method, path, data=None, default=None):
        """
        Encrypt ``data``, send it to ``BASE_URL + path`` and decode the reply.

        :param method: "POST" or "GET"
        :param path: url path appended to BASE_URL
        :param data: unencrypted parameters; copied, never modified in place
        :param default: value returned when the request or the JSON decoding
            fails; a fresh ``{"code": -1}`` when omitted
        :return: the decoded JSON response, or ``default`` on failure
        """
        url = "{}{}".format(BASE_URL, path)
        # Build the fallback per call so callers never share one dict.
        fallback = {"code": -1} if default is None else default
        # Work on a copy so the caller's dict is left untouched.
        payload = dict(data) if data is not None else {}
        csrf_token = ""
        payload.update({"csrf_token": csrf_token})
        params = self._get_form_data(json.dumps(payload).encode('utf-8'))

        try:
            self.log.debug(
                '[Netease api] url: {};\trequest data: {};\tparams: {}'.
                format(url, payload, params))
            raw = self._raw_request(method, url, params)
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an unsupported method from _raw_request.
            self.log.error('[Netease api] request error: {}'.format(e))
            return fallback

        try:
            result = raw.json()
        except ValueError:
            # Return the fallback rather than the undecodable response object.
            self.log.error(
                "[Netease api] request error; Path: {}, response: {}".format(
                    path, raw.text[:200]))
            return fallback

        self.log.debug('[Netease api] url: {};\tresponse data: {}'.format(
            url, result))
        return result

    def songs_url(self, song_id):
        """
        Request the playable url of a song.

        Example payload:
        {ids: "[514235010]", level: "standard", encodeType: "aac", csrf_token: ""}

        :param song_id: song id
        :return: decoded JSON response
        """
        path = "/weapi/song/enhance/player/url/v1?csrf_token="
        params = {
            'ids': '[' + str(song_id) + ']',
            'level': 'standard',
            'encodeType': 'aac',
            'csrf_token': ''
        }
        return self.request(POST, path, params)

    def songs_lyric(self, song_id):
        """
        Request the lyrics of a song.

        Example payload: {id: "186453", lv: -1, tv: -1, csrf_token: ""}

        :param song_id: song id
        :return: decoded JSON response
        """
        path = "/weapi/song/lyric?csrf_token="
        params = {'id': str(song_id), 'lv': -1, 'tv': -1, 'csrf_token': ''}
        return self.request(POST, path, params)

    def songs_search(self, keyword, offset=0, limit=30):
        """
        Search songs by keyword; the usual search entry point.

        :param keyword: search text
        :param offset: index of the first result
        :param limit: maximum number of results
        :return: decoded JSON response
        """
        path = '/weapi/cloudsearch/get/web?csrf_token='
        params = {
            'csrf_token': '',
            'hlposttag': '</span>',
            'hlpretag': '<span class="s-fc7">',
            'limit': str(limit),
            'offset': str(offset),
            's': str(keyword),
            'total': 'true',
            'type': '1'
        }
        return self.request(POST, path, params)

    def songs_search_(self, song):
        """
        Query the search-box suggestion endpoint; rarely used.

        Example payload: {s: "...", limit: "8", csrf_token: ""}

        :param song: search text
        :return: decoded JSON response
        """
        path = "/weapi/search/suggest/web?csrf_token="
        params = {'s': str(song), 'limit': 8, 'csrf_token': ''}
        return self.request(POST, path, params)

    def songs_detail(self, song_id):
        """
        Request the details of one song.

        :param song_id: song id (required)
        :return: decoded JSON response
        """
        path = "/weapi/v3/song/detail?csrf_token="
        params = {
            'id': str(song_id),
            # NOTE(review): this builds a single-quoted, non-JSON string;
            # confirm the endpoint accepts it before changing the format.
            'c': "[{'id': " + str(song_id) + "}]",
            'csrf_token': ''
        }
        return self.request(POST, path, params)
class SqliteClient(object):
    """SQLite helper for the ``ipdaili`` proxy table."""

    def __init__(self, dbtype='sqlit'):
        """
        Open the SQLite database described by a configuration section.

        The section must provide ``path`` (a directory, joined onto the
        parent directory of CURRENT_PATH) and ``dbname`` (the file name).

        :param dbtype: name of the configuration section to read
        :raises ValueError: when the configuration section does not exist
        """
        self.log = LogHandler("db")
        db_config = DBConfig().get_db_config(dbtype)
        if db_config is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                "no database configuration section named %r" % dbtype)
        root_path = os.path.join(
            os.path.dirname(os.path.abspath(CURRENT_PATH)),
            db_config.get('path'))
        db_path = os.path.join(root_path, db_config.get("dbname"))
        self.log.debug("sqlite database path: %s" % db_path)
        self.conn = sqlite3.connect(db_path)
        self.c = self.conn.cursor()

    def create_table_sqlite(self):
        """
        Create the ``ipdaili`` table when it does not exist yet.

        :return: True on success, False when the statement failed
        """
        sql = "create table if not exists ipdaili(ip_addr TEXT, ip_port TEXT, type TEXT,ip_proxy TEXT, Downloadtime TEXT)"
        try:
            self.c.execute(sql)
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("create success")
        return True

    def insert_table_sqlite(self, ip_addr, ip_port, type, ip_proxy):
        """
        Insert one proxy row stamped with the current local time.

        :param ip_addr: ip address
        :param ip_port: port
        :param type: proxy type
        :param ip_proxy: full proxy url
        :return: True on success, False when the statement failed
        """
        downloadtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            self.c.execute(
                "INSERT INTO ipdaili (ip_addr,ip_port,type,ip_proxy,Downloadtime) VALUES (?,?,?,?,?)",
                (ip_addr, ip_port, type, ip_proxy, downloadtime))
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("insert success")
        return True

    def search_table_sqlite(self, sql="select * from ipdaili"):
        """
        Execute a statement and return all resulting rows.

        :param sql: SQL statement to execute
        :return: list of row tuples, or False when the statement failed
        """
        try:
            # Fetch before committing so the result set is fully read first.
            rows = self.c.execute(sql).fetchall()
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("search success")
        return rows

    def __del__(self):
        """
        Close the connection, if one was opened.
        """
        # __init__ may fail before self.conn exists; __del__ still runs.
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()
class IpSpider(object):
    """Scrape free proxy lists from pcdaili.com and store working proxies."""

    # Seconds to wait for any single HTTP request before giving up.
    REQUEST_TIMEOUT = 10

    def __init__(self, urltype):
        """
        :param urltype: 0 - domestic high-anonymity proxies;
            1 - domestic transparent proxies; 2 - domestic HTTPS proxies;
            3 - foreign high-anonymity proxies
        """
        url_list = {
            0: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=1',
            1: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=2',
            2: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=3',
            3: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=4',
        }
        # Always define the attribute; it is None for an unknown urltype.
        self.url = url_list.get(urltype)
        self.ua = UserAgent()
        self.sqlite = SqliteClient()
        self.sqlite.create_table_sqlite()
        self.log = LogHandler("db")

    def run_spider(self, page):
        """
        Download and parse list pages 1..page.

        A page whose download or parsing fails is logged and skipped.

        :param page: number of pages to fetch
        :return: list with one entry per fetched page, each a list of row
            tuples as produced by :meth:`group_list`
        :raises ValueError: when the spider was created with an unknown
            urltype
        """
        if self.url is None:
            raise ValueError("IpSpider was created with an unknown urltype")
        iplist = []
        for page_no in range(1, page + 1):
            headers = {'Host': 'www.pcdaili.com', "user-agent": self.ua.chrome}
            sp_url = self.url + "&page=%d" % page_no
            try:
                r = requests.get(sp_url,
                                 headers=headers,
                                 timeout=self.REQUEST_TIMEOUT)
                html = etree.HTML(r.text)
                res = html.xpath(
                    '/html/body/div/div/div[2]/table/tbody/tr/td/text()')
            except Exception as e:
                # Skip the page rather than reuse a stale response.
                self.log.error(e)
            else:
                iplist.append(self.group_list(res, 7))
            finally:
                # Pause between pages whether or not the request worked.
                time.sleep(1)
        self.log.info("spider html ok")
        return iplist

    def group_list(self, grouped, length):
        """
        Split a flat list into consecutive tuples of ``length`` items.

        :param grouped: flat list of cell values
        :param length: number of items per group
        :return: at most the first 13 groups, e.g. [(), ()]; the last group
            may be shorter than ``length``
        """
        d = [
            tuple(grouped[i:i + length])
            for i in range(0, len(grouped), length)
        ]
        return d[:13]

    def ip_insert_sql(self, ip_list):
        """
        Validate every scraped proxy and store the ones that work.

        :param ip_list: pages of row tuples as returned by run_spider
        :return: True
        """
        for page_rows in ip_list:
            for row in page_rows:
                # A trailing partial row lacks the needed columns.
                if len(row) < 4:
                    continue
                ip_addr, ip_port, proxy_type = row[0], row[1], row[3]
                ip_proxy = proxy_type + "://" + ip_addr + ":" + ip_port
                if self.validate_ip(type=proxy_type, ip_proxy=ip_proxy):
                    self.sqlite.insert_table_sqlite(ip_addr=ip_addr,
                                                    ip_port=ip_port,
                                                    type=proxy_type,
                                                    ip_proxy=ip_proxy)
        return True

    def validate_ip(self, type, ip_proxy):
        """
        Check whether https://weibo.com/ can be fetched through the proxy.

        :param type: proxy type (kept for interface compatibility)
        :param ip_proxy: proxy url
        :return: True when the request succeeded, False otherwise
        """
        test_url = "https://weibo.com/"
        # requests selects a proxy by the scheme of the target url, so both
        # schemes map to the proxy; keying by type left http proxies unused.
        proxies = {"http": ip_proxy, "https": ip_proxy}
        try:
            requests.get(test_url,
                         proxies=proxies,
                         timeout=self.REQUEST_TIMEOUT)
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info(ip_proxy + " is ok !test url is " + test_url)
        return True
class MysqlCline(object):
    """MySQL helper for the ``ipdaili`` proxy table."""

    def __init__(self, dbtype):
        """
        Open a MySQL connection from a configuration section.

        A connection is only opened when ``dbtype`` is "mysql"; otherwise
        ``self.connection`` stays None.

        :param dbtype: database type (configuration section name)
        :raises ValueError: when ``dbtype`` is "mysql" but the configuration
            section does not exist
        """
        self.log = LogHandler("db")
        # Define the attribute up front so __del__ always finds it.
        self.connection = None
        dbconfig = DBConfig().get_db_config(dbtype)
        if dbtype == "mysql":
            if dbconfig is None:
                raise ValueError(
                    "no database configuration section named %r" % dbtype)
            self.connection = pymysql.connect(**dbconfig)

    def _run(self, sql, args=None, fetch=False):
        """Execute one statement on a short-lived cursor.

        Returns all rows when ``fetch`` is true; otherwise commits and
        returns None. The cursor is always closed.
        """
        cursor = self.connection.cursor()
        try:
            if args is None:
                cursor.execute(sql)
            else:
                cursor.execute(sql, args)
            if fetch:
                return cursor.fetchall()
            self.connection.commit()
            return None
        finally:
            cursor.close()

    def create_table_mysql(self):
        """
        Create the ``ipdaili`` table when it does not exist yet.

        :return: True on success, False when the statement failed
        """
        sql = """CREATE TABLE IF NOT EXISTS ipdaili (
                  ip_addr varchar(30) DEFAULT NULL,
                  ip_port varchar(11) DEFAULT NULL,
                  type varchar(10) DEFAULT NULL,
                  Downloadtime varchar(30) DEFAULT NULL
                )"""
        try:
            self._run(sql)
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("create success")
        return True

    def insert_table_mysql(self, ip_addr, ip_port, type):
        """
        Insert one proxy row stamped with the current local time.

        :param ip_addr: ip address
        :param ip_port: port
        :param type: proxy type
        :return: True on success, False when the statement failed
        """
        downloadtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Values are bound by the driver, not concatenated, so scraped text
        # cannot alter the statement.
        sql = "INSERT INTO ipdaili VALUES (%s, %s, %s, %s)"
        try:
            self._run(sql, (ip_addr, ip_port, type, downloadtime))
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("inserter sql success")
        return True

    def search_table_mysql(self, sql="select * from ipdaili"):
        """
        Execute a query and return all rows.

        :param sql: query statement
        :return: the fetched rows, or False when the query failed
        """
        try:
            res = self._run(sql, fetch=True)
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("search success")
        return res

    def __del__(self):
        """
        Close the database connection, if one was opened.
        """
        connection = getattr(self, "connection", None)
        if connection is None:
            return
        try:
            connection.close()
        except Exception:
            # Already closed, or the interpreter is shutting down; a
            # finalizer has nothing useful to do with the error.
            pass