def open_spider(self, spider):
    """Runs once when the spider starts.

    Opens the MySQL connection, makes sure both SZSE notice log files
    exist, and attaches the DB handle and cursor to the spider.
    """
    connection, cursor = connect_mysql(
        MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, MYSQL_DATABASE)
    # Ensure the log files are present before crawling begins.
    for log_name in ('szse_notice.log', 'szse_cp_notice.log'):
        check_file.check_log(log_name)
    spider.db = connection
    spider.cs = cursor
def open_spider(self, spider):
    """Runs once at spider start-up.

    Opens the business MySQL connection and the proxy-pool connection,
    loads the current proxy list, and builds the list of share codes to
    crawl. All handles are attached to the spider instance.
    """
    # Business database connection.
    db, cs = connect_mysql(
        MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, MYSQL_DATABASE)
    spider.db = db
    spider.cs = cs
    # Proxy-pool database connection and the proxy list it provides.
    pro_db, pro_cs = connect_mysql(
        MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, PROXY_DB)
    spider.pro_db = pro_db
    spider.pro_cs = pro_cs
    spider.proxy_list = get_proxy(spider.pro_cs)
    # Share codes to crawl: drop falsy entries, and normalize "nothing to
    # crawl" (no rows, or only falsy rows) to None in a single step.
    # Fix: the original re-tested `shares` for truthiness twice.
    shares = get_shares(spider.cs)
    filtered = [code for code in shares if code] if shares else []
    spider.shares_list = filtered or None
    # finished_code = finished_shares(spider.cs)
    # shares = set(shares) - set(finished_code)
def open_spider(self, spider):
    """Runs once when the spider starts.

    Applies only to the 'shares_concept' spider: connects to the proxy
    pool and the business database, ensures the log file exists, and
    attaches the connections, proxy list and share-code list.
    """
    if spider.name != 'shares_concept':
        return
    # Proxy pool connection and its proxy list.
    pro_db, pro_cs = connect_mysql(
        MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, PROXY_DB)
    spider.pro_db = pro_db
    spider.pro_cs = pro_cs
    spider.proxy_list = get_proxy(spider.pro_cs)
    # Business database connection plus log-file check.
    db, cs = connect_mysql(
        MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, MYSQL_DATABASE)
    check_file.check_log('shares_concept.log')
    spider.db = db
    spider.cs = cs
    # Share codes to crawl; None when the query returned nothing.
    shares = get_shares(spider.cs)
    spider.shares_list = shares if shares else None
def open_spider(self, spider):
    """Runs once at start-up for the SSE spiders.

    Connects to the proxy pool, the business MySQL database, and the
    MongoDB collections, then attaches every handle to the spider.
    """
    if spider.name not in ('sse_cp_notice', 'sse_shares'):
        return
    # Proxy-pool connection and the proxies it currently holds.
    pro_db, pro_cs = connect_mysql(
        MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, PROXY_DB)
    spider.pro_db = pro_db
    spider.pro_cs = pro_cs
    spider.ip_tuple = get_proxy(spider.pro_cs)
    # Business MySQL connection.
    db, cs = connect_mysql(
        MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, MYSQL_DATABASE)
    # NOTE(review): MongoDB host/port are hard-coded here — consider moving
    # them next to the MySQL settings constants.
    client = MongoClient(host='111.193.233.196', port=27017)
    coll = client.shangjiaosuo.sse
    coll_backup = client.shangjiaosuo.sse_backup
    spider.data_cs = coll.find()
    spider.coll = coll
    spider.coll_backup = coll_backup
    spider.db = db
    spider.cs = cs
def alexa_query():
    """Flask view: execute the SQL statement from the `query` request
    argument (default: 'show tables;') and store the result in the
    module-level `temp_json` dict.

    Returns the literal string "Success" when the query completed.

    NOTE(review): the SQL text comes straight from the request, so this
    endpoint executes arbitrary SQL by design — it must never be exposed
    to untrusted callers.
    """
    query = request.args.get('query', 'show tables;')
    connection = connect_mysql(
        host='cs527mysql2.chmrmo5grph7.us-east-1.rds.amazonaws.com',
        user='******',
        password='******',
        db='test')
    try:
        col_name, content, query_time = connection.run_query(query)
        temp_json['col_name'] = col_name
        temp_json['result'] = content
        temp_json['query_time'] = query_time
    finally:
        # Fix: previously the connection leaked if run_query raised.
        connection.disconnect()
    return "Success"
# Per-feature weights read from the config file.
PRODUCT_ORIGIN_weight = config["weight"]["PRODUCT_ORIGIN_weight"]
BRAND_weight = config["weight"]["BRAND_weight"]
# Build the weight dictionary (column name -> weight).
# NOTE(review): PTY_NUM_*_weight values appear to be defined earlier in the
# file (not visible in this chunk) — confirm.
weight_dict = {
    "PTY_NUM_1": PTY_NUM_1_weight,
    "PTY_NUM_2": PTY_NUM_2_weight,
    "PTY_NUM_3": PTY_NUM_3_weight,
    "PRODUCT_ORIGIN_NUM_ID": PRODUCT_ORIGIN_weight,
    "BRAND_ID": BRAND_weight
}
query = """select distinct ITEM_NUM_ID, PTY_NUM_1, PTY_NUM_2, PTY_NUM_3, PRODUCT_ORIGIN_NUM_ID, BRAND_ID from goods_data where ITEM_NUM_ID is not null"""
# Read the goods data from MySQL.
# NOTE(review): the argument order here is (host, port, db, user, password),
# while other connect_mysql calls in this file use (host, port, user,
# passwd, database) — verify which signature this helper actually has.
conn = connect_mysql(mysql_host, mysql_port, mysql_db, mysql_user, mysql_password)
cursor = conn.cursor()
cursor.execute(query)
columns = [
    'ITEM_NUM_ID', 'PTY_NUM_1', 'PTY_NUM_2', 'PTY_NUM_3',
    'PRODUCT_ORIGIN_NUM_ID', 'BRAND_ID'
]
# Collect each SPU (item) and its feature tokens, formatted "column|value".
item_sentence_dict = {}
for line in cursor.fetchall():
    item = line[0]
    # Skip NULL columns. (Style: `is not None` would be idiomatic; kept
    # as-is since this is a documentation-only pass.)
    sentence = [
        column + "|" + str(value)
        for column, value in zip(columns[1:], line[1:])
        if value != None
    ]