def __init__(self):
    """Prepare the Sina single-stock quote endpoint and a MySQL connection."""
    # {0} is substituted with the stock symbol at request time.
    self.stock_single_url = 'http://hq.sinajs.cn/list={0}'
    self.mysql_conn = MysqlConn()
# Wrap the scraped fields for this product and queue the record for insertion.
collection_item = AwsCollection(itemId, title, itemUrl, int(commentCount),
                                score, count, indexSeq, itemType, keyword)
awsCollection.append(collection_item)
indexSeq += 1

# Follow the "Next Page" link to keep paginating through the result list.
nextUrl = soup.find("a", {"title": "Next Page"}).attrs["href"]
pageUrl = "https://www.amazon.com" + nextUrl
nextPage = session.get(pageUrl, headers=headers)
html = nextPage.content
count += 1
# ngrams = OrderedDict(sorted(output.items(), key=lambda t: t[1], reverse=True))
MysqlConn.insertCollection(awsCollection)
#!/usr/bin/python # -*- coding: UTF-8 -*- from AwsCollection import AwsCollection from AwsCollection import StatisItem import MysqlConn import Ngrams from collections import OrderedDict import collections keyword = "hair wax" items = MysqlConn.queryCollection(keyword) output = [] statisItems = [] wordNum = 2 for item in items: tmpOutput = Ngrams.getNgrams(item.title, wordNum) output.extend(tmpOutput) resultOutput = collections.Counter(output) for item in resultOutput.items(): print "key:"+item[0] + ";value:"+ str(item[1]) statis = StatisItem(item[0],item[1],wordNum,keyword) statisItems.append(statis)
import time

import MysqlConn
import ipanalysis


def get_ip():
    """Return the rows of IPs recorded today (or later) from `iptables`.

    Returns an empty list when the query fails.  The original code used
    `time` without importing it and left `result` unbound when the query
    raised, so the `finally: return result` crashed with a NameError that
    masked the real error; `result` is now pre-initialized and the
    `finally`-return (which also suppresses unexpected exceptions) is gone.
    """
    result = []
    try:
        sql = "select ip from iptables where staticdate>= %s"
        result = mysqlconn.query_all(
            sql, time.strftime("%Y-%m-%d", time.localtime(int(time.time()))))
    except Exception as e:
        # Best-effort: log the failure and fall through to the empty result.
        print(e)
    return result


if __name__ == '__main__':
    mysqlconn = MysqlConn.MySqlConn()
    iplist = list(get_ip())

    # Resolve each IP; ip_analysis returns 0 for addresses it cannot analyze.
    ipinfoList = []
    for ip in iplist:
        ipinfo = ipanalysis.ip_analysis(ip['ip'])
        if ipinfo == 0:
            continue
        ipinfoList.append(ipinfo)

    insertsql = 'insert into ipinfo VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
    mysqlconn.insert_many(insertsql, ipinfoList)
    mysqlconn.dispose()
def __init__(self):
    """Prepare the Sina market-list endpoint, page size, and backend clients."""
    # {0}=page number, {1}=page size, {2}=market node; results are sorted
    # by change percent, descending.
    self.stock_list_url = ('http://vip.stock.finance.sina.com.cn/quotes_service/api/'
                           'json_v2.php/Market_Center.getHQNodeData?page={0}&num={1}'
                           '&sort=changepercent&asc=0&node={2}&symbol=&_s_r_a=page')
    self.size = 80
    self.mysql_conn = MysqlConn()
    self.geode_client = GeodeClient()
import os

import MysqlConn
import ProductSku

s = os.sep
# Root folder holding the crawled data (Desktop/crawler directory on drive D).
root = unicode("d:" + s + "桌面" + s + "爬虫" + s, 'utf-8')

# List the sku detail files that need parsing.
fileList = ProductSku.get_file(os.path.join(root, 'bevol-detial' + s))
# print os.path.join(root,'bevol-detial'+s)
# # print fileList

# Create the database connection instance.
myconn = MysqlConn.MySqlConn()

# Extract the entity fields by parsing each JSON document.
# resultList = get_entity(fileList)
# Batch-insert the entity rows.
# sql = 'insert into product.sku_entity VALUES (%s, %s, %s, %s, %s,%s, %s, ' \
#       '%s, %s, %s,%s, %s, %s, %s, %s,%s, %s, %s, %s, %s, %s, %s, %s)'
# print myconn.insert_many(sql,resultList)
#
# Extract the goods fields.
# resultList = get_goods(fileList)
# Batch-insert the goods rows.
# sql = "insert into product.sku_goods VALUES (%s, %s, %s, %s, %s,%s,%s, %s, %s, %s, " \
#       "%s,%s,%s, %s, %s, %s, %s,%s,%s, %s, %s, %s,%s)"
# print myconn.insert_many(sql,resultList)
#
# Fetch the doyen-related information next.
import requests
import lxml
import re
from bs4 import BeautifulSoup
import Ngrams
from collections import OrderedDict
import collections
from AwsCollection import AwsCollection
from AwsCollection import DescItem
import MysqlConn

# Fetch every stored product page for the keyword and concatenate the text of
# its feature-bullet list items into one description string per product.
keyword = "hair wax"
items = MysqlConn.queryCollection(keyword)

session = requests.session()
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5)AppleWebKit 537.36 (KHTML, like Gecko) Chrome",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
}

descItems = []
for item in items:
    url = item.url
    r = session.get(url, headers=headers)
    html = r.content
    soup = BeautifulSoup(html, 'lxml')

    # Hidden feature bullets hold the product description fragments.
    listItem = soup.findAll("li", {"class": "showHiddenFeatureBullets"})
    descText = ""
    for desc in listItem:
        descText += desc.find("span").get_text()
result = json.dumps(data) fp = open(filename, 'a+') fp.write(result + '\n') driver.close() except Exception as e: data = { 'email': email, 'date': date, 'code': code, 'callable_url': path_url, 'error_info': repr(e) } filename = './log/' + date + 'error_log.txt' result = json.dumps(data) fp = open(filename, 'a+') fp.write(result + '\n') driver.close() # exit('参数错误' + repr(e)) a = MysqlConn.Connection() code_sql = "select code,url from code_url" a.execute(code_sql) data = a.fetchall() for value in data: code = value[0] url = value[1] run(code, url) a.close()