Пример #1
0
 def __init__(self):
     """Store the single-stock quote URL template and open a database handle.

     The URL template contains one ``{0}`` placeholder; presumably callers
     fill it with a stock symbol via ``str.format`` (call sites are not
     visible here, so confirm).
     """
     single_quote_template = 'http://hq.sinajs.cn/list={0}'
     self.stock_single_url = single_quote_template
     # MysqlConn comes from outside this snippet; it is instantiated with no arguments.
     self.mysql_conn = MysqlConn()
Пример #2
0
        # Bundle the values gathered for one result into an AwsCollection record
        # (commentCount is converted to int; everything else is passed as-is).
        collecion = AwsCollection(itemId,title,itemUrl,int(commentCount),score,count,indexSeq,itemType,keyword)

        awsCollection.append(collecion)

        # Advance the running sequence number that is stored on each record.
        indexSeq = indexSeq +1



    # Look up the anchor titled "Next Page" and take its href.
    # NOTE(review): if soup is a BeautifulSoup object, find() returns None when
    # no such anchor exists and the .attrs access would raise AttributeError --
    # confirm how the enclosing loop (not visible here) is meant to stop.
    nextUrl = soup.find("a",{"title":"Next Page"}).attrs["href"]

    # The href is appended to the site origin, so it is presumably site-relative.
    pageUrl = "https://www.amazon.com"+nextUrl

    # Fetch the next page and keep its raw body in `html`.
    nextPage = session.get(pageUrl, headers=headers)
    html = nextPage.content
    # count goes up by one per fetched page; it is also passed into each record above.
    count = count +1


#ngrams = OrderedDict(sorted(output.items(), key=lambda t: t[1], reverse=True))




# Hand the whole list of collected records to MysqlConn.insertCollection in one call.
MysqlConn.insertCollection(awsCollection)






Пример #3
0
#!/usr/bin/python
# -*- coding: UTF-8 -*-


from AwsCollection import AwsCollection
from AwsCollection import StatisItem
import MysqlConn
import Ngrams
from collections import OrderedDict
import collections

# Count how often each n-gram occurs across the titles returned by
# MysqlConn.queryCollection(keyword), print every count, and wrap each one
# in a StatisItem appended to statisItems.
keyword = "hair wax"
wordNum = 2  # second argument passed to Ngrams.getNgrams (n-gram size)

items = MysqlConn.queryCollection(keyword)

# Flat list of every n-gram produced from every item title.
output = []
for item in items:
    output.extend(Ngrams.getNgrams(item.title, wordNum))

resultOutput = collections.Counter(output)

statisItems = []
for ngram, occurrences in resultOutput.items():
    # Parenthesised single-argument print: the bare `print "..."` statement
    # is a SyntaxError on Python 3, while this form prints the same text on
    # both Python 2 and Python 3.
    print("key:" + ngram + ";value:" + str(occurrences))
    statisItems.append(StatisItem(ngram, occurrences, wordNum, keyword))
Пример #4
0
import MysqlConn
import ipanalysis


def get_ip():
    """Return the ``ip`` rows of ``iptables`` whose ``staticdate`` is today or later.

    The query runs on the module-level ``mysqlconn`` object, which the
    ``__main__`` section of this script creates. The lookup is best-effort:
    any failure is printed and an empty list is returned, so callers can
    always iterate over the result.

    Returns:
        Whatever ``mysqlconn.query_all`` returns on success, or ``[]`` when
        the query could not be executed.
    """
    # Local import: no top-level `import time` is visible in this file.
    import time

    sql = "select ip from iptables where staticdate>= %s"
    # strftime() without an explicit time tuple formats the current local time.
    today = time.strftime("%Y-%m-%d")
    try:
        return mysqlconn.query_all(sql, today)
    except Exception as e:
        # Previously `return result` sat in a `finally` clause; when the query
        # failed `result` was never bound, so an UnboundLocalError replaced
        # the real error. Report the failure and return an empty result.
        print(e)
        return []


if __name__ == '__main__':
    # Script entry point: read the IPs, analyse each one, bulk-insert the
    # analysis rows into ipinfo, then dispose of the connection.
    insertsql = 'insert into ipinfo VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'

    # Must be bound at module level: get_ip() reads the global `mysqlconn`.
    mysqlconn = MysqlConn.MySqlConn()
    iplist = list(get_ip())

    ipinfoList = []
    for ip in iplist:
        ipinfo = ipanalysis.ip_analysis(ip['ip'])
        # A result equal to 0 is skipped and not inserted.
        if ipinfo == 0:
            continue
        ipinfoList.append(ipinfo)

    mysqlconn.insert_many(insertsql, ipinfoList)
    mysqlconn.dispose()
Пример #5
0
 def __init__(self):
     """Store the stock-list URL template and page size, and create the clients.

     The URL template has three placeholders: ``{0}`` for ``page``, ``{1}``
     for ``num`` and ``{2}`` for ``node``. ``size`` is presumably the value
     used for ``num`` (confirm at the call sites, which are not visible here).
     """
     # Adjacent literals are joined at compile time into one URL string.
     list_url_template = (
         'http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/'
         'Market_Center.getHQNodeData'
         '?page={0}&num={1}&sort=changepercent&asc=0&node={2}&symbol=&_s_r_a=page'
     )
     self.stock_list_url = list_url_template
     self.size = 80
     # Both classes come from outside this snippet and take no arguments here.
     self.mysql_conn = MysqlConn()
     self.geode_client = GeodeClient()
Пример #6
0
import os
import MysqlConn
import ProductSku

# Build the crawler output root on drive d: as a unicode path that ends
# with a separator (the two non-ASCII folder names are kept verbatim).
s = os.sep
root = unicode(s.join(["d:", "桌面", "爬虫", ""]), 'utf-8')
# Get the SKU directory listing from the 'bevol-detial' folder under root.
fileList = ProductSku.get_file(os.path.join(root, 'bevol-detial' + s))

# print os.path.join(root,'bevol-detial'+s)
#
# print fileList

# Create the database connection instance.
myconn = MysqlConn.MySqlConn()
# Get the field data for the entity table by parsing the JSON strings.
# resultList = get_entity(fileList)
# Insert the entity table rows, executed as one batch.
# sql = 'insert into product.sku_entity VALUES (%s, %s, %s, %s, %s,%s, %s, ' \
#       '%s, %s, %s,%s, %s, %s, %s, %s,%s, %s, %s, %s, %s, %s, %s, %s)'
# print myconn.insert_many(sql,resultList)

# # Get the field data for the goods table.
# resultList = get_goods(fileList)
# # Insert the goods table rows, executed as one batch.
# sql = "insert into product.sku_goods VALUES (%s, %s, %s, %s, %s,%s,%s, %s, %s, %s, " \
#       "%s,%s,%s, %s, %s, %s, %s,%s,%s, %s, %s, %s,%s)"
# print myconn.insert_many(sql,resultList)

# # Get the doyen-related data.
Пример #7
0
import requests
import lxml
import re
from bs4 import BeautifulSoup
import Ngrams
from collections import OrderedDict
import collections
from AwsCollection import AwsCollection
from AwsCollection import DescItem
import MysqlConn

# For every stored item of the keyword, download its page and concatenate
# the text of the feature bullets into descText.
keyword = "hair wax"
items = MysqlConn.queryCollection(keyword)

# Request headers sent with every page fetch.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5)AppleWebKit 537.36 (KHTML, like Gecko) Chrome",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
}
session = requests.session()

descItems = []

for item in items:
    url = item.url
    r = session.get(url, headers=headers)
    html = r.content
    soup = BeautifulSoup(html, 'lxml')
    # Every <li> carrying the "showHiddenFeatureBullets" class.
    listItem = soup.findAll("li", {"class": "showHiddenFeatureBullets"})

    # Text of the first <span> inside each bullet, joined with no separator.
    descText = "".join(desc.find("span").get_text() for desc in listItem)
Пример #8
0
            # Serialise `data` and append it to `filename` as one JSON line.
            result = json.dumps(data)
            # NOTE(review): fp is not closed anywhere in the visible code --
            # confirm, or switch to a `with open(...)` block.
            fp = open(filename, 'a+')
            fp.write(result + '\n')
            driver.close()

    except Exception as e:
        # Failure path: build an error record from the current inputs plus
        # the repr of the exception.
        data = {
            'email': email,
            'date': date,
            'code': code,
            'callable_url': path_url,
            'error_info': repr(e)
        }
        # The error log lives under ./log/ and its name starts with `date`.
        filename = './log/' + date + 'error_log.txt'
        result = json.dumps(data)
        # NOTE(review): same unclosed file handle as in the success path above.
        fp = open(filename, 'a+')
        fp.write(result + '\n')
        # NOTE(review): assumes `driver` was already created before the
        # exception occurred -- verify against the start of this function,
        # which is not visible here.
        driver.close()
        # Disabled: abort with a message built from the literal below
        # (it reads "parameter error") plus the exception repr.
        # exit('参数错误' + repr(e))


# Call run() once for every row selected from the code_url table, then
# close the connection object.
code_sql = "select code,url from code_url"

a = MysqlConn.Connection()
a.execute(code_sql)
data = a.fetchall()
for value in data:
    # Each row holds the code at index 0 and the url at index 1.
    code, url = value[0], value[1]
    run(code, url)
a.close()