Example 1
from databaseUtil import DatabaseUtil
from sessionUtil import SessionUtil
from logUtil import LogUtil
from dictUtil import DictUtil
import traceback
import json
def handleData(returnStr):
    # parse the JSON response and pull out the parallel date/rate lists
    jsonData = json.loads(returnStr)
    dateList = jsonData.get('date')
    dataList = jsonData.get('data1')
    return dateList, dataList
def storeData(dateStr, dataStr, conn, cur, wdzjPlatId):
    # parameterized insert avoids the SQL injection and quoting bugs
    # that come with building the statement by string concatenation
    sql = 'insert into platIncomeRate (date, incomeRate, wdzjPlatId) values (%s, %s, %s)'
    cur.execute(sql, (dateStr, dataStr, wdzjPlatId))
    conn.commit()
conn, cur = DatabaseUtil().getConn()
session = SessionUtil()
logUtil = LogUtil("platIncomeRate.log")
cur.execute('select wdzjPlatId from platData where month="2017-06"')
data = cur.fetchall()
print(data)

# collect the platform ids as strings
mylist = [str(row.get('wdzjPlatId')) for row in data]
print(mylist)
for platId in mylist:
    url = 'http://shuju.wdzj.com/plat-info-target.html'
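The snippet breaks off inside this loop. A plausible completion, assuming SessionUtil.getReq returns the response body as a string and that the endpoint accepts the platform id as a query parameter (both assumptions, not taken from the source):

for platId in mylist:
    url = 'http://shuju.wdzj.com/plat-info-target.html'
    try:
        # hypothetical: pass the platform id as a query parameter
        returnStr = session.getReq(url + '?wdzjPlatId=' + platId)
        dateList, dataList = handleData(returnStr)
        for dateStr, dataStr in zip(dateList, dataList):
            storeData(str(dateStr), str(dataStr), conn, cur, platId)
    except Exception:
        logUtil.warning(traceback.format_exc())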
Example 2
import json
import time
import traceback
from databaseUtil import DatabaseUtil
from dictUtil import DictUtil
from logUtil import LogUtil
from sessionUtil import SessionUtil

def handleData(returnStr):
    # the top of this snippet is cut off; reconstructed here from the pattern
    # in Example 1: parse the JSON response and return the list of plans
    jsonData = json.loads(returnStr)
    planList = jsonData.get('data').get('plans')
    return planList
def storeData(jsonOne, conn, cur, logUtil, loanId):
    amount = jsonOne.get('amount')
    earnInterest = jsonOne.get('earnInterest')
    expectedYearRate = jsonOne.get('expectedYearRate')
    fundsUseRate = jsonOne.get('fundsUseRate')
    planId = jsonOne.get('id')
    name = jsonOne.get('name')
    status = jsonOne.get('status')
    subpointCountActual = jsonOne.get('subpointCountActual')
    # parameterized insert avoids SQL injection and quoting bugs
    sql = ('insert into RRDXinPlanList (amount, earnInterest, expectedYearRate, '
           'fundsUseRate, planId, name, status, subpointCountActual) '
           'values (%s, %s, %s, %s, %s, %s, %s, %s)')
    logUtil.warning(loanId)
    cur.execute(sql, (amount, earnInterest, expectedYearRate, fundsUseRate,
                      planId, name, status, subpointCountActual))
    conn.commit()
session = SessionUtil()
conn, cur = DatabaseUtil().getConn()
logUtil = LogUtil("uplanList.log")
for i in range(1, 73):
    url = ('https://www.renrendai.com/autoinvestplan/listPlan!listPlanJson.action'
           '?pageIndex=' + str(i) + '&_=' + str(int(time.time())))
    try:
        planList = handleData(session.getReq(url))
        for planJson in planList:
            dictObject = DictUtil(planJson)
            storeData(dictObject, conn, cur, logUtil, str(i))
    except Exception:
        # format_exc() returns the traceback as a string; print_exc() returns None
        logUtil.warning(traceback.format_exc())
cur.close()
conn.close()
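Both examples above rely on a SessionUtil whose getReq helper is never shown. A minimal sketch of what it presumably does, assuming it is a thin wrapper around requests (an assumption, not the project's actual code):

import requests

class SessionUtil:
    def __init__(self):
        # one shared session with a browser-like User-Agent (hypothetical)
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': 'Mozilla/5.0'})

    def getReq(self, url):
        # return the response body as text, matching how the examples use it
        return self.session.get(url, timeout=10).text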
Example 3
	if choice == "A" or choice == "a":
		objBluetooth = BluetoothID()

		print("Performing Inquiry")

		#Looking for nearby devices
		nearby_devices = discover_devices(lookup_names=True)

		print("found %d devices" % len(nearby_devices))

		#Print out all the nearby devices
		for addr, name in nearby_devices:
			print("%s - %s" % (addr, name))

		#Fetching the mac address registered in the database
		obj = DatabaseUtil()
		phone = obj.getDeviceID("Altis")

		#addr still holds the MAC of the last device found in the scan above;
		#see the matching sketch after this example for a safer lookup

		objBluetooth.Verify(addr, phone)
	elif choice == "B" or choice == "b":
		"""
		Main entry point of the program
		"""
		obj = QRCodeScanner()

		arguments = obj.argumentParser()

		# initialize the camera
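The branch above verifies only the last device the scan loop happened to visit. A safer pattern, assuming PyBluez's bluetooth.discover_devices, is to search the scan results for the registered address explicitly:

import bluetooth

def find_registered_device(target_addr):
    # scan nearby devices and return the name of the one matching target_addr
    for addr, name in bluetooth.discover_devices(lookup_names=True):
        if addr == target_addr:
            return name
    return None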
Example 4
def __init__(self):
    # Initialize a database helper object
    self.dbUtil = DatabaseUtil()
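DatabaseUtil itself is not shown in any of these examples, and it clearly differs between projects. For the MySQL-style examples above, a minimal hypothetical version consistent with the getConn() usage, and with the dict-shaped rows in Example 1, could look like this, assuming PyMySQL:

import pymysql

class DatabaseUtil:
    def __init__(self):
        # hypothetical connection settings; DictCursor makes fetchall()
        # return dicts, matching row.get('wdzjPlatId') in Example 1
        self.conn = pymysql.connect(host='localhost', user='root',
                                    password='secret', database='mydb',
                                    charset='utf8mb4',
                                    cursorclass=pymysql.cursors.DictCursor)

    def getConn(self):
        return self.conn, self.conn.cursor()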
Example 5
import json
import random
import time
import urllib.request

import requests
from bs4 import BeautifulSoup
from snownlp import SnowNLP

from databaseUtil import DatabaseUtil

# globalParams, newsUrl, headers and the Dummy namespace used below are
# module-level definitions not shown in this excerpt


class NewsCrawler:
    def __init__(self):
        # Initialize a database helper object
        self.dbUtil = DatabaseUtil()

    # Fetch the full text of a news article from its URL
    def getNewsContent(self, url: str):
        content = []
        req = urllib.request.Request(url=url)
        response = urllib.request.urlopen(req, timeout=5)
        raw = response.read()  # read once; the stream cannot be read twice
        try:
            response_read = raw.decode('utf-8')
        except UnicodeDecodeError:
            response_read = raw.decode('gbk')
        soup = BeautifulSoup(response_read, 'lxml')
        article = soup.select_one('div.article')
        if not article:
            article = soup.select_one('div#artibody')
        if not article:
            return content
        for para in article.select('p'):
            line = para.text.strip()
            if line:
                content.append(line)
        return content

    # Uses the parameter annotations supported by Python 3
    def parseNewsDetail(self, r: dict):
        entity = {}
        # News title
        entity['title'] = r['title']
        print(entity['title'])
        # News URL, fetched below to produce the keywords and summary
        entity['url'] = r['url']
        print(entity['url'])
        # News timestamp
        entity['time'] = int(r['time'])
        # Full text of the article
        content = self.getNewsContent(entity['url'])
        contentConcat = '\n'.join(content) if content else ''
        entity['content'] = contentConcat
        # Use SnowNLP's keyword-extraction and summarization interfaces; the
        # underlying algorithm is TextRank (described in detail in the thesis)
        if contentConcat:
            s = SnowNLP(contentConcat)
            # Keywords are sometimes a single common character, so keep only
            # those of length >= 2 that also pass validKeywords below
            keywords10 = s.keywords(10)
            keywords = [
                x for x in keywords10 if (len(x) > 1 and self.validKeywords(x))
            ]
            entity['keywords'] = '|'.join(keywords)
            # Produce a three-sentence summary
            entity['summary'] = '|'.join(s.summary(3))
            # Write the record to the database
            self.dbUtil.insert(entity)

    def getSinaRollNews(self, startPage, endPage):
        for i in range(startPage, endPage):
            print("Page {}".format(i))
            params = globalParams.copy()
            params['page'] = str(i)
            allHtml = requests.get(newsUrl, params=params, headers=headers)
            pageHtml = allHtml.content.decode('gbk')
            pageHtml = pageHtml[pageHtml.index('{'):-1]
            # The payload is a JS object literal rather than strict JSON, so it
            # is eval'd with the Dummy namespace and round-tripped through json
            data = eval(pageHtml, Dummy())
            data = json.loads(json.dumps(data))
            for r in data['list']:
                # Parse each news item and write it to the database
                self.parseNewsDetail(r)
                # Pause briefly so the crawl rate stays low
                time.sleep(random.random() * 6.0)
            time.sleep(10)

    # Keywords are sometimes punctuation or pure digits; filter those out
    def validKeywords(self, x: str):
        return not (x.isdigit() or any(ch in x for ch in '—:",。'))
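The keyword and summary calls come straight from the SnowNLP library (s.keywords and s.summary are its TextRank-based APIs); in isolation they work like this:

from snownlp import SnowNLP

text = '...'  # any reasonably long Chinese article text
s = SnowNLP(text)
print(s.keywords(10))  # top TextRank keywords
print(s.summary(3))    # three-sentence extractive summary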
Example 6
'''Script to be run by a cronjob. Goes through each item in the database and
updates its price.'''
from time import gmtime, strftime

from webscraper import parseUrl
from databaseUtil import DatabaseUtil

if __name__ == "__main__":
    pathname = "/home/tma/host2/Users/tma/cs145new/mysite/mysite/script_log.txt"
    dbpath = "/home/tma/host2/Users/tma/cs145new/mysite/sqlite3db/mydb.db"
    localtime = strftime("%Y-%m-%d %H:%M:%S", gmtime())

    log = None
    try:
        log = open(pathname, "a")
        myDB = DatabaseUtil(dbpath)
        
        # Go through every url in database and check for any changes
        urls = myDB.selectAllURLs()
        for url in urls:
            productName, productId, productPrice, storeName = parseUrl(url[0])
            myDB.update(productId, storeName, productPrice)
    except IOError:
        print("Error: Missing log file...creating new log file")
        log = open(pathname, "w")
    except Exception as e:
        log.write("ERROR at " + localtime + "\n")
        log.write(str(e) + "\n")
    else:
        log.write("Successful cronjob at " + localtime + "\n")
    finally:
        if log:
            log.close()
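Since the script is driven by cron, the schedule lives outside the code. A typical crontab entry (the interval and script path below are illustrative, not taken from the source):

# run the price updater at the top of every hour
0 * * * * /usr/bin/python /path/to/script.py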