def getJifen(self, limit):
    """Find the "jifen" link on each league page and store it in one statement.

    Rows are selected with ``ConfigStart.SELECTFROMLEAGUEYEARINFOLIMIT``
    (``limit`` is passed through as the query parameter). For every row the
    page at ``row[LeagueYearInfo.p_league_url]`` is downloaded, decoded with
    ``ConfigStart.GBK`` and scanned for anchors whose ``href`` matches
    ``ConfigStart.COMPILEJIFEN`` and whose text equals
    ``ConfigStart.STRINGJIFEN``. Each hit adds one ``(row id, absolute URL)``
    pair to a single multi-row statement built from
    ``ConfigStart.UPDATELEAGUEYEARINFO_TOP`` and ``..._BOTTOM``.

    :param limit: parameter forwarded to the select statement.
    :return: None. The statement, its result and each matched href are printed.
    """
    mysql = Mysql()
    try:
        rows = mysql.getAll(ConfigStart.SELECTFROMLEAGUEYEARINFOLIMIT, limit)
        if not rows:
            return

        # Compile once; the pattern does not depend on the row.
        linkPattern = re.compile(ConfigStart.COMPILEJIFEN)
        params = []
        for row in rows:
            page = urllib.urlopen(row[LeagueYearInfo.p_league_url])
            try:
                raw = page.read()
            finally:
                page.close()
            soup = BeautifulSoup(unicode(raw, ConfigStart.GBK),
                                 ConfigStart.PARSEMETHOD)
            for link in soup.find_all(href=linkPattern):
                if link.string == ConfigStart.STRINGJIFEN:
                    print(link[ConfigStart.HREF])
                    params.append(row[LeagueYearInfo.p_id])
                    params.append(ConfigStart.STARTURL + link[ConfigStart.HREF])

        if not params:
            # Nothing matched: TOP + BOTTOM without any value tuple would not
            # be a usable statement, so skip the write (same guard as
            # getMatchUrl uses).
            return

        # Two parameters per "(%s,%s)" tuple, tuples separated by commas.
        sqlInsert = (ConfigStart.UPDATELEAGUEYEARINFO_TOP
                     + ",".join(["(%s,%s)"] * (len(params) // 2))
                     + ConfigStart.UPDATELEAGUEYEARINFO_BOTTOM)
        print(sqlInsert)
        result = mysql.update(sqlInsert, params)
        print(result)
    finally:
        # Release the connection on every path, including the early returns
        # and any exception raised while downloading or parsing.
        mysql.dispose()
def getMatchUrl(self, limit):
    """Collect the tab links of each "jifen" page and insert them as match URLs.

    Rows are selected with ``ConfigStart.SELECTFROMLEAGUEYEARINFOLIMIT``
    (``limit`` is the query parameter). For every row the page at
    ``row[LeagueYearInfo.p_jifen_url]`` is downloaded, decoded with
    ``ConfigStart.GBK`` and searched for elements with class ``ltab_btn``.
    Elements are taken in document order until one whose text contains
    "赛制" is met; each accepted element contributes
    ``(row['p_id'], ConfigStart.STARTURL + href, text)`` to one multi-row
    insert that starts with ``ConfigStart.INSERTINTOMATCHURL_TOP``.

    :param limit: parameter forwarded to the select statement.
    :return: None. The page URL and each insert result are printed.
    """
    mysql = Mysql()
    try:
        rows = mysql.getAll(ConfigStart.SELECTFROMLEAGUEYEARINFOLIMIT, limit)
        if not rows:
            return

        for row in rows:
            page = urllib.urlopen(row[LeagueYearInfo.p_jifen_url])
            try:
                raw = page.read()
            finally:
                page.close()
            soup = BeautifulSoup(unicode(raw, ConfigStart.GBK),
                                 ConfigStart.PARSEMETHOD)
            tabs = soup.find_all(class_="ltab_btn")
            print(row['p_jifen_url'])

            params = []
            for tab in tabs:
                label = tab.string
                if label is None:
                    # Beautiful Soup returns None from .string when the tag
                    # does not wrap exactly one string; such a tab has no
                    # usable label, so skip it instead of crashing on .find.
                    continue
                if label.find("赛制") >= 0:
                    # Stop at the first tab whose label contains this marker.
                    break
                params.append(row['p_id'])
                params.append(ConfigStart.STARTURL + tab['href'])
                params.append(label)

            if not params:
                continue

            # Three parameters per "(%s,%s,%s)" tuple.
            sqlInsert = (ConfigStart.INSERTINTOMATCHURL_TOP
                         + ",".join(["(%s,%s,%s)"] * (len(params) // 3)))
            resultInsert = mysql.update(sqlInsert, params)
            print(resultInsert)
    finally:
        # The sibling crawlers dispose of their Mysql handle when done; do
        # the same here, on every exit path.
        mysql.dispose()
def getMatchInfo(self, limit):
    """Download match data for not-yet-used ``matchurl`` rows into ``matchinfo``.

    Up to ten rows with ``p_use=0`` are read (``limit`` is the offset
    parameter of the query). For each row the page at ``row['p_url']`` is
    parsed to collect round/group identifiers from three places:

    * ``data-group`` of the tag children of ``#div_group_list``,
    * ``data-group`` of the ``<a>`` inside the tag children of ``#match_group``,
    * a running number 1..N for the N elements with class ``lmb3``.

    Values equal to ``'all'`` are ignored. With no rounds, one request is made
    without a ``round`` parameter; otherwise one request is made per round.
    Each response is a JSON list of matches that is inserted into
    ``matchinfo``. Afterwards the row is flagged ``p_use=1``. If the single
    round-less request yields no matches, the row is left unflagged.

    :param limit: offset forwarded to the select statement.
    :return: None.
    """
    mysql = Mysql()
    try:
        pending = mysql.getAll(
            "select * from matchurl WHERE p_use=0 limit %s,10", limit)
        if not pending:
            return

        for row in pending:
            page = urllib.urlopen(row['p_url'])
            try:
                raw = page.read()
            finally:
                page.close()
            soup = BeautifulSoup(unicode(raw, 'gbk'), ConfigStart.PARSEMETHOD)

            # Identifier taken from the URL segment that follows "jifen-".
            stid = row['p_url'].split("jifen-")[1].split("/")[0]

            # Which round or group each request should ask for.
            rounds = []
            groupLists = soup.find_all(id='div_group_list')
            if groupLists:
                for child in groupLists[0].children:
                    if (isinstance(child, bs4.element.Tag)
                            and child['data-group'] != 'all'):
                        rounds.append(child['data-group'])

            matchGroups = soup.find_all(id='match_group')
            if matchGroups:
                for child in matchGroups[0].children:
                    if (isinstance(child, bs4.element.Tag)
                            and child.a['data-group'] != 'all'):
                        rounds.append(child.a['data-group'])

            # Each "lmb3" element stands for one round, numbered from 1.
            rounds.extend(range(1, len(soup.find_all(class_='lmb3')) + 1))

            baseUrl = ("http://liansai.500.com/index.php?"
                       "c=score&a=getmatch&stid=" + stid)

            if not rounds:
                if not self._storeMatches(mysql, baseUrl, row['pid']):
                    # No data at all: leave p_use untouched for this row.
                    continue
            else:
                for roundValue in rounds:
                    # Build the URL afresh for every round. Appending to one
                    # shared string made each request carry all previous
                    # query strings as well.
                    self._storeMatches(
                        mysql, baseUrl + "&round=" + str(roundValue),
                        row['pid'])

            # Flag the URL as already crawled.
            mysql.update("update matchurl set p_use=1 where pid = %s",
                         row['pid'])
            print("matchurl更新成功")
    finally:
        # Release the connection on every path, including the early return.
        mysql.dispose()

def _storeMatches(self, mysql, url, leagueId):
    """Fetch the JSON match list at ``url`` and insert it into ``matchinfo``.

    :param mysql: open ``Mysql`` handle used for the insert.
    :param url: fully built request URL.
    :param leagueId: value written to the ``p_leagueid`` column of every row.
    :return: True if at least one match was inserted, False if the list was
        empty and nothing was written.
    """
    response = urllib.urlopen(url)
    try:
        raw = response.read()
    finally:
        response.close()
    matches = json.loads(unicode(raw, 'gbk'))
    if not matches:
        return False

    # JSON keys in the same order as the columns after `p_leagueid`.
    fields = ('fid', 'ghalfscore', 'gid', 'gname', 'gscore', 'gstanding',
              'gsxname', 'handline', 'hhalfscore', 'hid', 'hname', 'hscore',
              'hstanding', 'hsxname', 'round', 'status', 'stime')
    rowPlaceholder = "(" + ", ".join(["%s"] * (len(fields) + 1)) + ")"

    insertContext = []
    for match in matches:
        print(match)
        insertContext.append(leagueId)
        for field in fields:
            insertContext.append(match[field])

    sqlInsert = (
        "INSERT INTO `matchinfo` (`p_leagueid`, `fid`, `ghalfscore`, `gid`, "
        "`gname`, `gscore`, `gstanding`, `gsxname`, `handline`, "
        "`hhalfscore`, `hid`, `hname`, `hscore`, `hstanding`, `hsxname`, "
        "`round`, `status`, `stime`) VALUES "
        + ",".join([rowPlaceholder] * len(matches)))
    resInfo = mysql.update(sqlInsert, insertContext)
    print(resInfo)
    return True
def getDataFromMatchInfo(self, limit): mysql = Mysql() sqlAll = ConfigStart.SELECTFROMMATCHINFOLIMIT resultSelect = mysql.getAll(sqlAll, limit) if resultSelect == False: #print "没有要查找的数据" return #写日志 for resultChild in resultSelect: fid = resultChild['fid'] selectLogSql = "select count(*) as result from log where fid =%s " selRes = mysql.getOne(selectLogSql, fid) if selRes['result'] == 0: logSql = "insert into log(fid) values(%s)" mysql.update(logSql, fid) mysql.end() #print fid deleteSqls = [ "DELETE FROM yazhi WHERE matchinfoid=%s ", " DELETE FROM oupei WHERE matchinfoid=%s ", " DELETE FROM rangqiu WHERE matchinfoid=%s ", " DELETE FROM daxiao WHERE matchinfoid=%s ", " DELETE FROM befen WHERE matchinfoid=%s ", " DELETE FROM jinqiu WHERE matchinfoid=%s ", " DELETE FROM dsjinqiu WHERE matchinfoid=%s ", " DELETE FROM bqc WHERE matchinfoid=%s ", " DELETE FROM teamstatistics WHERE matchinfoid=%s ", " DELETE FROM playerstatistics WHERE matchinfoid=%s" ] for deleteSqlsChild in deleteSqls: mysql.delete(deleteSqlsChild, fid) mysql.end() #print "清理数据成功" i = 0 ''' =====================================欧赔开始================================================ ''' count_cursor = 0 while True: if count_cursor != i * 30: break url = ConfigStart.ANALYSISOUZHIURL % (fid, i * 30) #print "=============================================%s==================================="%url openUrls = OpenUrls() webcontext = openUrls.getWebContent(url, mysql, i, 1) # if webcontext.find('500.com')==-1 and webcontext!='': # #print "查看webcontext:%s"%webcontext # continue # pass # else: # if webcontext =='': # break soup = BeautifulSoup(webcontext, "html.parser") ouzhiData1 = soup.find_all(ttl='zy') if ouzhiData1.__len__() == 0: #print '获取完毕' break j = 0 for ouzhiDataChild in ouzhiData1: #print "------------------------%s------------------------" % (i * 30 + j+1) count_cursor = i * 30 + j + 1 #print ouzhiDataChild['id'] insertSql = "INSERT INTO `oupei` (`matchinfoid`, `companyid`, `op_s`, `op_p`, `op_f`, 
`ret`, `kl_s`, `kl_p`, `kl_f`, `update_time`) VALUES " insertContext = [] companyName = ouzhiDataChild.find_all('td', class_='tb_plgs') #print companyName[0]['title'] companyId = self.selectRetCompanyId( companyName[0]['title'], mysql, fid) webjson = 0 #每当进一次except就去减少一次可访问次数 reduceCount = 0 while True: try: webjson = openUrls.useProxy( ConfigStart.ANALYSISOUZHIDATAURL % (fid, ouzhiDataChild['id']), mysql, 0) webjson = json.loads(webjson) break pass except Exception, e: reduceCount = 1 continue pass pass pass #print webjson if webjson == None: continue if webjson.__len__() == 0: continue kellyjson = 0 while True: try: kellyjson = openUrls.useProxy( ConfigStart.ANALYSISOUZHIKELLYURL % (fid, ouzhiDataChild['id']), mysql, 0) kellyjson = json.loads(kellyjson) break pass except Exception, e: continue pass pass pass index = 0 for webjsonChild in webjson: indexT = 0 for kellyjsonChild in kellyjson: if index == indexT: #TODO:添加数据到数据库中 if index == 0: insertSql += "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" pass else: insertSql += ",(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" pass insertContext.append(fid) insertContext.append(companyId) insertContext.append(webjsonChild[0]) insertContext.append(webjsonChild[1]) insertContext.append(webjsonChild[2]) insertContext.append(webjsonChild[3]) insertContext.append(kellyjsonChild[0]) insertContext.append(kellyjsonChild[1]) insertContext.append(kellyjsonChild[2]) insertContext.append(kellyjsonChild[3]) pass break pass pass indexT += 1 pass index += 1 pass mysql.update(insertSql, insertContext) mysql.end() j += 1 pass i += 1 pass