示例#1
0
 def getJifen(self, limit):
     """Find the "jifen" link on each league page and store it in one batch.

     Rows are read with ConfigStart.SELECTFROMLEAGUEYEARINFOLIMIT, with
     ``limit`` passed as the query parameter. Each row's league page is
     downloaded and decoded, and every anchor whose href matches
     ConfigStart.COMPILEJIFEN and whose text equals ConfigStart.STRINGJIFEN
     contributes one ``(p_id, absolute url)`` pair. All pairs are written
     with a single statement built from UPDATELEAGUEYEARINFO_TOP/_BOTTOM.

     Returns None. Nothing is written when the select yields False or when
     no matching link was found on any page.
     """
     mysql = Mysql()
     try:
         resultSelect = mysql.getAll(
             ConfigStart.SELECTFROMLEAGUEYEARINFOLIMIT, limit)
         if resultSelect == False:
             return
         params = []
         for resultChild in resultSelect:
             webfile = urllib.urlopen(resultChild[LeagueYearInfo.p_league_url])
             try:
                 webcontext = webfile.read()
             finally:
                 # Release the HTTP handle even when the read fails.
                 webfile.close()
             webContent = unicode(webcontext, ConfigStart.GBK)
             soup = BeautifulSoup(webContent, ConfigStart.PARSEMETHOD)
             jifenLinks = soup.find_all(
                 href=re.compile(ConfigStart.COMPILEJIFEN))
             for jifenLink in jifenLinks:
                 if jifenLink.string == ConfigStart.STRINGJIFEN:
                     print(jifenLink[ConfigStart.HREF])
                     params.append(resultChild[LeagueYearInfo.p_id])
                     params.append(
                         ConfigStart.STARTURL + jifenLink[ConfigStart.HREF])
         if not params:
             # Without this guard the statement would be TOP + BOTTOM with no
             # value tuples at all, which is not a valid statement.
             return
         # Two parameters per row, rows separated by commas.
         placeholders = ",".join(["(%s,%s)"] * (len(params) // 2))
         sqlInsert = (ConfigStart.UPDATELEAGUEYEARINFO_TOP + placeholders +
                      ConfigStart.UPDATELEAGUEYEARINFO_BOTTOM)
         print(sqlInsert)
         result = mysql.update(sqlInsert, params)
         print(result)
     finally:
         # Previously skipped on the early return and on any exception.
         mysql.dispose()
示例#2
0
    def getMatchUrl(self, limit):
        """Store the tab links found on each league's jifen page.

        Rows are read with ConfigStart.SELECTFROMLEAGUEYEARINFOLIMIT, with
        ``limit`` passed as the query parameter. For every row the page at
        ``p_jifen_url`` is downloaded and each element with class
        ``ltab_btn`` is collected, in document order, until one whose text
        contains "赛制" is reached. Each collected element becomes one
        ``(p_id, absolute url, label)`` row appended to
        ConfigStart.INSERTINTOMATCHURL_TOP; one statement is executed per
        league row. Rows that yield no links are skipped.

        Returns None.
        """
        mysql = Mysql()
        resultSelect = mysql.getAll(
            ConfigStart.SELECTFROMLEAGUEYEARINFOLIMIT, limit)
        if resultSelect == False:
            mysql.dispose()
            return
        for resultChild in resultSelect:
            webfile = urllib.urlopen(resultChild[LeagueYearInfo.p_jifen_url])
            try:
                webcontext = webfile.read()
            finally:
                # Release the HTTP handle even when the read fails.
                webfile.close()
            webContent = unicode(webcontext, ConfigStart.GBK)
            soup = BeautifulSoup(webContent, ConfigStart.PARSEMETHOD)
            tabLinks = soup.find_all(class_="ltab_btn")
            print(resultChild['p_jifen_url'])
            params = []
            for tabLink in tabLinks:
                label = tabLink.string
                if label is None:
                    # .string is None when the tag has more than one child;
                    # fall back to the concatenated text instead of raising
                    # AttributeError on .find below.
                    label = tabLink.get_text()
                if label.find("赛制") >= 0:
                    # Stop at this tab; nothing at or after it is stored.
                    break
                params.append(resultChild['p_id'])
                params.append(ConfigStart.STARTURL + tabLink['href'])
                params.append(label)
            if not params:
                continue
            # Three parameters per row, rows separated by commas.
            placeholders = ",".join(["(%s,%s,%s)"] * (len(params) // 3))
            sqlInsert = ConfigStart.INSERTINTOMATCHURL_TOP + placeholders
            resultInsert = mysql.update(sqlInsert, params)
            print(resultInsert)
        # The sibling crawlers dispose their Mysql handle when done; this one
        # never did.
        # NOTE(review): not wrapped in try/finally because dispose() may
        # commit pending work -- confirm its semantics before calling it on
        # the error path.
        mysql.dispose()
示例#3
0
 def getMatchInfo(self, limit):
     """Download match lists for unprocessed ``matchurl`` rows.

     Selects up to 10 ``matchurl`` rows with ``p_use=0``, using ``limit`` as
     the offset. For each row the page at ``p_url`` is parsed for round or
     group identifiers, the JSON match list is requested once per identifier
     (or once without a ``round`` parameter when none were found), and the
     matches are inserted into ``matchinfo``. The row is then flagged
     ``p_use=1``.

     Returns None.
     """
     mysql = Mysql()
     resultSelect = mysql.getAll(
         "select * from matchurl WHERE p_use=0 limit %s,10", limit)
     if resultSelect == False:
         # The connection used to be left open on this path.
         mysql.dispose()
         return
     for resultChild in resultSelect:
         webfile = urllib.urlopen(resultChild['p_url'])
         try:
             webcontext = webfile.read()
         finally:
             webfile.close()
         soup = BeautifulSoup(unicode(webcontext, 'gbk'),
                              ConfigStart.PARSEMETHOD)
         # Current round numbers or group identifiers.
         rounds = self._collectMatchRounds(soup)
         stid = resultChild['p_url'].split("jifen-")[1].split("/")[0]
         baseUrl = ("http://liansai.500.com/index.php?"
                    "c=score&a=getmatch&stid=" + stid)
         if not rounds:
             if not self._insertMatchRows(mysql, resultChild['pid'], baseUrl):
                 # NOTE(review): as in the original, a row with no matches
                 # is not flagged as processed -- confirm this is intended.
                 continue
         else:
             for roundChild in rounds:
                 # Build the URL from scratch for every round. The original
                 # kept appending to one string, so each request after the
                 # first carried the previous query string as well.
                 self._insertMatchRows(
                     mysql, resultChild['pid'],
                     baseUrl + "&round=" + str(roundChild))
         # Set the p_use flag so this URL is marked as already crawled.
         useSql = "update matchurl set p_use=1 where pid = %s"
         mysql.update(useSql, resultChild['pid'])
         print("matchurl更新成功")
     # NOTE(review): not wrapped in try/finally because dispose() may commit
     # pending work -- confirm its semantics before calling it on the error
     # path.
     mysql.dispose()

 def _collectMatchRounds(self, soup):
     """Return the round/group identifiers found in a parsed page."""
     rounds = []
     groupList = soup.find_all(id='div_group_list')
     if groupList:
         for child in groupList[0].children:
             # Skip text nodes; only tags carry attributes.
             if (isinstance(child, bs4.element.Tag)
                     and child['data-group'] != 'all'):
                 rounds.append(child['data-group'])
     matchGroup = soup.find_all(id='match_group')
     if matchGroup:
         for child in matchGroup[0].children:
             if (isinstance(child, bs4.element.Tag)
                     and child.a['data-group'] != 'all'):
                 rounds.append(child.a['data-group'])
     # Each 'lmb3' element adds one round, numbered from 1.
     rounds.extend(range(1, len(soup.find_all(class_='lmb3')) + 1))
     return rounds

 def _insertMatchRows(self, mysql, leagueId, url):
     """Fetch one JSON match list and insert it; return False if empty."""
     jsonContext = urllib.urlopen(url)
     try:
         jsonData = jsonContext.read()
     finally:
         jsonContext.close()
     matches = json.loads(unicode(jsonData, 'gbk'))
     # JSON keys, in the column order of the INSERT after p_leagueid.
     fields = ('fid', 'ghalfscore', 'gid', 'gname', 'gscore', 'gstanding',
               'gsxname', 'handline', 'hhalfscore', 'hid', 'hname',
               'hscore', 'hstanding', 'hsxname', 'round', 'status', 'stime')
     insertContext = []
     rowCount = 0
     for match in matches:
         print(match)
         insertContext.append(leagueId)
         for field in fields:
             insertContext.append(match[field])
         rowCount += 1
     if rowCount == 0:
         return False
     rowPlaceholder = "(" + ", ".join(["%s"] * (len(fields) + 1)) + ")"
     sqlInsert = "INSERT INTO `matchinfo` (`p_leagueid`, `fid`, `ghalfscore`, `gid`, `gname`, `gscore`, `gstanding`, `gsxname`, `handline`, `hhalfscore`, `hid`, `hname`, `hscore`, `hstanding`, `hsxname`, `round`, `status`, `stime`) VALUES "
     sqlInsert += ",".join([rowPlaceholder] * rowCount)
     resInfo = mysql.update(sqlInsert, insertContext)
     print(resInfo)
     return True
示例#4
0
 def getDataFromMatchInfo(self, limit):
     mysql = Mysql()
     sqlAll = ConfigStart.SELECTFROMMATCHINFOLIMIT
     resultSelect = mysql.getAll(sqlAll, limit)
     if resultSelect == False:
         #print "没有要查找的数据"
         return
     #写日志
     for resultChild in resultSelect:
         fid = resultChild['fid']
         selectLogSql = "select count(*) as result from log where fid =%s  "
         selRes = mysql.getOne(selectLogSql, fid)
         if selRes['result'] == 0:
             logSql = "insert into log(fid) values(%s)"
             mysql.update(logSql, fid)
             mysql.end()
         #print fid
         deleteSqls = [
             "DELETE FROM yazhi WHERE matchinfoid=%s ",
             " DELETE FROM oupei WHERE matchinfoid=%s ",
             " DELETE FROM rangqiu WHERE matchinfoid=%s ",
             " DELETE FROM daxiao WHERE matchinfoid=%s ",
             " DELETE FROM befen WHERE matchinfoid=%s ",
             " DELETE FROM jinqiu WHERE matchinfoid=%s ",
             " DELETE FROM dsjinqiu WHERE matchinfoid=%s ",
             " DELETE FROM bqc WHERE matchinfoid=%s ",
             " DELETE FROM teamstatistics WHERE matchinfoid=%s ",
             " DELETE FROM playerstatistics WHERE matchinfoid=%s"
         ]
         for deleteSqlsChild in deleteSqls:
             mysql.delete(deleteSqlsChild, fid)
             mysql.end()
         #print "清理数据成功"
         i = 0
         '''
         =====================================欧赔开始================================================
         '''
         count_cursor = 0
         while True:
             if count_cursor != i * 30:
                 break
             url = ConfigStart.ANALYSISOUZHIURL % (fid, i * 30)
             #print "=============================================%s==================================="%url
             openUrls = OpenUrls()
             webcontext = openUrls.getWebContent(url, mysql, i, 1)
             # if webcontext.find('500.com')==-1 and webcontext!='':
             #     #print "查看webcontext:%s"%webcontext
             #     continue
             #     pass
             # else:
             #     if webcontext =='':
             #         break
             soup = BeautifulSoup(webcontext, "html.parser")
             ouzhiData1 = soup.find_all(ttl='zy')
             if ouzhiData1.__len__() == 0:
                 #print '获取完毕'
                 break
             j = 0
             for ouzhiDataChild in ouzhiData1:
                 #print "------------------------%s------------------------" % (i * 30 + j+1)
                 count_cursor = i * 30 + j + 1
                 #print ouzhiDataChild['id']
                 insertSql = "INSERT INTO `oupei` (`matchinfoid`, `companyid`, `op_s`, `op_p`, `op_f`, `ret`, `kl_s`, `kl_p`, `kl_f`, `update_time`) VALUES  "
                 insertContext = []
                 companyName = ouzhiDataChild.find_all('td',
                                                       class_='tb_plgs')
                 #print companyName[0]['title']
                 companyId = self.selectRetCompanyId(
                     companyName[0]['title'], mysql, fid)
                 webjson = 0
                 #每当进一次except就去减少一次可访问次数
                 reduceCount = 0
                 while True:
                     try:
                         webjson = openUrls.useProxy(
                             ConfigStart.ANALYSISOUZHIDATAURL %
                             (fid, ouzhiDataChild['id']), mysql, 0)
                         webjson = json.loads(webjson)
                         break
                         pass
                     except Exception, e:
                         reduceCount = 1
                         continue
                         pass
                     pass
                 pass
                 #print webjson
                 if webjson == None:
                     continue
                 if webjson.__len__() == 0:
                     continue
                 kellyjson = 0
                 while True:
                     try:
                         kellyjson = openUrls.useProxy(
                             ConfigStart.ANALYSISOUZHIKELLYURL %
                             (fid, ouzhiDataChild['id']), mysql, 0)
                         kellyjson = json.loads(kellyjson)
                         break
                         pass
                     except Exception, e:
                         continue
                         pass
                     pass
                 pass
                 index = 0
                 for webjsonChild in webjson:
                     indexT = 0
                     for kellyjsonChild in kellyjson:
                         if index == indexT:
                             #TODO:添加数据到数据库中
                             if index == 0:
                                 insertSql += "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
                                 pass
                             else:
                                 insertSql += ",(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
                                 pass
                             insertContext.append(fid)
                             insertContext.append(companyId)
                             insertContext.append(webjsonChild[0])
                             insertContext.append(webjsonChild[1])
                             insertContext.append(webjsonChild[2])
                             insertContext.append(webjsonChild[3])
                             insertContext.append(kellyjsonChild[0])
                             insertContext.append(kellyjsonChild[1])
                             insertContext.append(kellyjsonChild[2])
                             insertContext.append(kellyjsonChild[3])
                             pass
                             break
                             pass
                         pass
                         indexT += 1
                     pass
                     index += 1
                 pass
                 mysql.update(insertSql, insertContext)
                 mysql.end()
                 j += 1
                 pass
             i += 1
             pass