Exemplo n.º 1
0
 def getDataFromMatchInfo(self, limit):
     """Rebuild the ``oupei`` rows for every match selected from match info.

     For each row returned by ``ConfigStart.SELECTFROMMATCHINFOLIMIT`` the
     method:

     1. inserts the match ``fid`` into ``log`` when no row for it exists;
     2. deletes the match's rows from the odds/statistics tables listed in
        ``deleteSqls``;
     3. pages through ``ConfigStart.ANALYSISOUZHIURL`` 30 entries at a time
        and, for every ``ttl='zy'`` element on the page, downloads the two
        JSON feeds (``ANALYSISOUZHIDATAURL`` and ``ANALYSISOUZHIKELLYURL``),
        pairs their rows by position and bulk-inserts them into ``oupei``.

     Paging stops at the first page that has no entries or fewer than 30.

     :param limit: bound parameter(s) for the select query; forwarded
         unchanged to ``mysql.getAll``.
     :return: ``None``.
     """
     pageSize = 30
     rowPlaceholder = "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
     insertHead = "INSERT INTO `oupei` (`matchinfoid`, `companyid`, `op_s`, `op_p`, `op_f`, `ret`, `kl_s`, `kl_p`, `kl_f`, `update_time`) VALUES  "
     # Loop-invariant: one cleanup statement per table keyed by matchinfoid.
     deleteSqls = [
         "DELETE FROM yazhi WHERE matchinfoid=%s ",
         " DELETE FROM oupei WHERE matchinfoid=%s ",
         " DELETE FROM rangqiu WHERE matchinfoid=%s ",
         " DELETE FROM daxiao WHERE matchinfoid=%s ",
         " DELETE FROM befen WHERE matchinfoid=%s ",
         " DELETE FROM jinqiu WHERE matchinfoid=%s ",
         " DELETE FROM dsjinqiu WHERE matchinfoid=%s ",
         " DELETE FROM bqc WHERE matchinfoid=%s ",
         " DELETE FROM teamstatistics WHERE matchinfoid=%s ",
         " DELETE FROM playerstatistics WHERE matchinfoid=%s"
     ]

     mysql = Mysql()

     def fetchJson(opener, url):
         # Keep asking the proxy layer until the response parses as JSON.
         # NOTE(review): the retry is unbounded, exactly like the original
         # loops; a permanently failing endpoint will spin here. Consider
         # adding a retry limit once the desired policy is decided.
         while True:
             try:
                 return json.loads(opener.useProxy(url, mysql, 0))
             except Exception:
                 continue

     resultSelect = mysql.getAll(ConfigStart.SELECTFROMMATCHINFOLIMIT, limit)
     if not resultSelect:
         # Nothing to process.
         return

     for resultChild in resultSelect:
         fid = resultChild['fid']

         # Write the log entry once per fid.
         selRes = mysql.getOne(
             "select count(*) as result from log where fid =%s  ", fid)
         if selRes['result'] == 0:
             mysql.update("insert into log(fid) values(%s)", fid)
             mysql.end()

         # Clear previously stored data for this match.
         for deleteSql in deleteSqls:
             mysql.delete(deleteSql, fid)
             mysql.end()

         # ===================== European odds (oupei) =====================
         i = 0
         count_cursor = 0
         # A page that yielded fewer than pageSize entries leaves
         # count_cursor behind i * pageSize, which ends the paging.
         while count_cursor == i * pageSize:
             url = ConfigStart.ANALYSISOUZHIURL % (fid, i * pageSize)
             openUrls = OpenUrls()
             webcontext = openUrls.getWebContent(url, mysql, i, 1)
             soup = BeautifulSoup(webcontext, "html.parser")
             ouzhiData1 = soup.find_all(ttl='zy')
             if not ouzhiData1:
                 # No more entries to fetch.
                 break

             # enumerate keeps the position correct even when an entry is
             # skipped below; a manually incremented counter would fall
             # behind and cut the paging short.
             for j, ouzhiDataChild in enumerate(ouzhiData1):
                 count_cursor = i * pageSize + j + 1
                 companyName = ouzhiDataChild.find_all('td',
                                                       class_='tb_plgs')
                 companyId = self.selectRetCompanyId(
                     companyName[0]['title'], mysql, fid)

                 # Build the URLs outside the retry loop so that a
                 # deterministic error (e.g. a missing 'id' attribute)
                 # surfaces instead of being retried forever.
                 entryId = ouzhiDataChild['id']
                 webjson = fetchJson(
                     openUrls,
                     ConfigStart.ANALYSISOUZHIDATAURL % (fid, entryId))
                 if not webjson:
                     continue
                 kellyjson = fetchJson(
                     openUrls,
                     ConfigStart.ANALYSISOUZHIKELLYURL % (fid, entryId))
                 if not kellyjson:
                     continue

                 # Pair the two feeds row by row; surplus rows in the
                 # longer feed are ignored.
                 insertContext = []
                 rowCount = 0
                 for webjsonChild, kellyjsonChild in zip(webjson, kellyjson):
                     insertContext.extend([
                         fid,
                         companyId,
                         webjsonChild[0],
                         webjsonChild[1],
                         webjsonChild[2],
                         webjsonChild[3],
                         kellyjsonChild[0],
                         kellyjsonChild[1],
                         kellyjsonChild[2],
                         kellyjsonChild[3],
                     ])
                     rowCount += 1
                 if rowCount == 0:
                     # An INSERT without any value tuple is invalid SQL.
                     continue

                 insertSql = insertHead + ",".join(
                     [rowPlaceholder] * rowCount)
                 mysql.update(insertSql, insertContext)
                 mysql.end()
             i += 1
Exemplo n.º 2
0
     print _arrChild
     selectSql = "select count(*) as result from proxyip where address_port=%s "
     res_select = mysql.getOne(selectSql, _arrChild)
     if res_select['result'] == 0:
         proxyHandler = urllib2.ProxyHandler({"http": r'%s' % (_arrChild)})
         opener = urllib2.build_opener(cookies, proxyHandler)
         opener.addheaders = [(
             'User-Agent',
             'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36'
         )]
         t1 = time.time()
         try:
             req = opener.open(testUrl, timeout=req_timeout)
             result = req.read()
             charsetCur = chardet_detect_str_encoding(result)
             timeused = time.time() - t1
             insertSql = "insert into proxyip(address_port) values(%s)"
             l = []
             l.append(_arrChild)
             print mysql.update(insertSql, l)
             mysql.end()
             file_object.write(_arrChild)
             file_object.write('\r\n')
         except Exception, e:
             print e
             pass
         pass
     else:
         continue
 file_object.close()
 time.sleep(10 * 60)
Exemplo n.º 3
0
 def getSecondUrl():
     """Scrape the league index page and store leagues/cups not yet saved.

     Downloads ``ConfigStart.STARTURL`` and walks the children of the
     ``ConfigStart.DIVTOPINDEX``-th element whose id is
     ``ConfigStart.LEAGUESDIV``. Each tag child is one section; the section
     counter ``i`` (plus one) is stored as the league's type. Inside a
     section, an entry without a nested ``div`` is stored directly with an
     empty country, while an entry with a nested ``div`` contributes its
     ``span`` text as the country for every tag inside that ``div``.
     Afterwards every ``ConfigStart.A`` tag inside elements of class
     ``ConfigStart.CPUMATCHTAG`` is stored the same way, continuing the
     section counter.

     A row is inserted only when ``ConfigStart.SELECTCOUNTFROMLEAGUETABLE``
     reports ``ConfigStart.NULL`` matches for (name, type, country).

     Any exception is printed and swallowed; the database handle is always
     disposed once it has been created.

     :return: ``None``.
     """
     sportType = 1

     def cleanText(tag):
         # Encode the tag's string and strip ConfigStart.SPACESTRING from it.
         return tag.string.encode(ConfigStart.UTF8).replace(
             ConfigStart.SPACESTRING, ConfigStart.NULLSTRING)

     def storeLeague(db, name, leagueType, country, url):
         # Insert the league unless the count query already finds it.
         found = db.getOne(ConfigStart.SELECTCOUNTFROMLEAGUETABLE,
                           [name, leagueType, country])
         if found[ConfigStart.RESULT] == ConfigStart.NULL:
             result = db.insertMany(
                 ConfigStart.INSERTINTOLEAGUETABLE,
                 [[name, leagueType, country, sportType, url]])
             # The cup branch always called end() after an insert; doing it
             # for every insert keeps the three call sites consistent.
             db.end()
             print(result)

     mysql = None
     try:
         webfile = urllib.urlopen(ConfigStart.STARTURL)
         try:
             webContent = webfile.read()
         finally:
             # Close the HTTP handle even when read() fails.
             webfile.close()
         soup = BeautifulSoup(webContent, ConfigStart.PARSEMETHOD)
         allraceMainWrap = soup.find_all(id=ConfigStart.LEAGUESDIV)

         # Database resources are acquired only after the page was fetched.
         mysql = Mysql()

         # i is the index of the current section.
         i = -1
         for child in allraceMainWrap[ConfigStart.DIVTOPINDEX].children:
             if not isinstance(child, bs4.element.Tag):
                 continue
             i = i + ConfigStart.INC
             singleUrl = child.find_all(class_=[
                 ConfigStart.ALLLEAGUECLASS_1,
                 ConfigStart.ALLLEAGUECLASS_2
             ])
             for psingleUrl in singleUrl:
                 for getUrlTag in psingleUrl:
                     if not isinstance(getUrlTag, bs4.element.Tag):
                         continue
                     if getUrlTag.div is None:
                         # Entry that links straight to a league.
                         href = getUrlTag.a[ConfigStart.HREF]
                         print(href)
                         p_name = cleanText(getUrlTag.span)
                         print(p_name)
                         storeLeague(mysql, p_name, i + 1,
                                     ConfigStart.NULLSTRING,
                                     ConfigStart.STARTURL + href)
                     else:
                         # Entry is a country; its div holds the leagues.
                         p_country = cleanText(getUrlTag.span)
                         print(p_country)
                         for leagueInfo in getUrlTag.div.children:
                             if not isinstance(leagueInfo, bs4.element.Tag):
                                 continue
                             print(leagueInfo.string)
                             print(leagueInfo[ConfigStart.HREF])
                             storeLeague(
                                 mysql, cleanText(leagueInfo), i + 1,
                                 p_country,
                                 ConfigStart.STARTURL +
                                 leagueInfo[ConfigStart.HREF])
                     print(ConfigStart.MATCHPARTION[i])

         # Cup competitions.
         for cup in soup.find_all(class_=ConfigStart.CPUMATCHTAG):
             i = i + 1
             print(cup)
             print(cup.a.string)
             print(cup.a[ConfigStart.HREF])
             for cupChild in cup.find_all(ConfigStart.A):
                 print(cupChild)
                 storeLeague(mysql, cleanText(cupChild),
                             i + ConfigStart.INC, ConfigStart.NULLSTRING,
                             ConfigStart.STARTURL +
                             cupChild[ConfigStart.HREF])
     except Exception as e:
         print("%s : %s" % (Exception, e))
     finally:
         # Release the database handle on every path, not only on success.
         if mysql is not None:
             try:
                 mysql.dispose()
             except Exception as e:
                 print("%s : %s" % (Exception, e))