Example #1
    def parse_button(self, all_nhan_dinh_link=None):
        if all_nhan_dinh_link is None:
            all_nhan_dinh_link = 'http://bongdaplus.vn/nhan-dinh-bong-da/trang-1.html'
        range_page = self.gen_range(all_nhan_dinh_link=all_nhan_dinh_link)
        for link in range_page:
            if link is None:
                continue
            html = request_html(link)
            soup = BeautifulSoup(html, 'html.parser')
            hrefs = []
            for a in soup.select('a'):
                try:
                    hrefs.append(a['href'])
                except KeyError:
                    pass
            hrefs = [h for h in hrefs if 'nhan-dinh-bong-da-' in h]
            for h in hrefs:
                link = 'http://bongdaplus.vn/' + h
                # nhan_dinh_a_match_bondaplus fetches and parses the page itself,
                # so there is no need to request it here first.
                self.nhan_dinh_a_match_bondaplus(link=link)

        self.map_match_id()
        self.map_predict_id()
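The `gen_range` helper is project-specific and its source is not shown in these examples. A minimal sketch of what it presumably does, assuming it substitutes page numbers into the listing URL (the signature, defaults, and page count are guesses):

import re

def gen_range(patern=r'trang-\d+', replacement='trang-%s',
              all_nhan_dinh_link=None, num_pages=10):
    # Yield one listing URL per page by swapping the page number
    # into the template link.
    for page in range(1, num_pages + 1):
        yield re.sub(patern, replacement % page, all_nhan_dinh_link)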
Example #2
    def nhan_dinh_a_match_aegoal(self, *arg, **karg):
        link = karg.get('link')

        if not link:
            # Offline debug path: parse a saved page instead of fetching.
            # NOTE: team1_2, ngay and dt are only set in the else branch, so this
            # path fails at the get_or_create_object_sosanh call below.
            print('Not link***********')
            with open('/media/sf_C_DRIVE/D4/dl/testfile_link1.html', 'r') as file:
                html = file.read()
            soup = BeautifulSoup(html, 'html.parser')
        else:
            atuple = karg.get('atuple')
            link = atuple[0]
            team_1_2_date = atuple[2]
            team1_2 = team_1_2_date[0]
            dt = team_1_2_date[1]
            ngay = dt.date()
            dt = fields.Datetime.to_string(dt)
            print('Co link************')
            rs = request_html(link)
            soup = BeautifulSoup(rs, 'html.parser')

        rs = soup.select('div.box-text-detail')[0].get_text()
        ti_so = du_doan_ti_so(rs)

        update_dict = {'nd_id': self.id}
        if ti_so:
            update_dict_more = {'score1': ti_so[0], 'score2': ti_so[1], 'state': 'tu_dong'}
        else:
            update_dict_more = {'state': 'can_read_du_doan'}
        update_dict.update(update_dict_more)
        ndlline = get_or_create_object_sosanh(
            self, 'tsbd.ndlline',
            {'link': link, 'ngay': ngay, 'ngay_gio': dt,
             'team1': team1_2[0], 'team2': team1_2[1]},
            update_dict)
        return ti_so
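`du_doan_ti_so` ("predict the score") is another project helper whose source is not shown; the caller above only relies on it returning a falsy value or an indexable pair. A minimal sketch under that assumption (the regex is a guess):

import re

def du_doan_ti_so(text):
    # Return the first score-like pair "X-Y" in the analysis text, or None.
    rs = re.search(r'(\d+)\s*-\s*(\d+)', text)
    if rs:
        return int(rs.group(1)), int(rs.group(2))
    return None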
Example #3
    def nhan_dinh_bongdanet(self, all_nhan_dinh_link=None):
        if all_nhan_dinh_link is None:
            all_nhan_dinh_link = 'http://bongdanet.vn/nhan-dinh/p2'
        range_page = list(self.gen_range(patern=r'http://bongdanet.vn/nhan-dinh/p\d+',
                                         replacement='http://bongdanet.vn/nhan-dinh/p%s',
                                         all_nhan_dinh_link=all_nhan_dinh_link))
        for link in range_page:
            html = request_html(link)
            soup = BeautifulSoup(html, 'html.parser')
            a_s = soup.select('div.news-item div.detail-news-item a')
            hrefs = []
            for a in a_s:
                try:
                    hrefs.append(['http://bongdanet.vn' + a['href'], a.get_text()])
                except KeyError:
                    pass
            hrefs = [h for h in hrefs if 'nhan-dinh' in h[0] or 'phan-tich' in h[0]]
            for atuple in hrefs:
                rs = parse_title_bongdanet(atuple[1])
                atuple.append(rs)

            for at in hrefs:
                if at[2] is not None:
                    ti_so = self.nhan_dinh_a_match_bongdanet(link=at[0], atuple=at)
        self.map_match_id()
        self.map_predict_id()
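`parse_title_bongdanet` is not shown either, but Example #2 unpacks its result (`atuple[2]`) as `((team1, team2), datetime)`, so it presumably returns that shape or None. A minimal sketch under that assumption, with regexes modelled on Example #10 (the title format is a guess):

import re
from datetime import datetime

def parse_title_bongdanet(title):
    # Expect titles like 'Nhận định Arsenal vs Chelsea, 21h30 ngày 25/12'.
    teams = re.search(r'(?:Nhận định|Phân tích)\s+(.+?)\s+vs\s+(.+?),', title)
    when = re.search(r'(\d+)h(\d*).*?ngày\s+(\d+)/(\d+)', title)
    if not teams or not when:
        return None
    minute = int(when.group(2) or 0)
    dt = datetime(year=datetime.now().year, month=int(when.group(4)),
                  day=int(when.group(3)), hour=int(when.group(1)), minute=minute)
    return (teams.group(1).strip(), teams.group(2).strip()), dt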
Example #4
    def test(self):
        # Debug helper: build a casting-info URL and show the raw response
        # in a UserError popup.
        away = u'Việt Nam'
        away = quote(away)
        link = 'http://bongdaso.com/_CastingInfo.aspx?FixtureID=56032&SeasonID=112&Flags=&Home=Bournemouth&Away=%s' % away

        print('link', link)
        raise UserError(u'%s' % (request_html(link)))
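The `quote` call is what makes this URL safe: team names can contain spaces and non-ASCII characters that are invalid in a query string. For example (Python 3 shown; on Python 2 `quote` lives in `urllib` and the string must be UTF-8 encoded first):

from urllib.parse import quote

print(quote(u'Việt Nam'))  # Vi%E1%BB%87t%20Nam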
Example #5
    def leech_button(self):
        link = self.link if self.link_select == 'link1' else self.all_nhan_dinh_link
        rs = request_html(link)
        # Save the raw HTML for offline debugging.
        with open('/media/sf_C_DRIVE/D4/dl/testfile_%s.html' % self.link_select, 'w') as file:
            file.write(rs)
        self.log = rs
Example #6
def get_soup_of_events(fix_id, home, away):
    # Team names may contain non-ASCII characters, so URL-encode them first.
    home = quote(home)
    away = quote(away)
    link = 'http://bongdaso.com/_CastingInfo.aspx?FixtureID={}&SeasonID=112&Flags=&Home={}&Away={}'.format(fix_id, home, away)
    html = request_html(link)
    soup = BeautifulSoup(html, 'html.parser')
    return soup
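Typical call, reusing the fixture from Example #4 (the selector is just for illustration):

soup = get_soup_of_events(56032, 'Bournemouth', u'Việt Nam')
for row in soup.select('tr'):
    print(row.get_text())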
Example #7
def get_soup(link):
    try:
        html = request_html(link)
        soup = BeautifulSoup(html, 'html.parser')
    except GethtmlError as e:
        # "Lỗi khi get soup" = "error while getting soup"; re-raise with
        # the original exception as the cause.
        raise GethtmlError(u'Lỗi khi get soup') from e
    return soup
Example #8
def get_soup(link):
    # Retry once if the first fetch parses to an empty document.
    soup = ''
    count_try = 0
    while not str(soup) and count_try < 2:
        html = request_html(link)
        soup = BeautifulSoup(html, 'html.parser')
        count_try += 1
    return soup
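Examples #7 and #8 are two variants of the same helper: #7 wraps the fetch error in the project's GethtmlError, #8 retries once when the parsed document is empty. A sketch that combines both behaviours (request_html and GethtmlError are this project's helpers, not library code):

from bs4 import BeautifulSoup

def get_soup(link, max_tries=2):
    soup = None
    for _ in range(max_tries):
        try:
            html = request_html(link)
        except GethtmlError as e:
            raise GethtmlError(u'Lỗi khi get soup') from e
        soup = BeautifulSoup(html, 'html.parser')
        if str(soup):  # stop as soon as a non-empty document is parsed
            return soup
    return soup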
Example #9
def get_team_and_date(self, match_link, add_update_dict, is_set_must_get_time=True):
    soup = None
    html = None
    if is_set_must_get_time:
        check_time_for_get_soup = 'time' not in add_update_dict
    else:
        check_time_for_get_soup = 'time' not in add_update_dict and 'date' not in add_update_dict

    # Only fetch the page when some field is missing from add_update_dict.
    is_get_soup = any(['home' not in add_update_dict,
                       'away' not in add_update_dict,
                       check_time_for_get_soup])

    if is_get_soup:
        html = request_html(match_link)
        soup = BeautifulSoup(html, 'html.parser')

    if 'home' not in add_update_dict:
        home = soup.select('div#scr_home a')[0].get_text()
        home = re.sub(r'\s+\[\d+\]', '', home)  # strip the "[rank]" suffix
    else:
        home = add_update_dict['home']
    if 'away' not in add_update_dict:
        away = soup.select('div#scr_away a')[0].get_text()
        away = re.sub(r'\[\d+\]\s+', '', away)  # strip the "[rank]" prefix
    else:
        away = add_update_dict['away']
    home = home.strip()
    away = away.strip()
    if 'time' in add_update_dict:
        begin_time = add_update_dict['time']
        # Site times are Vietnam local (UTC+7); store UTC.
        dtime = datetime.strptime(begin_time, '%d/%m/%Y %H:%M') - timedelta(hours=7)
        str_time = fields.Datetime.to_string(dtime)
        match_date = dtime.date()
        str_date = fields.Date.to_string(match_date)
    else:
        if is_set_must_get_time:
            begin_time = soup.select('div#scr_start')[0].get_text()
            begin_time = begin_time[9:]  # drop the label prefix
            dtime = datetime.strptime(begin_time, '%d/%m/%Y %H:%M') - timedelta(hours=7)
            str_time = fields.Datetime.to_string(dtime)
            match_date = dtime.date()
            str_date = fields.Date.to_string(match_date)
        else:
            match_date = datetime.strptime(add_update_dict['date'], '%d/%m/%Y')
            str_date = fields.Date.to_string(match_date)
            str_time = None

    team1_id = get_or_create_object_sosanh(self, 'tsbd.team', {'name': home})
    team2_id = get_or_create_object_sosanh(self, 'tsbd.team', {'name': away})
    team_dict = {'team1': team1_id.id,
                 'team2': team2_id.id,
                 'date': str_date,
                 }
    return team_dict, match_date, str_time, home, away, soup, html
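The recurring `- timedelta(hours=7)` converts the site's Vietnam local time (UTC+7) to UTC, which is how Odoo stores datetimes. For example:

from datetime import datetime, timedelta

local = datetime.strptime('25/12/2017 21:30', '%d/%m/%Y %H:%M')
print(local - timedelta(hours=7))  # 2017-12-25 14:30:00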
Example #10
    def nhan_dinh_a_match_bondaplus(self, *arg, **karg):
        link = karg.get('link')
        if not link:
            # Offline debug path: parse a saved page instead of fetching.
            print('Not link***********')
            with open('/media/sf_C_DRIVE/D4/dl/testfile_link1.html', 'r') as file:
                html = file.read()
            soup = BeautifulSoup(html, 'html.parser')
        else:
            print('Co link************')
            rs = request_html(link)
            soup = BeautifulSoup(rs, 'html.parser')
        s = soup.select('h1.tit')
        title = s[0].get_text()
        print('title **', title)
        # The site uses several title variants; try them in order.
        rs = re.search(r'Nhận định bóng đá (.+?) vs (.+?),', title)
        if not rs:
            rs = re.search(r'Nhận định bóng đá (.+?) và (.+?),', title)
        if not rs:
            rs = re.search(r'Nhận định bóng đá.*?: (.+?) vs (.+?)$', title)
        if not rs:
            rs = re.search(r'Nhận định bóng đá.*?: (.+?) và (.+?)$', title)
        team1 = rs.group(1).strip()
        team2 = rs.group(2).strip()

        rs_search = re.search(r'(\d+)h(\d*).*?ngày\s+(\d+)/(\d+)', title)
        rs = (rs_search.group(1), rs_search.group(2), rs_search.group(3), rs_search.group(4))
        rs = [int_a_minute(i) for i in rs]
        # Kick-off is Vietnam local time (UTC+7); convert to UTC once.
        dt = datetime(year=datetime.now().year, month=rs[3], day=rs[2],
                      hour=rs[0], minute=rs[1]) - timedelta(hours=7)
        ngay = dt.date()
        dt = fields.Datetime.to_string(dt)

        update_dict = {'ngay': ngay, 'ngay_gio': dt, 'nd_id': self.id}
        try:
            score1, score2 = self.du_doan(soup)
            update_dict.update({'score1': score1, 'score2': score2, 'state': 'tu_dong'})
        except FETCHERROR:
            update_dict.update({'state': 'can_read_du_doan'})
        ndlline = get_or_create_object_sosanh(
            self, 'tsbd.ndlline',
            {'link': link, 'team1': team1, 'team2': team2}, update_dict)
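The four regexes cover the title variants the site uses ('A vs B,', 'A và B,' and the colon-separated forms). A quick check against a made-up title:

import re

title = u'Nhận định bóng đá Arsenal vs Chelsea, 21h30 ngày 25/12'
rs = re.search(r'Nhận định bóng đá (.+?) vs (.+?),', title)
print(rs.group(1), rs.group(2))  # Arsenal Chelsea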
Example #11
    def nhan_dinh_aegoal(self, all_nhan_dinh_link=None):
        if all_nhan_dinh_link is None:
            all_nhan_dinh_link = 'https://aegoal.net/nhan-dinh-bong-da.html?trang=1'
        # The aegoal listing pages are numbered via the 'trang=N' query parameter.
        range_page = self.gen_range(patern=r'trang=\d+', replacement='trang=%s',
                                    all_nhan_dinh_link=all_nhan_dinh_link)

        for link in range_page:
            html = request_html(link)
            soup = BeautifulSoup(html, 'html.parser')
            rs = soup.select('div.list-item-new a')
            hrefs = []
            for a in rs:
                hrefs.append([a['href'], a.get_text()])

            hrefs = [h for h in hrefs if 'nhan-dinh' in h[0] or 'phan-tich' in h[0]]
            for atuple in hrefs:
                rs = parse_title_bongdanet(atuple[1])
                atuple.append(rs)
            for at in hrefs:
                if at[2] is not None:
                    ti_so = self.nhan_dinh_a_match_aegoal(link=at[0], atuple=at)

        self.map_match_id()
        self.map_predict_id()
Example #12
    def gen_lineup_new(self, match_link, search_dict, match_id):
        match_link = match_link.replace('Data=Odds', 'Data=lineup').replace(
            'Data=Casting', 'Data=lineup')
        if 'Data=lineup' not in match_link:
            match_link = match_link + '&Data=lineup'
        html = request_html(match_link)
        lineup_dict = {}
        playerlines = []

        for patern in [
            ("'_HomeLineup_','(.*?)'", search_dict['team1'], 'home'),
            ("'_AwayLineup_','(.*?)'", search_dict['team2'], 'away')
        ]:
            rs = re.search(patern[0], html)
            rs = 'http://bongdaso.com/' + rs.group(1)
            rs = request_html(rs)
            soup = BeautifulSoup(rs, 'html.parser')
            rs = soup.select('div.squad_table table tr')
            da_chinhs = []  # starters
            da_phus = []    # substitutes
            alist = da_chinhs
            for count, tr in enumerate(rs):
                if count != 0:
                    # The separator row marks the start of the substitutes.
                    if tr.get('class') == ['fixture_separator']:
                        alist = da_phus
                        continue
                    number = tr.select('td:nth-of-type(1)')[0].get_text()
                    try:
                        number = int(number)
                    except ValueError:
                        number = False
                    print('tr**', tr)
                    player_name_tr = tr.select('td:nth-of-type(2)')[0]
                    name = player_name_tr.get_text()

                    if number:
                        adict_search = {'number': int(number), 'name': name}
                    else:
                        adict_search = {'name': name}
                    adict_update = {}

                    player_id = player_name_tr.get('id')
                    if player_id:
                        # Each player row links to a tooltip div with photo and birthday.
                        player_id = player_id.replace('player_', 'player_tip_')
                        player_id_soup = soup.select('div#%s' % player_id)[0]
                        image_soup = player_id_soup.select(
                            'div.boxBody > table > tr:nth-of-type(1) > td:nth-of-type(1) img'
                        )
                        if image_soup:
                            image_soup = image_soup[0]
                            image_link = image_soup['src']
                            image_link = image_link.replace('&amp;', '&')
                            image_link = 'http://bongdaso.com/' + image_link
                        else:
                            image_link = False
                        trs = player_id_soup.select(
                            'div.boxBody > table > tr:nth-of-type(1) > td:nth-of-type(2) tr'
                        )

                        if image_link:
                            adict_update['image_link'] = image_link
                        # Row 1 of the tooltip table holds the birthday.
                        for tip_count, tip_tr in enumerate(trs):
                            if tip_count == 0:
                                continue
                            if tip_count == 1:
                                td2 = tip_tr.select(
                                    'td:nth-of-type(2)')[0].get_text()
                                dt = datetime.strptime(td2, '%d/%m/%Y')
                                adict_update[
                                    'birthday'] = fields.Date.to_string(dt)
                    alist.append((adict_search, adict_update))
            for da_chinh_or_du_bi in [(da_chinhs, 'da_chinh'),
                                      (da_phus, 'du_bi')]:
                players = map(
                    lambda i: get_or_create_object_sosanh(
                        self, 'tsbd.player', i[0], i[1]).id,
                    da_chinh_or_du_bi[0])
                a_playerlines = map(
                    lambda i: get_or_create_object_sosanh(
                        self, 'tsbd.playerline', {
                            'player_id': i,
                            'team_id': patern[1],
                            'home_or_away': patern[2],
                            'da_chinh_hay_du_bi': da_chinh_or_du_bi[1],
                            'match_id': match_id
                        }).id, players)
                playerlines += list(a_playerlines)
        lineup_dict['playerline_ids'] = [(6, 0, playerlines)]
        return lineup_dict
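The final `[(6, 0, playerlines)]` uses Odoo's x2many write commands: `(6, 0, ids)` replaces whatever records the relation currently holds with exactly the given ids. Roughly, assuming the returned dict is later written to a hypothetical `match` record:

match.write({'playerline_ids': [(6, 0, playerlines)]})  # link exactly these lines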
Example #13
def get_soup_ajax_link(fix_id, template_link):
    # Fill the fixture id into the ajax URL template, fetch, and parse.
    score_link = template_link % fix_id
    html = request_html(score_link)
    soup = BeautifulSoup(html, 'html.parser')
    return soup
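Usage, with a made-up template URL in the style of Example #6 (only the %s placeholder matters to this helper):

template = 'http://bongdaso.com/_CastingInfo.aspx?FixtureID=%s'
soup = get_soup_ajax_link(56032, template)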
Example #14
File: match.py  Project: tu95ctv/duan_mi
    def leech_button(self):
        rs = request_html(self.link)
        # Save the raw HTML for offline debugging.
        with open('/media/sf_C_DRIVE/D4/dl/testfile.html', 'w') as file:
            file.write(rs)
        self.log = rs