def get( me, kanal, o, today):
    """Fetch a channel's daily-schedule page(s) and parse them into program items.

    Tries up to three URL templates from the channel config ('daily0'..'daily2'),
    downloads each via visit(), then splits the accumulated page text
    (me.today_whole) into (time-range, description) pairs which are appended to
    me.today_items.  Returns the raw parse result of the last visited URL.

    me    -- the instance (author's convention for `self`)
    kanal -- channel config mapping; read: 'daily0'..'daily2', .stream, .name, .abbr
    o     -- options object; read: .ienc, .html_notfixed, .html_strict
    today -- datetime.date/datetime used for URL substitution and item tagging

    NOTE(review): if every URL is missing or raises HTTPError, `indata` is never
    bound and the final `return indata` would raise NameError — presumably at
    least one 'daily*' URL is always configured; confirm against callers.
    """
    for url in [
            kanal.get('daily0'),
            kanal.get('daily1'),
            kanal.get('daily2'),
            ]:
        if not url: continue
        # yymmdd is referenced by the URL template via the locals() expansion below
        yymmdd = today.strftime( '%y%m%d' )
        url = url.format( **locals() )
        print( '#...', url, file= sys.stderr)
        me.anitem.update( today= today, stream= kanal.stream, channel= kanal.name)
        # remember item count before this page — used only by the debug print below
        m = len( me.today_items)
        try:
            indata = visit( url, me.grammar_stack,
                data2text = slim,
                ienc= o.ienc, html_notfixed =o.html_notfixed, html_strict =o.html_strict,
                BR_as_data= '<br>'
                )
        except urllib.error.HTTPError as e:
            print( ' ?daily', kanal.abbr, url, e, file= sys.stderr)
            continue
        # visit() (via the grammar stack) appears to push the page text onto
        # me.today_whole as a side effect; take the most recent entry.
        whole = me.today_whole.pop()
        # normalize en-dash to plain '-' so the range regex below matches
        whole = whole .replace( '\u2013','-'  #-
                )
        # Split on time ranges like "от 10.30 до 12 часа" / "oт 10:30 - 12 часа".
        # Character classes mix Cyrillic and Latin homoglyphs ([оo], [тt], [дd])
        # because the pages use both.  4 capture groups => stride 5 in tds.
        tds = re.split( '([оo][тt] *\d+([.:]\d+)? *-? *([дd][оo]|-) *\d+([.:]\d+)? *часа)', whole, flags= re.IGNORECASE )
        #print( '#...', len(tds), len(me.today_items) -m, file= sys.stderr)
        #print( '##...', tds, file= sys.stderr)
        # tds[i] = full time-range match, tds[i+4] = text up to the next range
        allitems = [ (tds[ i], tds[i+3+1]) for i in range( 1,len( tds),4+1) ]
        for times,data in allitems:
            # after this split: times[1] = start time number, times[4] = end time number
            times = re.split( '(\d+(.\d+)?)', times)
            m = me.re_titles.search( data)
            #print( 3333333, times, data, m.groups())
            title= m and m.group( 'title') or ''
            text = m and m.group( 'text') or ''
            me.today_items.append( da( bnr_daily.anitem,
                time = time4str( times[1] ),
                endtime = time4str( times[4] ),
                title = slim( title),
                text = slim( text.replace('<br>','') ),
                ))
    # returns the parse result of the last URL visited (source is whitespace-
    # mangled; return placement reconstructed — TODO confirm it is loop-external)
    return indata
def get( me, kanal, o, today):
    """Fetch a channel's daily-schedule index page, then follow per-day links.

    First downloads the 'daily' URL (grammar_stack1), which populates
    me.today_whole with link entries as a side effect.  Then, for today and the
    next day, locates the matching per-day link (by channel, month name, and day
    number in the link title), downloads it (grammar_stack2), and parses the
    resulting text into (time-range, description) program items appended to
    me.today_items.  Returns an ordered dict mapping '' -> index-page parse and
    each followed url2 -> its parse.

    me    -- the instance (author's convention for `self`)
    kanal -- channel config mapping; read: 'daily', .stream, .name, .abbr
    o     -- options object; read: .ienc, .html_notfixed, .html_strict
    today -- datetime.date/datetime; start of the 2-day window
    """
    #me.today_whole = []
    indata = dictOrder()
    for url in [ kanal.get('daily') ]:
        if not url: continue
        print( '#...', url, file= sys.stderr)
        me.anitem.update( today= today, stream= kanal.stream, channel= kanal.name)
        try:
            indata[''] = visit( url, me.grammar_stack1,
                #return_also_headers=True,
                data2text = slim,
                ienc= o.ienc, html_notfixed =o.html_notfixed, html_strict =o.html_strict,
                BR_as_data= '<br>'
                )
        except urllib.error.HTTPError as e:
            print( ' ?daily1', kanal.abbr, url, e, file= sys.stderr)
            continue
        #print( 4444444444444444444, me.today_whole)
        # day-after / today / day-before  (translated from Bulgarian)
        ndays=2
        # iterate today and tomorrow
        for dnes in [ today + datetime.timedelta( days= dayofs ) for dayofs in range( ndays) ]:
            # find the per-day link: same channel, month name (bnr_weekly.months
            # is 0-based by month) and day number both present in the link title
            url2 = None
            for u in me.today_whole:
                if (u.channel == me.anitem.channel
                        and bnr_weekly.months[ dnes.month-1 ] in u.title.lower()
                        and str( dnes.day) in u.title.split()
                        and u.get('url')
                        ):
                    url2 = u.url
                    break
            if not url2: continue
            me.anitem.update( today= dnes)
            if '://' in url:  #http
                # resolve a relative link against the index page URL
                url2 = urllib.parse.urljoin( url, url2)
            print( '#....', url2, file= sys.stderr)
            try:
                indata[ url2] = visit( url2, me.grammar_stack2,
                    data2text = slim,
                    ienc= o.ienc, html_notfixed =o.html_notfixed, html_strict =o.html_strict,
                    BR_as_data= '<br>'
                    )
            except urllib.error.HTTPError as e:
                print( ' ?daily2', kanal.abbr, url2, e, file= sys.stderr)
                continue
            #print( 5555555554444444444, me.today_items)
            # visit() appears to append the day page's text entry to
            # me.today_whole; take the newest one's title as the schedule text
            whole = me.today_whole[-1].title
            # normalize en-dash to plain '-' so the range regex below matches
            whole = whole .replace( '\u2013','-'  #-
                    )
            # Split on time ranges like "от 10.30 до 12 часа"; classes mix
            # Cyrillic/Latin homoglyphs ([оo],[тt],[дd]).  4 capture groups =>
            # stride 5 in tds.  (Literal reconstructed — it was broken across
            # physical lines in the mangled source; identical to the sibling
            # get()'s regex.)
            tds = re.split( '([оo][тt] *\d+([.:]\d+)? *-? *([дd][оo]|-) *\d+([.:]\d+)? *часа)', whole, flags= re.IGNORECASE )
            #print( '#...', len(tds), len(me.today_items) -m, file= sys.stderr)
            #print( '##...', tds, file= sys.stderr)
            # tds[i] = full time-range match, tds[i+4] = text up to next range
            allitems = [ (tds[ i], tds[i+3+1]) for i in range( 1,len( tds),4+1) ]
            #print( 22222222, allitems)
            for times,data in allitems:
                # after this split: times[1] = start time, times[4] = end time
                times = re.split( '(\d+(.\d+)?)', times)
                m = me.re_titles.search( data)
                #print( 3333333, times, data, m.groups())
                title= m and m.group( 'title') or ''
                text = m and m.group( 'text') or ''
                me.today_items.append( da( bnr_daily.anitem,
                    time = time4str( times[1] ),
                    endtime = time4str( times[4] ),
                    title = slim( title),
                    text = slim( text.replace('<br>','') ),
                    ))
                #print( 3333333, times, data, m.groups())
    return indata