Example #1
import datetime
import time

from bs4 import BeautifulSoup

import classes  # project-local module providing the ticker container


def yahoo(doc2):
    doc = doc2.decode(encoding='UTF-8')
    idata = []

    atype = 'Yahoo Finance'
    base = 'https://in.finance.yahoo.com'
    timey = time.mktime(datetime.datetime.now().timetuple())  # fetch time

    soup = BeautifulSoup(doc, "lxml")
    gdata = soup.find_all("", {"class": "description"})

    for i, item in enumerate(gdata):
        # Resolve the first anchor's relative href against the site base.
        xlink = ''
        link = item.find('a')
        if link is not None:
            xlink = base + str(link.get('href'))

        # The description text reads '<summary>More<headline>': the part
        # before the 'More' link text is the info, the part after is the
        # announcement. Split on that marker to separate the two.
        xstr = item.text.encode('ascii', 'ignore').decode('ascii')
        xstr2 = xstr.split('More')

        if len(xstr2) > 1:
            idata.append(classes.ticker(xstr2[1], atype, xlink, timey,
                                        xstr2[0], 0))
            if i == 50:
                break  # at most 50 articles per page

    idata.sort(key=lambda x: x.time, reverse=True)
    return idata[0:classes.ticker.narticles]
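
All four parsers hand their results to a project-local classes.ticker container that is not shown on this page. A minimal sketch consistent with the call sites (six constructor arguments, a time attribute used for sorting, and a narticles class attribute used to cap the returned list) could look like the following; the field names, the default of 20, and the meaning of the final 0 argument are assumptions, not taken from the source.

# Hypothetical reconstruction of classes.ticker; names and defaults are guesses.
class ticker:
    narticles = 20  # assumed cap on articles returned per source

    def __init__(self, announ, atype, link, time, info, flag):
        self.announ = announ  # headline text
        self.atype = atype    # source label, e.g. 'Yahoo Finance'
        self.link = link      # absolute article URL
        self.time = time      # Unix timestamp, used for newest-first sorting
        self.info = info      # summary text
        self.flag = flag      # always 0 at the call sites; purpose unknown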
Example #2
import time

import feedparser

import classes


def livemint(doc2, id):
    doc = doc2.decode(encoding='UTF-8')
    atype = 'livemint' + id
    feed = feedparser.parse(doc)
    idata = []
    for post in feed.entries:
        announ = post.title.encode('ascii', 'ignore').decode('ascii')
        xlink = post.link.encode('ascii', 'ignore').decode('ascii')
        timestamp = int(time.mktime(post.updated_parsed))
        # The summary is an HTML fragment; a crude split on '>' keeps the
        # text that follows the first tag.
        info = post.summary.encode('ascii', 'ignore').decode('ascii').split('>')
        idata.append(classes.ticker(announ, atype, xlink, timestamp, info[1], 0))
    idata.sort(key=lambda x: x.time, reverse=True)
    return idata[0:classes.ticker.narticles]
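
For context, a minimal driver for an RSS-based parser such as livemint might look as follows; the feed URL and the id suffix are illustrative assumptions, not taken from the source.

# Usage sketch: fetch the raw feed bytes and pass them to the parser.
import urllib.request

url = 'https://www.livemint.com/rss/markets'  # assumed feed URL
with urllib.request.urlopen(url) as resp:
    raw = resp.read()  # bytes, as livemint() expects before decoding
for item in livemint(raw, '-markets'):
    print(item.time, item.announ)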
Example #3
import time

from bs4 import BeautifulSoup

import classes


def smera(doc2):
    doc = doc2.decode(encoding='UTF-8')
    atype = 'SMERA'

    soup = BeautifulSoup(doc, "lxml")
    gdata = soup.find_all("", {"class": "company"})

    idata = []
    for items in gdata:
        announ = str(items.text).replace('View', '')  # drop the 'View' link text
        xlink = str(items.contents[1].get('href'))
        timestamp = int(time.time())  # the listing carries no date; use fetch time
        info = ''
        idata.append(classes.ticker(announ, atype, xlink, timestamp, info, 0))
    if not idata:
        return None  # empty or unparseable page
    idata.sort(key=lambda x: x.time, reverse=True)
    return idata[0:classes.ticker.narticles]
Example #4
import time

import feedparser

import classes


def etimes(doc2):
    doc = doc2.decode(encoding='UTF-8')
    atype = 'etimes'
    feed = feedparser.parse(doc)
    idata = []
    for post in feed.entries:
        announ = post.title.encode('ascii', 'ignore').decode('ascii')
        xlink = post.link.encode('ascii', 'ignore').decode('ascii')
        timestamp = int(time.mktime(post.updated_parsed))
        # The summary embeds image/anchor tags before the text; splitting
        # on '>' and taking the fourth fragment skips past them.
        info = post.summary.encode('ascii', 'ignore').decode('ascii').split('>')
        idata.append(classes.ticker(announ, atype, xlink, timestamp, info[3], 0))
    idata.sort(key=lambda x: x.time, reverse=True)
    return idata[0:classes.ticker.narticles]
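
Since every parser returns a list of classes.ticker objects sorted newest-first, merging the sources into one ticker stream is a natural final step. The helper below is a sketch under that assumption; two-argument parsers such as livemint can be wrapped in a lambda.

# Sketch: merge already-downloaded pages from several sources into one
# newest-first stream. 'pages' pairs each parser with its raw bytes.
def merged_ticker(pages):
    combined = []
    for parser, raw in pages:
        items = parser(raw)
        if items:  # smera() may return None on an empty page
            combined.extend(items)
    combined.sort(key=lambda x: x.time, reverse=True)
    return combined[:classes.ticker.narticles]

# e.g. merged_ticker([(yahoo, yahoo_bytes),
#                     (lambda b: livemint(b, '-markets'), mint_bytes)])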