Exemplos de Mongo.selectFeedById em Python

Linguagem de programação: Python

Espaço de nomes / nome do pacote: feedReader.mongoFunctions

Classe / Tipo: Mongo

Método / Função: selectFeedById

Exemplos em hotexamples.com: 1

Mongo.selectFeedById em Python - 1 exemplo encontrado. Este é o exemplo do mundo real mais bem avaliado de feedReader.mongoFunctions.Mongo.selectFeedById em Python, extraído de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

insertFeeds(1)

selectFeedById(1)

selectFeeds(1)

selectProcessedFeeds(1)

selectUnProcessedFeeds(1)

selectUser(1)

updateDepValues(1)

updateUserPref(1)

Métodos Frequentes

insertFeeds (1)

selectFeedById (1)

selectFeeds (1)

selectProcessedFeeds (1)

selectUnProcessedFeeds (1)

selectUser (1)

updateDepValues (1)

updateUserPref (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: ParsingFunctions.py Projeto: rohithb/threeSixtyOne

class ParsingFuncs:
    '''
    Contains all functions to get and parse the feeds.

    Feeds are downloaded with feedparser for every SiteInfo row and each
    entry is handed to the Mongo wrapper for storage.
    '''

    def __init__(self):
        '''
        Initialise mongodb connection
        '''
        self.mongo = Mongo()

    def fetchFeeds(self):
        '''
        Fetch the feed of every entry in the table siteInfo and store its
        entries in the database.

        A site is skipped when the server reports "304 Not Modified" or when
        the hash of the parsed result equals the hash stored for the site.
        '''
        for site in SiteInfo.objects.all():
            # feedparser takes the cache validators as the separate ``etag``
            # and ``modified`` arguments; both may be None / empty.
            feeds = feedparser.parse(site.feedUrl,
                                     etag=site.etag,
                                     modified=site.lastModified)
            if feeds.get('status') == 304:
                continue  # server says nothing changed since the last fetch

            # find the last modified date. This value will be in
            # feed.updated, feed.last_modified
            lastModified = self.findLastModifiedDate(feeds.feed)
            # The ETag is reported on the parse result itself, not on .feed
            etag = feeds.get('etag')

            feedsHash = self.md5Feeds(feeds)  # hash of the entire parse result
            if site.feedHash == feedsHash:
                continue  # if no change in feeds ignore it.

            site.feedHash = feedsHash  # if changed save it in db
            if etag is None:
                site.lastModified = lastModified
            else:
                site.etag = etag
            site.save()

            for entry in feeds.entries:
                # the format of published_parsed is not compatible with
                # mongodb, so it is replaced by a datetime
                entry['published_parsed'] = datetime.fromtimestamp(
                    mktime(entry.published_parsed))

                mediaContent = entry.get('media_content')
                try:
                    content = entry['content'][0]['value']
                except (KeyError, IndexError, TypeError):
                    content = None

                entry['image_link'] = self.getImage(media_content=mediaContent,
                                                    summary=entry['summary'],
                                                    content=content,
                                                    link=entry['link'])
                self.mongo.insertFeeds(entry, site.id)

    def allFeeds(self, user_id, lastDate=None):
        '''
        Return the feeds of a user as provided by Mongo.selectFeeds.

        lastDate, when given, is forwarded as dateOfLastItem.
        '''
        if lastDate is not None:
            return self.mongo.selectFeeds(user_id=user_id, dateOfLastItem=lastDate)
        return self.mongo.selectFeeds(user_id=user_id)

    def md5Feeds(self, feed):
        '''
        find md5 of feed (hex digest of the UTF-8 encoded str(feed))
        '''
        return hashlib.md5(str(feed).encode('utf-8')).hexdigest()

    def selectFeedById(self, id):
        '''
        Return whatever Mongo.selectFeedById yields for the given id.
        '''
        return self.mongo.selectFeedById(id)

    def getSiteTitle(self, siteId):
        '''
        Return the title of the first SiteInfo row with the given id,
        or None when there is no such row.
        '''
        for site in SiteInfo.objects.filter(id=siteId):
            return site.title
        return None

    def getSummary(self, summary):
        '''
        Strip the tags from summary and return its first 300 characters
        followed by "..." (the ellipsis is always appended).
        '''
        plainText = strip_tags(summary)
        return plainText[:300] + "..."

    def getFullPost(self, summaryDetail):
        '''
        Return summaryDetail with its tags stripped.
        '''
        return strip_tags(summaryDetail)

    def createLastModifiedStr(self, last_modified=None, etag=None):
        '''
        Build a textual form of the cache validators.

        Returns None when neither value is given. When both are given the
        "modified = ..." string replaces the "etag = ..." one.

        fetchFeeds does not use this string: feedparser expects the etag and
        the modification date as separate arguments. The method is kept
        unchanged for callers outside this class.
        '''
        modiStr = None
        if etag is not None:
            modiStr = "etag = " + str(etag)
        if last_modified is not None:
            modiStr = "modified = " + str(last_modified.utctimetuple())
        return modiStr

    def findLastModifiedDate(self, feed):
        '''
        Return a datetime for the feed's last modification.

        The first usable value of updated_parsed, date_parsed and
        published_parsed (in that order) is converted; when none of them is
        usable the current local time is returned.
        '''
        for field in ('updated_parsed', 'date_parsed', 'published_parsed'):
            try:
                return datetime.fromtimestamp(mktime(getattr(feed, field)))
            except (AttributeError, KeyError, TypeError,
                    ValueError, OverflowError, OSError):
                continue  # field missing or not convertible, try the next one
        return datetime(*time.localtime()[:6])

    def getFullPostURLOpen(self, link, summary):
        '''
        Download link and return the <div> enclosing the text that starts
        like summary.

        Returns None when link or summary is empty, when the page cannot be
        downloaded, or when the text is not found in the page.
        '''
        if not link or not summary:
            # Nothing to fetch, or nothing to look for in the fetched page.
            return None

        http = urllib3.PoolManager()
        try:
            page = http.request('GET', link).data
        except Exception:
            # Best effort: a page that cannot be fetched just means no post.
            return None

        soup = BeautifulSoup(page)
        # TODO: match the entire summary first; if that is not found retry
        # with a substring of length 50, then with shorter ones.
        # The summary is literal text, so it must be escaped before being
        # used as a regular expression.
        needle = re.compile(re.escape(summary[:25]))
        element = soup.find(text=needle)
        if element is None:
            return None
        return element.findParent('div')

    def findImgsrcFromHtml(self, content):
        '''
        Return the src of the first usable <img> in content, or None.

        Images without a src and images whose height or width is "1" are
        skipped.
        '''
        if not content:
            return None
        soup = BeautifulSoup(content)
        for img in soup.findAll('img'):
            src = img.get('src')
            if not src:
                continue
            # 1-pixel images are skipped (presumably tracking beacons)
            if img.get('height') == '1' or img.get('width') == '1':
                continue
            return src
        return None

    def getImage(self, media_content=None, summary=None, content=None, link=None):
        '''
        Return an image url for an entry, or None when none can be found.

        Sources are tried in order and the first hit wins: the url of the
        first media_content item, an <img> in summary, an <img> in content,
        and finally an <img> in the original page fetched from link.
        '''
        if media_content:
            try:
                url = media_content[0]['url']
            except (KeyError, IndexError, TypeError):
                url = None
            if url:
                return url

        # if no matching image is found in summary, look in content
        for markup in (summary, content):
            image = self.findImgsrcFromHtml(markup)
            if image is not None:
                return image

        # if image is not found in content then fetch the original page
        # and extract the image
        if link is not None:
            post = self.getFullPostURLOpen(link, summary)
            if post is not None:
                # serialise the element back to markup so it can be parsed
                return self.findImgsrcFromHtml(str(post))
        return None