Example #1
    def get(self):
        """Scrape the school homepage and update the stored news items."""
        # Assumed imports: from urllib2 import urlopen, from bs4 import
        # BeautifulSoup, import json, from datetime import datetime, plus
        # the ndb model New.
        url = 'http://www.cecyt9.ipn.mx/Paginas/inicio.aspx'
        html = urlopen(url).read()
        soup = BeautifulSoup(html, 'lxml')
        newsBox = soup.findAll('div', {'class': 'ca-item-main'})

        news = {'news': []}

        for notice in newsBox:
            textDiv = notice.find('div', {'class': 'ca-text'})
            if not textDiv:
                continue
            # Normalize all whitespace in one pass instead of chained
            # replace() calls; this also drops the leading space the
            # original tested for with noticeTitle[0] == ' '.
            noticeTitle = ' '.join(textDiv.find('span').text.split()).capitalize()
            noticeImage = notice.find('div', {'class': 'ca-icon'}).find('img')['src']
            noticeLink = notice.find('a', {'class': 'ca-more'})['href']
            noticeObject = {
                'title': noticeTitle,  # json.dumps handles unicode; no manual encode needed
                'link': noticeLink,
                'image': noticeImage,
            }
            # Bug fix: the original joined conditions with Python's `and`
            # inside New.query(), which filters on the last condition only;
            # ndb wants each filter as a separate argument (implicit AND).
            query = New.query(New.title == noticeTitle,
                              New.url == noticeLink,
                              New.image == noticeImage).fetch()
            if query:
                query[0].updated = datetime.now()
                query[0].put()
            else:
                news_object = New(title=noticeTitle, url=noticeLink, image=noticeImage)
                news_object.put()
            news['news'].append(noticeObject)
        self.response.headers['Content-Type'] = 'application/json'
        self.response.write(json.dumps(news))
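Example #1 reads and writes an App Engine ndb model named New. A minimal sketch of what that model might declare, inferred from the properties the handler touches (the property types are assumptions, not taken from the project):

# Sketch of the ndb model assumed by Example #1 (names inferred, types guessed).
from google.appengine.ext import ndb

class New(ndb.Model):
    title = ndb.StringProperty()
    url = ndb.StringProperty()
    image = ndb.StringProperty()
    updated = ndb.DateTimeProperty(auto_now_add=True)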
Example #2
    def find_news(self, method='xquery', search_text='', not_in=False):
        # Python 2 hack: force UTF-8 as the default codec so mixed
        # str/unicode feed content does not raise UnicodeDecodeError.
        # Assumed imports: requests, re, HTMLParser, lxml.etree as etree,
        # sxq (the project's XQuery helper), and the New container class.
        import sys
        reload(sys)
        sys.setdefaultencoding('utf8')

        news = []
        if not method:
            method = 'xquery'

        for source in self.rss_urls:
            r = requests.get(self.rss_urls[source])
            rss = r.content

            if method == 'xquery':
                # search_text is spliced into the query verbatim, so it
                # must not contain single quotes.
                query_s = ("for $i in //item "
                           "where contains(lower-case($i/title), "
                           "lower-case('" + search_text + "')) "
                           "return <new>{$i/title, $i/pubDate, $i/link}</new>")
                if not_in:
                    query_s = ("for $i in //item "
                               "where not(contains(lower-case($i/title), "
                               "lower-case('" + search_text + "'))) "
                               "return <new>{$i/title, $i/pubDate, $i/link}</new>")

                news_list = sxq.execute_all(query_s, rss)
                for n_i in news_list:
                    i = etree.fromstring(n_i)
                    title = i.xpath("./title")[0]
                    pubDate = i.xpath("./pubDate")[0]
                    link = i.xpath("./link")[0]
                    n = New(title=title.text,
                            link=link.text,
                            pubdate=pubDate.text)
                    news.append(n)

            elif method == 'regexp':
                h = HTMLParser.HTMLParser()
                rss = h.unescape(rss)
                # Combine flags with |, not + (equivalent here, but | is
                # the idiomatic form and cannot double-count a flag).
                item_re = re.compile(ur'<item>(.*?)</item>',
                                     re.DOTALL | re.UNICODE | re.IGNORECASE)
                escaped = re.escape(search_text)  # guard against regex injection
                for match in item_re.finditer(rss):
                    item = match.group(1)

                    pattern = ur'<title>((.*?)' + escaped + ur'(.*?))</title>'
                    if not_in:
                        pattern = ur'<title>((.(?<!' + escaped + ur'))*?)</title>'

                    title_re = re.compile(pattern,
                                          re.DOTALL | re.UNICODE | re.IGNORECASE)
                    matching = title_re.search(item)
                    if matching:
                        title = matching.group(1)
                        news.append(New(title=title))
        return news
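Examples #2 and #5 construct plain New(title=..., link=..., pubdate=...) objects rather than datastore entities. A container compatible with both call sites could be as small as the sketch below; the real class may carry more behavior:

class New(object):
    """Plain value object for one feed item (assumed shape)."""
    def __init__(self, title=None, link=None, pubdate=None):
        self.title = title
        self.link = link
        self.pubdate = pubdate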
Example #3
File: views.py  Project: clllyw/Project
def addnew(request):
    if request.method == 'POST':
        post = request.POST
        # The original iterated over every Usert row just to keep the last
        # one; fetching the last record directly is equivalent and cheaper.
        mu1 = Usert.objects.last()

        new_New = New(
            er=mu1.Username,
            Information=post["news"])
        new_New.save()
    return render_to_response("news-t.html")
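The view relies on two Django models, Usert and New, that are not shown. Judging from the attribute names used above (Username, er, Information), they might be declared roughly like this; the field types are guesses:

from django.db import models

class Usert(models.Model):
    Username = models.CharField(max_length=150)

class New(models.Model):
    er = models.CharField(max_length=150)  # poster's username
    Information = models.TextField()       # the news text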
Example #4
    def get(self):
        """Respond to a GET request with the latest news as JSON."""
        news = New.get_latests()
        news_dict = {'news': []}
        for new in news:
            news_dict['news'].append(new.to_dict())
        # Allow cross-origin reads of this public feed.
        self.response.headers.add_header("Access-Control-Allow-Origin", "*")
        self.response.headers['Content-Type'] = 'application/json'
        return self.response.write(json.dumps(news_dict))
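New.get_latests() is not defined anywhere in these examples. On the ndb model sketched after Example #1 it could plausibly be a classmethod like the following; the ordering key and default limit are assumptions:

    @classmethod
    def get_latests(cls, limit=20):
        # Hypothetical: newest entities first, ordered by the assumed
        # `updated` timestamp property.
        return cls.query().order(-cls.updated).fetch(limit)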
Example #5
    def get_all_news(self):
        """Fetch every configured RSS feed and return its items as New objects."""
        news = []
        for feed_url in self.rss_urls.values():
            r = requests.get(feed_url)
            root = etree.fromstring(r.content)
            for i in root.xpath("//item"):
                title = i.xpath("./title")[0]
                pubDate = i.xpath("./pubDate")[0]
                link = i.xpath("./link")[0]
                news.append(New(title=title.text, link=link.text, pubdate=pubDate.text))
        return news
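Both feed methods (find_news in Example #2 and get_all_news here) expect their owner to expose an rss_urls dict mapping a source name to a feed URL. A minimal usage sketch, where the RssReader class name and the feed URL are placeholders:

reader = RssReader()  # hypothetical class owning rss_urls and both methods
reader.rss_urls = {'example': 'http://example.com/feed.xml'}
all_items = reader.get_all_news()
filtered = reader.find_news(method='regexp', search_text='python')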
Example #6
def insertuser(url, headline, summary, datetime, section):  # insert one article into the database
    # Note: the `datetime` parameter shadows the stdlib module of the same
    # name inside this function. Assumed context: the Flask `app`, the
    # SQLAlchemy `db` session, the `New` model, and a configured `logger`.
    global articleurl
    articleurl = url
    with app.app_context():
        try:
            new_old = New(url=url,
                          datetime=datetime,
                          headline=headline,
                          summary=summary,
                          section=section)
            db.session.add(new_old)
            db.session.commit()
            new = New.query.filter(New.url == url).first()
            return new
        except Exception as ce:
            # Roll back so the session stays usable after a failed commit.
            db.session.rollback()
            logger.error(ce)
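Example #6 presumes a Flask-SQLAlchemy setup. A minimal sketch of the New model it writes to, with column names taken from the constructor call and every type a guess:

from flask import Flask
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)
db = SQLAlchemy(app)

class New(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    url = db.Column(db.String(500), unique=True)
    datetime = db.Column(db.String(50))   # stored as text here; type assumed
    headline = db.Column(db.String(500))
    summary = db.Column(db.Text)
    section = db.Column(db.String(100))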