Example no. 1
 def test_slugify_on_save(self):
     item = NewsItem(title="My News Item",
                     publish_date=now(),
                     create_user_id=1,
                     feed_id=1)
     item.save()
     self.assertEqual(item.slug, 'my-news-item')
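This test assumes the model builds its slug from the title when the item is first saved. A minimal sketch of a save() override that would satisfy it, assuming a plain Django model (the field definitions are guesses):

from django.db import models
from django.utils.text import slugify


class NewsItem(models.Model):
    # Hypothetical fields; only title and slug matter for the test above.
    title = models.CharField(max_length=255)
    slug = models.SlugField(max_length=255, blank=True)
    publish_date = models.DateTimeField()
    create_user_id = models.IntegerField()
    feed_id = models.IntegerField()

    def save(self, *args, **kwargs):
        # Derive the slug from the title on first save only.
        if not self.slug:
            self.slug = slugify(self.title)
        super(NewsItem, self).save(*args, **kwargs)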
Example no. 2
 def test_slugify_on_save(self):
     item = NewsItem(title="My News Item",
                     publish_date=now(),
                     create_user_id=1,
                     feed_id=1)
     item.save()
     self.assertEqual(item.slug, 'my-news-item')
Example no. 3
def add(request):
    if request.method == "POST":
        feed_id = request.POST.get("feed")
        title = request.POST.get("title")
        link = request.POST.get("link")
        summary = request.POST.get("summary")
        date = request.POST.get("date")

        if not feed_id or not title or not summary:
            return HttpResponseBadRequest(
                "Bad Request: must include feed, title, and summary\n")

        if date:
            try:
                date = dateutil.parser.parse(date)
            except (ValueError, OverflowError):
                return HttpResponseBadRequest(
                    "Bad Request: invalid date format\n")
        else:
            date = datetime.datetime.now()

        if not link:
            link = ""

        try:
            feed = NewsFeed.objects.get(id=feed_id)
        except NewsFeed.DoesNotExist:
            return HttpResponseNotFound("Not Found: no such feed id\n")

        item = NewsItem(feed=feed,
                        title=title,
                        link=link,
                        date=date,
                        summary=summary)
        item.save()

        # get previous item id
        prev_id = None
        try:
            prev_items = NewsItem.objects.filter(
                feed=feed, id__lt=item.id).order_by("-id")[:1]
            if len(prev_items) > 0:
                prev_id = prev_items[0].id
        except Exception:
            pass

        out = dict()
        if prev_id:
            out["prev_id"] = str(prev_id)
        out["items"] = [item.to_json()]
        hr = dict()
        hr["body"] = json.dumps(out) + "\n"

        fanout_publish("feed-" + str(feed.id), str(item.id), str(prev_id), hr)

        return HttpResponse("Posted\n")
    else:
        return HttpResponseNotAllowed(["POST"])
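The view above relies on item.to_json() and fanout_publish(), neither of which appears in the example; fanout_publish() presumably wraps the Fanout publishing API. A rough sketch of what to_json() might return, assuming it simply mirrors the fields the view sets (names and formatting are assumptions):

from django.db import models


class NewsItem(models.Model):
    # Hypothetical model matching the constructor call in the view above.
    feed = models.ForeignKey('NewsFeed', on_delete=models.CASCADE)
    title = models.CharField(max_length=255)
    link = models.URLField(blank=True)
    date = models.DateTimeField()
    summary = models.TextField()

    def to_json(self):
        # Serialize the fields the view populates when creating the item.
        return {
            "id": str(self.id),
            "title": self.title,
            "link": self.link,
            "date": self.date.isoformat(),
            "summary": self.summary,
        }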
Example no. 4
def create_news_items(is_sticky=False, amount=1):
    for i in range(0, amount):
        item = NewsItem(title='silly news item name',
                        publish_date=now(),
                        published=True,
                        create_user_id=1,
                        feed_id=randint(1, 2),
                        sticky=is_sticky)
        item.save()
Example no. 5
def get_items():
    page = int(request.query.page or 1)
    limit = int(request.query.limit or 100)
    items = NewsItem.select().paginate(page, limit)
    result = []
    for item in items:
        result.append(model_to_dict(item))

    pagination = get_pagination_object(NewsItem.select(), page, limit)
    response.set_header('content-type', 'application/json')
    response_obj = {"data": result, "pagination": pagination}
    return json.dumps(response_obj, cls=MyEncoder)
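This handler, like get_item() in Example no. 7, depends on a get_pagination_object() helper and a MyEncoder JSON encoder that are not shown. A minimal sketch of what they might look like, assuming peewee queries and the standard json module (everything here is an assumption):

import datetime
import json


def get_pagination_object(query, page, limit):
    # Hypothetical helper: summarize the full result set so a client can
    # page through it.
    total = query.count()
    return {
        "page": page,
        "limit": limit,
        "total": total,
        "pages": (total + limit - 1) // limit,
    }


class MyEncoder(json.JSONEncoder):
    # Hypothetical encoder: model_to_dict() leaves dates and datetimes as
    # native objects, which json.dumps() cannot serialize by default.
    def default(self, obj):
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()
        return super(MyEncoder, self).default(obj)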
Example no. 6
File: views.py Project: fanout/demo
def add(request):
	if request.method == "POST":
		feed_id = request.POST.get("feed")
		title = request.POST.get("title")
		link = request.POST.get("link")
		summary = request.POST.get("summary")
		date = request.POST.get("date")

		if not feed_id or not title or not summary:
			return HttpResponseBadRequest("Bad Request: must include feed, title, and summary\n")

		if date:
			try:
				date = dateutil.parser.parse(date)
			except (ValueError, OverflowError):
				return HttpResponseBadRequest("Bad Request: invalid date format\n")
		else:
			date = datetime.datetime.now()

		if not link:
			link = ""

		try:
			feed = NewsFeed.objects.get(id=feed_id)
		except NewsFeed.DoesNotExist:
			return HttpResponseNotFound("Not Found: no such feed id\n")

		item = NewsItem(feed=feed, title=title, link=link, date=date, summary=summary)
		item.save()

		# get previous item id
		prev_id = None
		try:
			prev_items = NewsItem.objects.filter(feed=feed, id__lt=item.id).order_by("-id")[:1]
			if len(prev_items) > 0:
				prev_id = prev_items[0].id
		except Exception:
			pass

		out = dict()
		if prev_id:
			out["prev_id"] = str(prev_id)
		out["items"] = [item.to_json()]
		hr = dict()
		hr["body"] = json.dumps(out) + "\n"

		fanout_publish("feed-" + str(feed.id), str(item.id), str(prev_id), hr)

		return HttpResponse("Posted\n")
	else:
		return HttpResponseNotAllowed(["POST"])
Example no. 7
def get_item(news_id):
    response.set_header("content-type", "application/json")
    try:
        item = NewsItem.select().where(NewsItem.id == news_id).get()
        obj = model_to_dict(item)
        response_obj = {"data": obj}
        return json.dumps(response_obj, cls=MyEncoder)
    except DoesNotExist:
        abort(404)
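get_items() and get_item() read Bottle's request/response globals, so they are presumably registered as routes somewhere else in the project. One plausible wiring, with both URL paths being pure guesses:

from bottle import route, run

# Hypothetical registration of the two handlers shown above.
route('/news', 'GET', get_items)
route('/news/<news_id:int>', 'GET', get_item)

if __name__ == '__main__':
    run(host='localhost', port=8080)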
Example no. 8
    def post(self): 
        # takes a NewsItem key and adds the logged-in user to votes
        # returns the news object with the updated vote count
        user = users.get_current_user()
        if user: 
            key = cgi.escape(self.request.get('key'))
            if key: 
                item = NewsItem.get_by_key_name(key)
                if item and user not in item.bumps:
                    item.bumps.append(user)
                    item.put()  
                    self.response.out.write(item.key().name())

        else:
            self.error(401) 
Example no. 9
 def post(self):
     key = self.request.get('key')
     feed = NewsFeed.get_by_key_name(key)
     # FIXME check if feed was retrieved
     result = urlfetch.fetch(feed.url)
     if result.status_code == 200:
         rssfeed = feedparser.parse(result.content)
         for i in rssfeed.entries:
             item = NewsItem(key_name=i.guid)
             item.url = i.link
             item.title = i.title
             item.text = i.summary
             item.date = datetime.datetime(*i.date_parsed[:6])
             item.orderdate = datetime.datetime(*i.date_parsed[:6])
             item.source = feed
             item.put()
         feed.last_fetch = datetime.datetime.now() 
         feed.put() 
         # re-queue the next fetch, passing the feed's key name (not the bound key method)
         taskqueue.add(queue_name='fetch-news-queue', url='/admin/feeds/fetch/',
                       params={'key': feed.key().name()})
         self.response.out.write('feed pulled')
     else:  
         self.error(500)
Example no. 10
    def get(self): 
        url = settings.YAHOO_PIPE % 'rss'  
        result = urlfetch.fetch(url) 
        if result.status_code == 200:
            feed = feedparser.parse(result.content) 
            for i in feed.entries:  
                item = NewsItem(key_name=i.guid) 
                item.url = i.link
                item.title = i.title 
                item.text = i.summary
                item.date = datetime.datetime(*i.date_parsed[:6])
                item.orderdate = datetime.datetime(*i.date_parsed[:6])
                item.put() 

            items = db.GqlQuery("SELECT * FROM NewsItem ORDER BY orderdate DESC LIMIT 100")
 
            context = {'news':items }
            #context = add_user_to_context(context)
            self.response.out.write(
                template.render(tmpl('templates/news2.html'), context))
        else: 
            self.response.out.write('err') 
Example no. 11
CUR_DIR = os.path.dirname(os.path.realpath(__file__))

from models import NewsItem

# Set up the template engine to look in the templates directory
template_loader = jinja2.FileSystemLoader('templates')
template_env = jinja2.Environment(loader=template_loader)

# Load the template file
template_file = "report.tpl.txt"
template = template_env.get_template(template_file)

# Load all the news items from the past two days
two_days_ago_ts = arrow.utcnow().ceil('hour').replace(days=-2).timestamp
news_items = NewsItem.select().where(NewsItem.published_ts > two_days_ago_ts)

# Create a dictionary with all the stories grouped by source
sources = {}
for news_item in news_items:

    if news_item.source not in sources:
        sources[news_item.source] = {
            'items': [],
            'name': news_item.source,
        }
        
    # Add the news item
    sources[news_item.source]['items'].append(news_item)

# Sort the news items for each key
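The example cuts off at the final comment. One plausible way to finish the grouping step, assuming items should appear newest first within each source (the sort key is an assumption):

# Sort each source's items, newest first, by their publish timestamp.
for source in sources.values():
    source['items'].sort(key=lambda news_item: news_item.published_ts, reverse=True)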
Example no. 12
h1_el = soup.find('h1', text='Weather Articles')
tr_els = h1_el.findAllNext('tr')

for tr_el in tr_els:
    
    # Make sure the URL is absolute
    link = tr_el.a['href'].strip()
    if not link.startswith('http'):
        link = 'http://www.wunderground.com' + link
    
    # Create a hash from the URL to make a unique identifier
    url_hash = hashlib.md5(link).hexdigest()

    # See if the item already exists
    try:
        item = NewsItem.get(NewsItem.url_hash==url_hash)
        print 'Item Exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating new item.'
        item = NewsItem()

    summary = tr_el.p.text.strip()
    headline = tr_el.h3.text.strip()

    # Try to get the opengraph data
    try:
        link_request = requests.get(link)
        links_soup = bs4.BeautifulSoup(link_request.text, 'html.parser')
        meta_og_title_el = links_soup.find('meta', {'property': 'og:title'})
        meta_og_desc_el = links_soup.find('meta', {'property': 'og:description'})
Example no. 13
# Set up the template engine to look in the templates directory
template_loader = jinja2.FileSystemLoader('templates')
template_env = jinja2.Environment(loader=template_loader)

# Add filters to the environment to make them visible in the template
template_env.filters['format_date'] = reformat_date
template_env.filters['get_source_url'] = get_source_url

# Load the template file
template_file = "index.tpl.html"
template = template_env.get_template(template_file)

# Load all the news items
three_days_ago = arrow.utcnow().to('US/Eastern').replace(
    hours=-72).format('YYYY-MM-DD')
news_items = NewsItem.select().where(NewsItem.published_date > three_days_ago,
                                     NewsItem.hidden == 0)
news_items = news_items.order_by(NewsItem.published_ts)

# Render the template
context = {
    'news_items': news_items,
    'updated_eastern': arrow.utcnow().to('US/Eastern')
}
output = template.render(context)

# Save the output
filepath = os.path.join(CUR_DIR, 'output/sources.html')
with codecs.open(filepath, 'w', 'utf-8') as f:
    f.write(output)
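The two filters registered above, reformat_date and get_source_url, are defined elsewhere; Example no. 26 shows only a fallback branch of get_source_url returning '#'. A rough sketch of what they might do, kept in the Python 2 style of these scripts (both implementations are guesses):

import urlparse

import arrow


def reformat_date(value):
    # Hypothetical: render the stored date in a friendlier form.
    return arrow.get(value).format('MMM D, YYYY')


def get_source_url(link):
    # Hypothetical: reduce a story link to its site root, falling back to
    # '#' when the link cannot be parsed.
    try:
        parsed = urlparse.urlparse(link)
        return '%s://%s' % (parsed.scheme, parsed.netloc)
    except Exception:
        return '#'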
Example no. 14
import peewee
import sys

from models import NewsItem

if len(sys.argv) < 2:
    sys.exit('Usage: $ python hide_story.py <story id>')
else:
    id = sys.argv[1]
    
try:
    id = int(id)
except ValueError:
    sys.exit("Invalid Story ID")

# Try to fetch the item
try:
    item = NewsItem.get(NewsItem.id==id)
except peewee.DoesNotExist:
    sys.exit("Error! News Item with ID %d not found" % id)

print 'You are attempting to hide story id %d' % id
print 'Headline: %s' % item.title

confirm = raw_input("Are you sure? Y/n: ")
if confirm == 'Y':
    item.hidden = True
    item.save()
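The scraper and report scripts in these examples import NewsItem from a local models module that is never shown. A minimal peewee model with the fields they touch would look roughly like this (field types and the database file are assumptions):

import peewee

db = peewee.SqliteDatabase('news.db')  # assumed storage


class NewsItem(peewee.Model):
    url_hash = peewee.CharField(unique=True)
    title = peewee.CharField()
    summary = peewee.TextField()
    source = peewee.CharField()
    authors = peewee.CharField(default='')
    link = peewee.CharField()
    published_date = peewee.CharField()   # 'YYYY-MM-DD' strings
    published_ts = peewee.IntegerField()  # UTC timestamps
    inserted_ts = peewee.IntegerField()
    hidden = peewee.BooleanField(default=False)

    class Meta:
        database = db

# On first run, db.create_tables([NewsItem]) would create the table.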
Example no. 15
        'travel',
    )

    # See if any of the skippable ids are in the story ids
    if pcollid in skippable_collection_ids:
        print 'Skipping %s story' % pcollid
        continue

    # If it's also published on weather underground, skip it
    if 'wunderground' in tags:
        print 'Skipping Weather Underground Story'
        continue

    # See if the story already exists
    try:
        item = NewsItem.get(NewsItem.url_hash == url_hash)
        print 'Item Exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating new item.'
        item = NewsItem()

    item.url_hash = url_hash
    item.title = unidecode.unidecode(entry['title'].strip())
    item.summary = unidecode.unidecode(entry['description'].strip())
    item.source = "Weather Channel"
    item.link = link
    item.published_date = published_date
    item.published_ts = published_ts
    item.inserted_ts = arrow.utcnow().timestamp
Example no. 16
request = requests.get(source_url)
soup = bs4.BeautifulSoup(request.text, 'html.parser')
article_els = soup.findAll('article', class_='story_list span3 col')

for article_el in article_els:

    div_el = article_el.find('div', class_='content')

    link = 'http://www.sandiegouniontribune.com' + div_el.a['href']
    
    # Create a hash from the URL to make a unique identifier
    url_hash = hashlib.md5(link).hexdigest()

    # See if the item already exists
    try:
        item = NewsItem.get(NewsItem.url_hash==url_hash)
        print 'Item Exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating new item.'
        item = NewsItem()

    date = div_el.find('p', class_='date').text.replace('Updated', '').strip()
    dt = dateutil.parser.parse(date)
    dt = pytz.timezone('US/Pacific').localize(dt)
    published_date = arrow.get(dt).date().strftime('%Y-%m-%d')

    headline = div_el.a.text.strip()
    published_ts = arrow.get(dt).to('UTC').timestamp
    summary = ''
Example no. 17
#! /usr/bin/env python

## To be Run every hour

from scrap import UnilagScrap as Scrap
from models import NewsItem
from datetime import datetime

scrapper = Scrap()

news_items = scrapper.get_news_items()

for item in news_items:
    ## let's insert stuff
    # first check that it does not already exist!
    to_db = NewsItem.select().where(NewsItem.slug == item['slug'])
    if len(to_db) == 0:
        # item has not been in the db before
        to_db = NewsItem(news_hash=item['news_hash'],
                         slug=item['slug'],
                         news_title=item['title'],
                         news_link=item['link'],
                         date_updated=item['date_updated'],
                         intro_text=item['intro_text'],
                         scrapped_at=datetime.now())
        to_db.save()
    else:
        to_db = to_db.get()
        # check if the hash has changed
        if to_db.news_hash != item['news_hash']:
            to_db.news_hash = item['news_hash']
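The snippet is truncated in the middle of the update branch. A plausible continuation, assuming the remaining fields are refreshed the same way the insert branch sets them:

            to_db.news_title = item['title']
            to_db.news_link = item['link']
            to_db.date_updated = item['date_updated']
            to_db.intro_text = item['intro_text']
            to_db.scrapped_at = datetime.now()
            to_db.save()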
Example no. 18
source_url = 'http://blogs.seattletimes.com/today/category/weather-beat/feed/'
feed = feedparser.parse(source_url)
entries = feed.entries

for entry in entries:
    
    link = entry.link
    url_hash = hashlib.md5(link).hexdigest()
    date = entry.published_parsed

    published_date = arrow.get(date).to('US/Pacific').date().strftime('%Y-%m-%d')
    published_ts = arrow.get(date).to('US/Pacific').to('UTC').timestamp

    # See if we already have this story
    try:
        NewsItem.get(NewsItem.url_hash==url_hash)
        print 'Item exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating item.'
        item = NewsItem()

    headline = entry.title
    summary = entry.summary

    item.url_hash = url_hash
    item.link = link
    item.source = 'Seattle Times'
    item.title = headline
    item.summary = summary
    item.published_date = published_date
Example no. 19
        'travel',
    )
    
    # See if any of the skippable ids are in the story ids
    if pcollid in skippable_collection_ids:
        print 'Skipping %s story' % pcollid
        continue

    # If it's also published on weather underground, skip it
    if 'wunderground' in tags:
        print 'Skipping Weather Underground Story'
        continue

    # See if the story already exists
    try:
        item = NewsItem.get(NewsItem.url_hash==url_hash)
        print 'Item Exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating new item.'
        item = NewsItem()

    item.url_hash = url_hash
    item.title = unidecode.unidecode(entry['title'].strip())
    item.summary = unidecode.unidecode(entry['description'].strip())
    item.source = "Weather Channel"
    item.link = link
    item.published_date = published_date
    item.published_ts = published_ts
    item.inserted_ts = arrow.utcnow().timestamp
Example no. 20
import peewee
import sys

from models import NewsItem

if len(sys.argv) < 2:
    sys.exit('Usage: $ python hide_story.py <story id>')
else:
    id = sys.argv[1]

try:
    id = int(id)
except ValueError:
    sys.exit("Invalid Story ID")

# Try to fetch the item
try:
    item = NewsItem.get(NewsItem.id == id)
except peewee.DoesNotExist:
    sys.exit("Error! News Item with ID %d not found" % id)

print 'You are attempting to hide story id %d' % id
print 'Headline: %s' % item.title

confirm = raw_input("Are you sure? Y/n: ")
if confirm == 'Y':
    item.hidden = True
    item.save()
Example no. 21
h1_el = soup.find('h1', text='Weather Articles')
tr_els = h1_el.findAllNext('tr')

for tr_el in tr_els:

    # Make sure the URL is absolute
    link = tr_el.a['href'].strip()
    if not link.startswith('http'):
        link = 'http://www.wunderground.com' + link

    # Create a hash from the URL to make a unique identifier
    url_hash = hashlib.md5(link).hexdigest()

    # See if the item already exists
    try:
        item = NewsItem.get(NewsItem.url_hash == url_hash)
        print 'Item Exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating new item.'
        item = NewsItem()

    summary = tr_el.p.text.strip()
    headline = tr_el.h3.text.strip()

    # Try to get the opengraph data
    try:
        link_request = requests.get(link)
        links_soup = bs4.BeautifulSoup(link_request.text, 'html.parser')
        meta_og_title_el = links_soup.find('meta', {'property': 'og:title'})
        meta_og_desc_el = links_soup.find('meta',
Example no. 22
        'PM Update',
    )

    # Skip the story if it starts with "D.C. area forecast"
    prefix_match = False
    for prefix in skippable_headline_prefixes:
        if entry.title.startswith(prefix):
            prefix_match = True

    if prefix_match:
        print 'Skipping story'
        continue

    # See if we already have this story
    try:
        NewsItem.get(NewsItem.url_hash == url_hash)
        print 'Item exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating item.'
        item = NewsItem()

    soup = bs4.BeautifulSoup(entry.description, 'html.parser')
    item.summary = unidecode.unidecode(soup.text.strip())
    item.title = unidecode.unidecode(entry.title)

    item.url_hash = url_hash
    item.link = link
    item.authors = ''
    item.source = 'Capital WX Gang'
    item.published_date = published_date
Example no. 23
        'PM Update',
    )

    # Skip the story if it starts with "D.C. area forecast"
    prefix_match = False
    for prefix in skippable_headline_prefixes:
        if entry.title.startswith(prefix):
            prefix_match = True
    
    if prefix_match:
        print 'Skipping story'
        continue
    
    # See if we already have this story
    try:
        NewsItem.get(NewsItem.url_hash==url_hash)
        print 'Item exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating item.'
        item = NewsItem()

    soup = bs4.BeautifulSoup(entry.description, 'html.parser')
    item.summary = unidecode.unidecode(soup.text.strip())
    item.title = unidecode.unidecode(entry.title)

    item.url_hash = url_hash
    item.link = link
    item.authors = ''
    item.source = 'Capital WX Gang'
    item.published_date = published_date
Example no. 24
    headline = li_el.find('h4').text
    date = li_el.find('h5').text
    description = li_el.find('p').text
    
    # Parse the date
    dt = dateutil.parser.parse(date)
    dt = pytz.timezone('US/Eastern').localize(dt)
    utc_dt = arrow.get(dt).to('UTC')
    published_date = arrow.get(dt).date().strftime('%Y-%m-%d')
    
    # Create a unique identifier from the hash of the URL
    url_hash = hashlib.md5(link).hexdigest()
    
    # See if the story already exists
    try:
        item = NewsItem.get(NewsItem.url_hash==url_hash)
        print 'Item Exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating new item.'
        item = NewsItem()

    # Try to get the opengraph data
    try:
        link_request = requests.get(link)
        links_soup = bs4.BeautifulSoup(link_request.text, 'html.parser')
        meta_og_title_el = links_soup.find('meta', {'property': 'og:title'})
        meta_og_desc_el = links_soup.find('meta', {'property': 'og:description'})
        meta_og_url_el = links_soup.find('meta', {'property': 'og:url'})
    except Exception, e:
        meta_og_title_el = None
Example no. 25
feed = feedparser.parse(source_url)
entries = feed.entries

for entry in entries:

    link = entry.link
    url_hash = hashlib.md5(link).hexdigest()
    date = entry.published_parsed

    published_date = arrow.get(date).to('US/Pacific').date().strftime(
        '%Y-%m-%d')
    published_ts = arrow.get(date).to('US/Pacific').to('UTC').timestamp

    # See if we already have this story
    try:
        NewsItem.get(NewsItem.url_hash == url_hash)
        print 'Item exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating item.'
        item = NewsItem()

    headline = entry.title
    summary = entry.summary

    item.url_hash = url_hash
    item.link = link
    item.source = 'Seattle Times'
    item.title = headline
    item.summary = summary
    item.published_date = published_date
Example no. 26
        return '#'

# Set up the template engine to look in the templates directory
template_loader = jinja2.FileSystemLoader('templates')
template_env = jinja2.Environment(loader=template_loader)

# Add filters to the environment to make them visible in the template
template_env.filters['format_date'] = reformat_date
template_env.filters['get_source_url'] = get_source_url

# Load the template file
template_file = "index.tpl.html"
template = template_env.get_template(template_file)

# Load all the news items
three_days_ago = arrow.utcnow().to('US/Eastern').replace(hours=-72).format('YYYY-MM-DD')
news_items = NewsItem.select().where(
    NewsItem.published_date > three_days_ago,
    NewsItem.hidden == 0
)
news_items = news_items.order_by(NewsItem.published_ts)

# Render the template
context = {'news_items': news_items, 'updated_eastern': arrow.utcnow().to('US/Eastern') }
output = template.render(context)

# Save the output
filepath = os.path.join(CUR_DIR, 'output/sources.html')
with codecs.open(filepath, 'w', 'utf-8') as f:
    f.write(output)