Exemplo n.º 1
0
    def test_well_formed(self):
        """Every supported feed type should parse without raising."""
        for kind in FeedHandlerTestCase.FEED_TYPES:
            with open(f'api/tests/test_files/{kind}/well_formed.xml', 'r') as fp:
                xml_text = fp.read()

            feed_handler.text_2_d(xml_text)
Exemplo n.º 2
0
    def test_malformed(self):
        """A malformed document of every feed type should raise QueryException."""
        for kind in FeedHandlerTestCase.FEED_TYPES:
            with open(f'api/tests/test_files/{kind}/malformed.xml', 'r') as fp:
                xml_text = fp.read()

            with self.assertRaises(QueryException):
                feed_handler.text_2_d(xml_text)
Exemplo n.º 3
0
def _save_feed(url):
    """Fetch the feed at *url*, parse it, and persist it with its entries.

    Raises QueryException('feed not found', 404) if the HTTP request fails.
    Entries that cannot be parsed are silently skipped. Runs inside a single
    DB transaction and returns the saved feed.
    """
    try:
        response = rss_requests.get(url)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        raise QueryException('feed not found', 404)

    with transaction.atomic():
        d = feed_handler.text_2_d(response.text)

        feed = feed_handler.d_feed_2_feed(d.feed, url)
        feed.with_subscription_data()
        feed.save()

        entries = []
        for d_entry in d.get('entries', []):
            try:
                entry = feed_handler.d_entry_2_feed_entry(d_entry)
            except ValueError:  # pragma: no cover
                # unparseable entry: skip rather than abort the whole save
                continue

            entry.feed = feed
            entries.append(entry)

        models.FeedEntry.objects.bulk_create(entries)

        return feed
Exemplo n.º 4
0
    def test_d_feed_2_feed_entry_no_content(self):
        """An Atom entry lacking content should be rejected with ValueError."""
        with open('api/tests/test_files/atom_1.0/well_formed_no_content.xml', 'r') as fp:
            xml_text = fp.read()

        d = feed_handler.text_2_d(xml_text)

        with self.assertRaises(ValueError):
            feed_handler.d_entry_2_feed_entry(d.entries[0])
Exemplo n.º 5
0
    def test_d_feed_2_feed_entry_plaintext(self):
        """A plaintext-content Atom entry should convert to a FeedEntry model."""
        with open('api/tests/test_files/atom_1.0/well_formed_text.xml', 'r') as fp:
            xml_text = fp.read()

        d = feed_handler.text_2_d(xml_text)

        entry = feed_handler.d_entry_2_feed_entry(d.entries[0])
        self.assertIsInstance(entry, models.FeedEntry)
Exemplo n.º 6
0
    def test_d_feed_2_feed_entry(self):
        """The first entry of every supported feed type converts to FeedEntry."""
        for kind in FeedHandlerTestCase.FEED_TYPES:
            with open(f'api/tests/test_files/{kind}/well_formed.xml', 'r') as fp:
                xml_text = fp.read()

            d = feed_handler.text_2_d(xml_text)

            entry = feed_handler.d_entry_2_feed_entry(d.entries[0])
            self.assertIsInstance(entry, models.FeedEntry)
Exemplo n.º 7
0
    def test_d_entry_2_entry_tags(self):
        """Tags from each feed type form a non-empty frozenset of strings."""
        for kind in FeedHandlerTestCase.FEED_TYPES:
            with open(f'api/tests/test_files/{kind}/well_formed.xml', 'r') as fp:
                xml_text = fp.read()

            d = feed_handler.text_2_d(xml_text)

            tags = feed_handler.d_entry_2_entry_tags(d.entries[0])
            self.assertIs(type(tags), frozenset)

            self.assertGreater(len(tags), 0, f'{kind} is empty')

            for tag in tags:
                self.assertIs(type(tag), str)
Exemplo n.º 8
0
    def test_d_feed_2_feed(self):
        """d_feed_2_feed should carry over the URL, title, and home link."""
        for kind in FeedHandlerTestCase.FEED_TYPES:
            with open(f'api/tests/test_files/{kind}/well_formed.xml', 'r') as fp:
                xml_text = fp.read()

            d = feed_handler.text_2_d(xml_text)

            url = 'http://www.example.com'
            feed = feed_handler.d_feed_2_feed(d.feed, url)

            self.assertEqual(feed.feed_url, url)
            self.assertEqual(feed.title, d.feed.get('title'))
            self.assertEqual(feed.home_url, d.feed.get('link'))
Exemplo n.º 9
0
def scrape_feed(feed, response_text):
    """Parse *response_text* and sync its entries into the DB for *feed*.

    Entries already stored (matched on feed, url, and updated_at) are
    refreshed in place; the rest are bulk-created as new rows. Finally
    stamps feed.db_updated_at — the caller is responsible for saving
    *feed* itself.
    """
    d = feed_handler.text_2_d(response_text)

    new_feed_entries = []

    for d_entry in d.get('entries', []):
        feed_entry = None
        try:
            feed_entry = feed_handler.d_entry_2_feed_entry(d_entry)
        except ValueError:  # pragma: no cover
            # unparseable entry: skip it rather than failing the whole scrape
            continue

        # Look up a previously-stored copy of this entry. updated_at is part
        # of the match key, with NULL handled via an explicit isnull lookup
        # (a plain updated_at=None filter would not match NULL rows).
        old_feed_entry = None
        old_feed_entry_get_kwargs = {
            'feed': feed,
            'url': feed_entry.url,
        }
        if feed_entry.updated_at is None:
            old_feed_entry_get_kwargs['updated_at__isnull'] = True
        else:
            old_feed_entry_get_kwargs['updated_at'] = feed_entry.updated_at

        try:
            old_feed_entry = models.FeedEntry.objects.get(
                **old_feed_entry_get_kwargs)
        except models.FeedEntry.DoesNotExist:
            pass  # no stored copy — treat as a new entry below

        if old_feed_entry is not None:
            # Refresh the stored row with the freshly-parsed fields.
            # NOTE(review): 'id' (the primary key) is reassigned and listed in
            # update_fields — rewriting a PK via save(update_fields=...) is
            # unusual in Django and may not behave as intended; confirm this
            # is deliberate.
            old_feed_entry.id = feed_entry.id
            old_feed_entry.content = feed_entry.content
            old_feed_entry.author_name = feed_entry.author_name
            old_feed_entry.created_at = feed_entry.created_at
            old_feed_entry.updated_at = feed_entry.updated_at

            old_feed_entry.save(update_fields=[
                'id', 'content', 'author_name', 'created_at', 'updated_at'
            ])
        else:
            feed_entry.feed = feed
            new_feed_entries.append(feed_entry)

    models.FeedEntry.objects.bulk_create(new_feed_entries)

    # NOTE(review): utcnow() returns a naive datetime; if the project runs
    # with USE_TZ=True an aware value may be expected here — confirm.
    feed.db_updated_at = datetime.datetime.utcnow()
Exemplo n.º 10
0
def _generate_feed(url):  # pragma: testing-subscription-setup-daemon-do-subscription
    """Fetch, parse, and persist the feed at *url*; return the saved feed.

    HTTP errors propagate from raise_for_status(); entries that fail to
    parse are skipped.
    """
    response = rss_requests.get(url)
    response.raise_for_status()

    d = feed_handler.text_2_d(response.text)

    feed = feed_handler.d_feed_2_feed(d.feed, url)
    feed.save()

    entries = []
    for d_entry in d.get('entries', []):
        try:
            entry = feed_handler.d_entry_2_feed_entry(d_entry)
        except ValueError:
            continue

        entry.feed = feed
        entries.append(entry)

    models.FeedEntry.objects.bulk_create(entries)

    return feed
Exemplo n.º 11
0

# CLI debugging script: fetch a feed URL, parse it, and (per the flags)
# save or print the result.

# monkey-patch the feed_handler logging
feed_handler.logger = logger

parser = argparse.ArgumentParser()
parser.add_argument('feed_url')
parser.add_argument('-s', '--save', action='store_true')
parser.add_argument('-f', '--print-feed', action='store_true')
parser.add_argument('-e', '--print-entries', action='store_true')
args = parser.parse_args()

# Download the feed; raise_for_status aborts the script on HTTP errors.
response = rss_requests.get(args.feed_url)
response.raise_for_status()

d = feed_handler.text_2_d(response.text)

feed = feed_handler.d_feed_2_feed(d.feed, args.feed_url)

feed_entries = []

for index, d_entry in enumerate(d.get('entries', [])):
    feed_entry = None
    try:
        feed_entry = feed_handler.d_entry_2_feed_entry(d_entry)
    except ValueError:  # pragma: no cover
        # NOTE(review): logger is invoked as a callable here — presumably a
        # factory returning a logger instance; confirm against its definition.
        logger().exception(f'unable to parse d_entry {index}')
        continue

    feed_entry.feed = feed