示例#1
0
    def fetch_and_save(self):
        """Fetch the subject's RSS feed and persist the papers it lists.

        The text returned by ``self.fetch()`` is parsed with ``ArxivXml``.
        When a fetch history already exists for this subject and feed date,
        only a history row flagged ``is_duplicated=True`` is recorded and no
        papers are saved.

        Returns:
            list: The ``Paper`` objects created from the feed, or an empty
            list when the feed date had already been fetched.

        Raises:
            RssParseError: If no date can be obtained from the feed.
        """
        rss_text = self.fetch()
        arxiv_xml = ArxivXml(rss_text)
        date = arxiv_xml.get_date()
        if not date:
            raise RssParseError('Could not parse dc:date.')

        if RssFetchHistory.exists(self._subject.id, date):
            # Record the duplicated fetch for bookkeeping; the created row is
            # not needed afterwards, so it is not bound to a local name.
            self._subject.rss_fetch_histories.create(
                date=date,
                is_duplicated=True,
            )
            return []

        rss_fetch_history = self._subject.rss_fetch_histories.create(date=date)
        papers = []
        for paper_item in arxiv_xml.get_paper_items():
            # Avoid the Google Translate API limit (100,000 chars/100 sec):
            # pause after every 50 papers processed so far.
            # NOTE(review): presumably Paper.from_xml triggers the
            # translation call -- confirm against Paper.
            if papers and len(papers) % 50 == 0:
                time.sleep(100)

            paper = Paper.from_xml(paper_item)
            # NOTE(review): bulk=False presumably makes the related manager
            # save the new paper itself -- confirm against the ORM docs.
            rss_fetch_history.papers.add(paper, bulk=False)
            paper.add_authors(paper_item['authors'])
            papers.append(paper)

        return papers
示例#2
0
    def test_from_xml(self, _):
        """Check the Paper that ``Paper.from_xml`` builds from an item dict.

        The title, abstract and link are expected to be carried over as-is,
        while the subject and submit type are expected to match the values
        embedded in the title text.
        """
        # NOTE(review): `_` is presumably a mock injected by a patch
        # decorator outside this view; it is intentionally unused.
        title = 'Some title.(arXiv:1611.07078v2 [cs.AI] UPDATED)'

        paper = Paper.from_xml({
            'title': title,
            'abstract': 'ABSTRACT',
            'link': 'LINK',
        })

        self.assertIsInstance(paper, Paper)
        self.assertEqual(paper.title, title)
        self.assertEqual(paper.abstract, 'ABSTRACT')
        self.assertEqual(paper.link, 'LINK')
        self.assertEqual(paper.subject, 'cs.AI')
        self.assertEqual(paper.submit_type, Paper.SUBMIT_TYPE_UPDATED)