def run(journal, num_articles):
    """Sample article dates for ``journal`` and write matching ones to file.

    Starting from the volume reported by ``html.detect_start_volume()`` and
    walking backwards over ``num_volumes`` volumes, one sample issue per
    volume is inspected. For each sampled article URL the date string is
    fetched with ``html.get_date_div``, wrapped in an ``Article`` and written
    through ``FileWriter`` when ``article.get_year()`` equals the acceptance
    year returned by ``html.detect_start_volume()``.

    If any command-line argument is present (``len(sys.argv) > 1``) the run is
    treated as a test: ``num_articles`` is overridden to 3 and only a single
    volume is visited.

    Args:
        journal: Journal identifier handed to ``FileWriter`` and
            ``html.build_urls``.
        num_articles: Maximum number of articles to sample per volume/issue.

    Returns:
        The number of articles whose date string was fetched successfully.
    """
    # Single-argument print(...) produces the same output as the Python 2
    # print statement used elsewhere, so the emitted text is unchanged.
    print("Running publication-dates version 1.1\n")

    # Setup output file, set parameters, and use brief run if testing
    writer = FileWriter(journal)
    try:
        num_volumes = 18  # 18 volumes per year
        issue = 1  # sample issue for each volume

        if len(sys.argv) > 1:
            print("Testing.....")
            num_articles = 3
            num_volumes = 1

        # Sample papers accepted in previous year
        date = html.detect_start_volume()
        start_volume = date[0]
        acceptance_year = date[1]

        counter = 0
        # Clamp so that a negative request samples nothing (as the original
        # range(1, num_articles + 1) did) instead of slicing from the end.
        sample_size = max(num_articles, 0)

        volumes = range(start_volume - num_volumes + 1, start_volume + 1)

        for volume in reversed(volumes):
            # Go to volume/issue contents page, and extract URLs of articles
            articles = html.build_urls(journal, volume, issue)

            # Sample entries 1..num_articles of this volume/issue. Slicing
            # (rather than indexing) means an issue with fewer entries is
            # sampled as far as it goes instead of raising IndexError.
            # NOTE(review): index 0 is never sampled, exactly as before -
            # confirm whether skipping the first URL is intentional.
            for url in articles[1:sample_size + 1]:
                try:
                    date_string = html.get_date_div(url)
                except Exception:
                    # Only ordinary errors are treated as "page unavailable";
                    # KeyboardInterrupt/SystemExit now propagate.
                    print("Some error occurred (URL ' %s ' not available?). "
                          "Skipping." % (url,))
                    # NOTE(review): this abandons the remaining articles of
                    # this issue, not just the failing one - kept as-is.
                    break

                counter += 1
                article = Article(date_string)

                if article.get_year() == acceptance_year:
                    writer.write_to_file(article)
    finally:
        # Always release the output file, even if a fetch or parse step fails.
        writer.close_file()

    return counter
def test_html_extracts_article_urls_from_contents_page_apjl(self):
    # Checks that html.build_urls, given the "APJL" journal and the fixture
    # volume/issue (self.test_vol / self.test_iss), yields a sequence whose
    # first five entries are the article URLs listed below.
    # Written as comments rather than a docstring so that unittest's verbose
    # description of this test is left untouched.
    expected_first_five = [
        "http://iopscience.iop.org/article/10.3847/2041-8213/836/2/L17",
        "http://iopscience.iop.org/article/10.3847/2041-8213/aa5dab",
        "http://iopscience.iop.org/article/10.3847/2041-8213/aa5cb0",
        "http://iopscience.iop.org/article/10.3847/2041-8213/aa5eb0",
        "http://iopscience.iop.org/article/10.3847/2041-8213/aa5dee",
    ]

    # URLs of every article found on the issue's contents page.
    all_urls = html.build_urls("APJL", self.test_vol, self.test_iss)

    # Only the leading five are pinned by this test.
    self.assertEqual(expected_first_five, all_urls[:5])
def test_html_extracts_article_urls_from_contents_page_apj(self):
    # Checks that html.build_urls, given the "APJ" journal and the fixture
    # volume/issue (self.test_vol / self.test_iss), yields a sequence whose
    # first five entries are the article URLs listed below.
    # Written as comments rather than a docstring so that unittest's verbose
    # description of this test is left untouched.
    expected_first_five = [
        "http://iopscience.iop.org/article/10.3847/1538-4357/aa5be8",
        "http://iopscience.iop.org/article/10.3847/1538-4357/aa5b8b",
        "http://iopscience.iop.org/article/10.3847/1538-4357/aa5b88",
        "http://iopscience.iop.org/article/10.3847/1538-4357/836/2/152",
        "http://iopscience.iop.org/article/10.3847/1538-4357/836/2/153",
    ]

    # URLs of every article found on the issue's contents page.
    all_urls = html.build_urls("APJ", self.test_vol, self.test_iss)

    # Only the leading five are pinned by this test.
    self.assertEqual(expected_first_five, all_urls[:5])