def test_arxiv_to_classic(self):
    """Serialize every arXiv OAI fixture and compare it with its stored tagged form.

    Fixtures are the files matching ``data/arxiv.test/oai*`` next to this
    module. The expected output for each lives under ``tagged/`` with a
    ``.tagged`` suffix; on Python 3 a ``tagged/python3/`` variant is
    preferred when it exists.
    """
    running_py3 = sys.version_info > (3, )
    # Input is read as bytes on Python 3 and with universal newlines on
    # Python 2; the expected file is read as text in both cases.
    source_mode = 'rb' if running_py3 else 'rU'
    expected_mode = 'r' if running_py3 else 'rU'

    pattern = os.path.join(os.path.dirname(__file__), 'data/arxiv.test/oai*')
    for source_path in glob.glob(pattern):
        expected_path = source_path.replace('/oai', '/tagged/oai') + '.tagged'

        # Python 3 orders the properties dictionary differently
        py3_expected_path = expected_path.replace('/tagged/oai',
                                                  '/tagged/python3/oai')
        if running_py3 and os.path.exists(py3_expected_path):
            expected_path = py3_expected_path

        with open(source_path, source_mode) as source:
            writer = classic.Tagged()
            sink = StringIO()
            record = arxiv.ArxivParser().parse(source)
            writer.write(record, sink)
            produced = sink.getvalue()
            sink.close()

        with open(expected_path, expected_mode) as expected:
            self.assertEqual(produced, expected.read())
class TestATel(unittest.TestCase):
    """Check atel.ATelParser output against a stored serialized reference."""

    # NOTE(review): class-scoped import kept from the original; nothing in
    # this class uses it -- confirm whether it can be dropped.
    import pytest

    def setUp(self):
        """Mock atel.ATelParser.urllib.urlopen"""
        self.patcher = patch('urllib2.urlopen')
        self.urlopen_mock = self.patcher.start()
        # Undo the patch after each test so the mock cannot leak into
        # other test cases.
        self.addCleanup(self.patcher.stop)

    def test_output(self):
        """Parse the RSS fixture through the mocked urlopen and compare the tagged output."""
        parser = atel.ATelParser()
        mock_infile = os.path.join(os.path.dirname(__file__),
                                   "data/stubdata/input/ATel_rss.xml")
        with open(mock_infile) as fp:
            mock_data = fp.read()
        self.urlopen_mock.return_value = MockResponse(mock_data)

        atel_url = 'http://www.astronomerstelegram.org/?adsbiblio'
        test_data = parser.parse(atel_url, data_tag='item')

        test_outfile = "test_atel.tag"
        standard_outfile = os.path.join(os.path.dirname(__file__),
                                        "data/stubdata/serialized/atel.tag")

        # Opening with 'w' truncates any file left over from an earlier
        # run, so no separate best-effort removal is needed.
        serializer = classic.Tagged()
        with open(test_outfile, 'w') as outputfp:
            for d in test_data:
                serializer.write(d, outputfp)

        # shallow=False forces a content comparison instead of trusting
        # matching os.stat() signatures.
        self.assertTrue(
            filecmp.cmp(test_outfile, standard_outfile, shallow=False),
            "%s differs from %s" % (test_outfile, standard_outfile))
        os.remove(test_outfile)
def test_output(self):
    """Parse the HST proposal fixture and compare the tagged output with the stored reference.

    The JSON fixture is installed as the return value of
    ``self.get_batch_mock`` before the parser is invoked.
    """
    parser = hstprop.HSTParser()
    mock_infile = os.path.join(os.path.dirname(__file__),
                               "data/stubdata/input/hstprop.json")
    with open(mock_infile) as fp:
        mock_data = json.load(fp)
    self.get_batch_mock.return_value = mock_data

    api_url = 'https://proper.stsci.edu/proper/adsProposalSearch/query_test'
    token = 'foo'
    test_data = parser.parse(api_url, api_key=token, fromDate='2019-01-01',
                             maxRecords=1, test=True)

    test_outfile = "test_hst.tag"
    standard_outfile = os.path.join(os.path.dirname(__file__),
                                    "data/stubdata/serialized/hstprop.tag")

    # Opening with 'w' truncates any file left over from an earlier run,
    # so no separate best-effort removal is needed.
    serializer = classic.Tagged()
    with open(test_outfile, 'w') as outputfp:
        for d in test_data:
            serializer.write(d, outputfp)

    # shallow=False forces a content comparison instead of trusting
    # matching os.stat() signatures.
    self.assertTrue(
        filecmp.cmp(test_outfile, standard_outfile, shallow=False),
        "%s differs from %s" % (test_outfile, standard_outfile))
    os.remove(test_outfile)
class TestProcSci(unittest.TestCase):
    """Check procsci.PoSParser output against a stored serialized reference."""

    def setUp(self):
        """Mock procsci.PoSParser.urllib.urlopen"""
        self.patcher = patch('urllib.urlopen')
        self.urlopen_mock = self.patcher.start()
        # Undo the patch after each test so the mock cannot leak into
        # other test cases.
        self.addCleanup(self.patcher.stop)

    def test_output(self):
        """Parse the HTML fixture through the mocked urlopen and compare the tagged output."""
        parser = procsci.PoSParser()
        mock_infile = os.path.join(os.path.dirname(__file__),
                                   "data/stubdata/input/pos_sissa_it_299.html")
        with open(mock_infile, 'rU') as fp:
            mock_data = fp.read()
        self.urlopen_mock.return_value = MockResponse(mock_data)

        test_data = parser.parse("https://pos.sissa.it/299")

        test_outfile = "test_pos.tag"
        standard_outfile = os.path.join(
            os.path.dirname(__file__),
            "data/stubdata/serialized/procsci_299.tag")

        # Opening with 'w' truncates any file left over from an earlier
        # run, so no separate best-effort removal is needed.
        serializer = classic.Tagged()
        with open(test_outfile, 'w') as outputfp:
            for d in test_data:
                serializer.write(d, outputfp)

        # shallow=False forces a content comparison instead of trusting
        # matching os.stat() signatures.
        self.assertTrue(
            filecmp.cmp(test_outfile, standard_outfile, shallow=False),
            "%s differs from %s" % (test_outfile, standard_outfile))
        os.remove(test_outfile)
class TestJOSS(unittest.TestCase):
    """Check joss.JOSSParser output against a stored serialized reference."""

    # NOTE(review): class-scoped import kept from the original; nothing in
    # this class uses it -- confirm whether it can be dropped.
    import pytest

    def setUp(self):
        """Mock joss.JOSSParser.urllib.urlopen"""
        self.patcher = patch('urllib2.urlopen')
        self.urlopen_mock = self.patcher.start()
        # Undo the patch after each test so the mock cannot leak into
        # other test cases.
        self.addCleanup(self.patcher.stop)

    def test_output(self):
        """Parse the Atom fixture through the mocked urlopen and compare the tagged output."""
        parser = joss.JOSSParser()
        mock_infile = os.path.join(os.path.dirname(__file__),
                                   "data/stubdata/input/joss_atom.xml")
        with open(mock_infile) as fp:
            mock_data = fp.read()
        self.urlopen_mock.return_value = MockResponse(mock_data)

        joss_url = 'https://joss.theoj.org/papers/published.atom'
        test_data = parser.parse(joss_url, since='2019-07-10', page=1)

        test_outfile = "test_joss.tag"
        standard_outfile = os.path.join(os.path.dirname(__file__),
                                        "data/stubdata/serialized/joss.tag")

        # Opening with 'w' truncates any file left over from an earlier
        # run, so no separate best-effort removal is needed.
        serializer = classic.Tagged()
        with open(test_outfile, 'w') as outputfp:
            for d in test_data:
                serializer.write(d, outputfp)

        # shallow=False forces a content comparison instead of trusting
        # matching os.stat() signatures.
        self.assertTrue(
            filecmp.cmp(test_outfile, standard_outfile, shallow=False),
            "%s differs from %s" % (test_outfile, standard_outfile))
        os.remove(test_outfile)
def test_pnas_parser(self):
    """Parse the stored PNAS XML fixture and compare its tagged form with the reference file.

    The reference is ``serialized/python3/pnas.tag`` on Python 3 and
    ``serialized/pnas.tag`` otherwise.
    """
    input_path = os.path.join(self.stubdata_dir, 'input',
                              'pnas_117_36_21873.xml')
    with open(input_path, open_mode_u) as source:
        raw_xml = source.read()

    record = pnas.PNASParser().parse(raw_xml)
    writer = classic.Tagged()

    serialized_dir = os.path.join(self.stubdata_dir, 'serialized')
    produced_path = os.path.join(serialized_dir, 'test_pnas.tag')
    if sys.version_info > (3, ):
        reference_path = os.path.join(serialized_dir, 'python3', 'pnas.tag')
    else:
        reference_path = os.path.join(serialized_dir, 'pnas.tag')

    # Best-effort removal of output left behind by an earlier run.
    try:
        os.remove(produced_path)
    except Exception:
        pass

    with open(produced_path, 'w') as sink:
        writer.write(record, sink)

    self.assertEqual(filecmp.cmp(produced_path, reference_path), True)
    os.remove(produced_path)
def test_pnas_parser(self):
    """Parse each link in the stored feed fixture and compare the tagged output with the reference.

    The HTML fixture, wrapped in ``MockResponse``, is installed as
    ``self.requests_mock.return_value.text`` before any entry is parsed.
    The reference is ``serialized/python3/pnas.tag`` on Python 3 and
    ``serialized/pnas.tag`` otherwise.
    """
    mock_infile = os.path.join(self.stubdata_dir, 'input',
                               'pnas_feedparser.resp')
    mock_html_file = os.path.join(self.stubdata_dir, 'input',
                                  'pnas_resp.html')
    with open(mock_infile, open_mode_u) as fp:
        mock_data = fp.read()
    with open(mock_html_file, open_mode_u) as fp:
        mock_html = fp.read()
    self.requests_mock.return_value.text = MockResponse(mock_html)
    feed = json.loads(mock_data)

    test_outfile = os.path.join(self.stubdata_dir, 'serialized',
                                'test_pnas.tag')
    if sys.version_info > (3,):
        standard_outfile = os.path.join(self.stubdata_dir, 'serialized',
                                        'python3', 'pnas.tag')
    else:
        standard_outfile = os.path.join(self.stubdata_dir, 'serialized',
                                        'pnas.tag')

    # NOTE(review): every feed entry is serialized and compared on its own;
    # confirm this matches the intended extent of the loop.
    for _item in feed['entries']:
        absURL = _item['link']
        parser = pnas.PNASParser()
        output = parser.parse(absURL)
        serializer = classic.Tagged()

        # Opening with 'w' truncates anything left over from an earlier
        # run or iteration, so no separate removal is needed beforehand.
        with open(test_outfile, 'w') as fo:
            serializer.write(output, fo)

        # shallow=False forces a content comparison instead of trusting
        # matching os.stat() signatures.
        self.assertTrue(
            filecmp.cmp(test_outfile, standard_outfile, shallow=False),
            "%s differs from %s" % (test_outfile, standard_outfile))
        os.remove(test_outfile)
def test_arxiv_to_classic(self):
    """Serialize every arXiv OAI fixture and compare it with its stored tagged form.

    Fixtures are the files matching ``data/arxiv.test/oai*`` next to this
    module; the expected output for each lives under ``tagged/`` with a
    ``.tagged`` suffix.
    """
    pattern = os.path.join(os.path.dirname(__file__), 'data/arxiv.test/oai*')
    for source_path in glob.glob(pattern):
        expected_path = source_path.replace('/oai', '/tagged/oai') + '.tagged'

        with open(source_path, 'rU') as source:
            writer = classic.Tagged()
            sink = cStringIO.StringIO()
            record = arxiv.ArxivParser().parse(source)
            writer.write(record, sink)
            produced = sink.getvalue()
            sink.close()

        with open(expected_path, 'rU') as expected:
            self.assertEqual(produced, expected.read())
def test_proquest_parser(self):
    """Run the ProQuest parser on the stored input and compare the tagged records with the reference file."""
    parser = proquest.ProQuestParser('SAO_NASA_Sep_2020.UNX')
    # The records written below are read from parser.results; the return
    # value of parse() itself is not used.
    parser.parse()
    writer = classic.Tagged()

    reference_path = os.path.join(self.outputdir,
                                  'SAO_NASA_Sep_2020.UNX.new')
    produced_path = os.path.join(self.outputdir, 'test_proquest.UNX.new')

    # Best-effort removal of output left behind by an earlier run.
    try:
        os.remove(produced_path)
    except Exception:
        pass

    with open(produced_path, 'w') as sink:
        for record in parser.results:
            writer.write(record, sink)

    self.assertEqual(filecmp.cmp(produced_path, reference_path), True)
    os.remove(produced_path)
def test_output(self):
    """Parse the PoS HTML fixture through the mocked urlopen and compare the tagged output.

    Fixture and reference paths are relative, so they are resolved
    against the current working directory.
    """
    parser = procsci.PoSParser()
    mock_infile = "test_data/stubdata/input/pos_sissa_it_299.html"
    with open(mock_infile, 'rU') as fp:
        mock_data = fp.read()
    self.urlopen_mock.return_value = MockResponse(mock_data)

    test_data = parser.parse("https://pos.sissa.it/299")

    test_outfile = "test_pos.tag"
    standard_outfile = "test_data/stubdata/serialized/procsci_299.tag"

    # Opening with 'w' truncates any file left over from an earlier run,
    # which replaces the previous bare-except removal.
    serializer = classic.Tagged()
    with open(test_outfile, 'w') as outputfp:
        for d in test_data:
            serializer.write(d, outputfp)

    # shallow=False forces a content comparison instead of trusting
    # matching os.stat() signatures.
    self.assertTrue(
        filecmp.cmp(test_outfile, standard_outfile, shallow=False),
        "%s differs from %s" % (test_outfile, standard_outfile))
    os.remove(test_outfile)
def test_output(self):
    """Parse the PoS HTML fixture via the mocked request and compare the tagged output.

    The fixture, wrapped in ``MockResponse``, is installed as
    ``self.requests_mock.return_value.text`` before the parser runs.
    """
    parser = procsci.PoSParser()
    mock_infile = os.path.join(os.path.dirname(__file__),
                               "data/stubdata/input/pos_sissa_it_299.html")
    with open(mock_infile, open_mode_u) as fp:
        mock_data = fp.read()
    self.requests_mock.return_value.text = MockResponse(mock_data)

    test_data = parser.parse("https://pos.sissa.it/299_test")

    test_outfile = "test_pos.tag"
    standard_outfile = os.path.join(
        os.path.dirname(__file__),
        "data/stubdata/serialized/procsci_299.tag")

    # Opening with 'w' truncates any file left over from an earlier run,
    # so no separate best-effort removal is needed.
    serializer = classic.Tagged()
    with open(test_outfile, 'w') as outputfp:
        for d in test_data:
            serializer.write(d, outputfp)

    # shallow=False forces a content comparison instead of trusting
    # matching os.stat() signatures.
    self.assertTrue(
        filecmp.cmp(test_outfile, standard_outfile, shallow=False),
        "%s differs from %s" % (test_outfile, standard_outfile))
    os.remove(test_outfile)
def test_proquest_parser(self):
    """Run the ProQuest parser on the stored MARC and open-access inputs and compare the tagged records.

    The open-access file name is derived from the MARC file name by
    replacing ``.UNX`` with ``_OpenAccessTitles.csv``.
    """
    # os.path.join gives the same path whether or not stubdata_dir ends
    # with a separator.
    marc_filename = os.path.join(self.stubdata_dir, 'input',
                                 'SAO_NASA_Sep_2020.UNX')
    oa_filename = marc_filename.replace('.UNX', '_OpenAccessTitles.csv')
    with open(marc_filename) as fp:
        marcdata = fp.read()
    with open(oa_filename) as fp:
        oadata = fp.read()

    parser = proquest.ProQuestParser(marcdata, oadata)
    # The records written below are read from parser.results; the return
    # value of parse() itself is not used.
    parser.parse()
    serializer = classic.Tagged()

    standard_outfile = os.path.join(self.outputdir,
                                    'SAO_NASA_Sep_2020.UNX.new')
    test_outfile = os.path.join(self.outputdir, 'test_proquest.UNX.new')

    # Opening with 'w' truncates any file left over from an earlier run,
    # so no separate best-effort removal is needed.
    with open(test_outfile, 'w') as fo:
        for rec in parser.results:
            serializer.write(rec, fo)

    # shallow=False forces a content comparison instead of trusting
    # matching os.stat() signatures.
    self.assertTrue(
        filecmp.cmp(test_outfile, standard_outfile, shallow=False),
        "%s differs from %s" % (test_outfile, standard_outfile))
    os.remove(test_outfile)
def test_output(self):
    """Parse the ATel RSS fixture through the mocked urlopen and compare the tagged output."""
    parser = atel.ATelParser()
    mock_infile = os.path.join(os.path.dirname(__file__),
                               "data/stubdata/input/ATel_rss.xml")
    with open(mock_infile) as fp:
        mock_data = fp.read()
    self.urlopen_mock.return_value = MockResponse(mock_data)

    atel_url = 'http://www.astronomerstelegram.org/?adsbiblio.test'
    test_data = parser.parse(atel_url, data_tag='item')

    test_outfile = "test_atel.tag"
    standard_outfile = os.path.join(os.path.dirname(__file__),
                                    "data/stubdata/serialized/atel.tag")

    # Opening with 'w' truncates any file left over from an earlier run,
    # so no separate best-effort removal is needed.
    serializer = classic.Tagged()
    with open(test_outfile, 'w') as outputfp:
        for d in test_data:
            serializer.write(d, outputfp)

    # shallow=False forces a content comparison instead of trusting
    # matching os.stat() signatures.
    self.assertTrue(
        filecmp.cmp(test_outfile, standard_outfile, shallow=False),
        "%s differs from %s" % (test_outfile, standard_outfile))
    os.remove(test_outfile)
def test_output(self):
    """Parse the JOSS Atom fixture through the mocked urlopen and compare the tagged output.

    The reference is ``serialized/python3/joss.tag`` on Python 3 and
    ``serialized/joss.tag`` otherwise.
    """
    parser = joss.JOSSParser()
    mock_infile = os.path.join(os.path.dirname(__file__),
                               "data/stubdata/input/joss_atom.xml")
    with open(mock_infile) as fp:
        mock_data = fp.read()
    self.urlopen_mock.return_value = MockResponse(mock_data)

    joss_url = 'https://joss.theoj.org/papers/published.atom.test'
    test_data = parser.parse(joss_url, since='2019-07-10', page=1)

    test_outfile = "test_joss.tag"
    if sys.version_info > (3,):
        standard_outfile = os.path.join(
            os.path.dirname(__file__),
            "data/stubdata/serialized/python3/joss.tag")
    else:
        standard_outfile = os.path.join(
            os.path.dirname(__file__),
            "data/stubdata/serialized/joss.tag")

    # Opening with 'w' truncates any file left over from an earlier run,
    # so no separate best-effort removal is needed.
    serializer = classic.Tagged()
    with open(test_outfile, 'w') as outputfp:
        for d in test_data:
            serializer.write(d, outputfp)

    # shallow=False forces a content comparison instead of trusting
    # matching os.stat() signatures.
    self.assertTrue(
        filecmp.cmp(test_outfile, standard_outfile, shallow=False),
        "%s differs from %s" % (test_outfile, standard_outfile))
    os.remove(test_outfile)