示例#1
0
 def test_download_file(self):
     """Test if download_file works."""
     httpretty.register_uri(httpretty.GET,
                            "http://example.com/test.txt",
                            body="Lorem ipsum\n",
                            status=200)
     file_fd, file_name = tempfile.mkstemp()
     os.close(file_fd)
     download_file("http://example.com/test.txt", file_name)
     self.assertEqual("Lorem ipsum\n", open(file_name).read())
示例#2
0
 def test_download_file(self):
     """Test if download_file works."""
     httpretty.register_uri(
         httpretty.GET,
         "http://example.com/test.txt",
         body="Lorem ipsum\n",
         status=200
     )
     file_fd, file_name = tempfile.mkstemp()
     os.close(file_fd)
     download_file("http://example.com/test.txt", file_name)
     self.assertEqual("Lorem ipsum\n", open(file_name).read())
    def _attach_fulltext(self, rec, doi):
        url = 'http://dx.doi.org/' + doi
        page = requests.get(url)
        #url after redirect
        url = page.url
        page = page.text
        parsed_uri = urlparse(url)
        domain = '{uri.scheme}://{uri.netloc}'.format(uri=parsed_uri)
        page = BeautifulSoup(page)
        try:
            if 'epjconf' in doi:
                div = page.body.find('div', attrs={'id': 'header'})
            else:
                div = page.body.find('div', attrs={
                    'class': 'module_background files'
                })
            links = div.findAll('a')
        except AttributeError:
            return
        for pdf in links:
            if pdf['href'].endswith('pdf'):
                link_to_pdf = domain + pdf['href']
                record_add_field(rec, '856', ind1='4',
                                 subfields=[('u', link_to_pdf),
                                            ('y', 'EDP Sciences server')])
                out_folder = join(CFG_EDPSCIENCE_OUT_FOLDER,
                                  "fulltexts")
                try:
                    makedirs(out_folder)
                    filename = join(out_folder,
                                    link_to_pdf.split('/')[-1])
                except (IOError, OSError):
                    # Problem creating folder
                    filename = None

                filename = download_file(from_url=link_to_pdf,
                                         to_filename=filename,
                                         retry_count=5)
                record_add_field(rec, 'FFT',
                                 subfields=[('a', filename),
                                            ('t', 'INSPIRE-PUBLIC'),
                                            ('d', 'Fulltext')])
示例#4
0
    def _attach_fulltext(self, rec, doi):
        url = 'http://dx.doi.org/' + doi
        page = requests.get(url)
        #url after redirect
        url = page.url
        page = page.text
        parsed_uri = urlparse(url)
        domain = '{uri.scheme}://{uri.netloc}'.format(uri=parsed_uri)
        page = BeautifulSoup(page)
        try:
            if 'epjconf' in doi:
                div = page.body.find('div', attrs={'id': 'header'})
            else:
                div = page.body.find(
                    'div', attrs={'class': 'module_background files'})
            links = div.findAll('a')
        except AttributeError:
            return
        for pdf in links:
            if pdf['href'].endswith('pdf'):
                link_to_pdf = domain + pdf['href']
                record_add_field(rec,
                                 '856',
                                 ind1='4',
                                 subfields=[('u', link_to_pdf),
                                            ('y', 'EDP Sciences server')])
                out_folder = join(CFG_EDPSCIENCE_OUT_FOLDER, "fulltexts")
                try:
                    makedirs(out_folder)
                    filename = join(out_folder, link_to_pdf.split('/')[-1])
                except (IOError, OSError):
                    # Problem creating folder
                    filename = None

                filename = download_file(from_url=link_to_pdf,
                                         to_filename=filename,
                                         retry_count=5)
                record_add_field(rec,
                                 'FFT',
                                 subfields=[('a', filename),
                                            ('t', 'INSPIRE-PUBLIC'),
                                            ('d', 'Fulltext')])