示例#1
0
def bst_elsevier():
    """Upload Elsevier packages and record them in the bookkeeping tables.

    Creates an ElsevierPackage, calls its ``bibupload_it()``, then:

    1. calls ``prepare_package_table()`` and ``prepare_doi_package_table()``;
    2. inserts one ``package`` row for every delivered package that is not
       stored yet;
    3. inserts one ``doi_package`` row for every (package name, DOI) pair
       in ``els.doi_package_name_mapping``.

    Failures while linking a DOI are logged through ``write_message`` and
    do not interrupt the processing of the remaining pairs.
    """
    els = ElsevierPackage()
    els.bibupload_it()

    prepare_package_table()
    prepare_doi_package_table()

    write_message(els.conn.packages_delivery)
    for p in els.conn.packages_delivery:
        # Drop the trailing character of the delivered name and normalise
        # whitespace once, so that the existence check, the insert and the
        # later lookup by ``p_name.strip()`` all use the very same key.
        name = p[0][:-1].strip()
        date = p[1]
        if run_sql("SELECT name FROM package WHERE name=%s", (name, )):
            write_message("Package already exist: %s: %s" % ('Elsevier', name))
            continue
        write_message("New pacakge discovered for publisher %s: %s" % ('Elsevier', name))
        run_sql("INSERT INTO package(name, delivery_date) VALUES(%s, %s)", (name, date))

    for dp in els.doi_package_name_mapping:
        try:
            p_name, doi = dp
            p_id = run_sql("SELECT id FROM package WHERE name=%s", (p_name.strip(),))
            if not p_id:
                # Without a package row there is nothing to link to; say so
                # explicitly instead of failing on p_id[0][0] and reporting
                # a misleading "already exist" message.
                write_message("No package found for doi: %s %s" % (p_name.strip(), doi))
                continue
            package_id = p_id[0][0]
            try:
                write_message("Adding doi to package: %d %s" % (package_id, doi))
                run_sql("INSERT INTO doi_package VALUES(%s, %s)", (package_id, doi))
            except Exception as e:
                # NOTE(review): presumably a duplicate-key error from the
                # database when the pair is already stored -- confirm.
                write_message(e)
                write_message("This already exist: %d %s" % (package_id, doi))
        except Exception as e:
            write_message(e)
示例#2
0
 def setUp(self):
     """Create a CONSYN-mode ElsevierPackage and parse the sample record."""
     self.els = ElsevierPackage(CONSYN=True,
                                journal_mappings=journal_mappings)
     relative_path = os.path.join('data', 'sample_consyn_record.xml')
     sample_file = pkg_resources.resource_filename('harvestingkit.tests',
                                                   relative_path)
     self.document = parse(sample_file)
def call_elsevier(settings):
    """Run the Elsevier upload, or only refresh the stored credentials.

    When ``settings.update_credentials`` is set, the user is asked for the
    ELSEVIER credentials and nothing else happens. Otherwise an
    ElsevierPackage is built from the settings and ``bibupload_it()`` is
    called on it.
    """
    if not settings.update_credentials:
        options = {
            'package_name': settings.package_name,
            'path': settings.path,
            'run_locally': settings.run_locally,
            'extract_nations': settings.extract_nations,
        }
        ElsevierPackage(**options).bibupload_it()
    else:
        _query_user_for_credentials(['ELSEVIER'])
示例#4
0
def call_elsevier(settings):
    """Handle the Elsevier command using the parsed settings.

    With ``settings.update_credentials`` set, only the ELSEVIER credentials
    are queried from the user. Otherwise an ElsevierPackage is created from
    the remaining settings and its ``bibupload_it()`` is invoked.
    """
    if settings.update_credentials:
        _query_user_for_credentials(['ELSEVIER'])
        return None

    name = settings.package_name
    location = settings.path
    local_run = settings.run_locally
    nations = settings.extract_nations
    package = ElsevierPackage(package_name=name,
                              path=location,
                              run_locally=local_run,
                              extract_nations=nations)
    package.bibupload_it()
 def setUp(self):
     """Create a no-harvest ElsevierPackage and parse both sample files."""
     self.els = ElsevierPackage(no_harvest=True)
     # (attribute name, sample file) pairs, parsed in this order.
     samples = (
         ('document', 'sample_elsevier_document_output.xml'),
         ('document540', 'sample_elsevier_540_document_output.xml'),
     )
     for attribute, file_name in samples:
         location = pkg_resources.resource_filename(
             'harvestingkit.tests', os.path.join('data', file_name))
         setattr(self, attribute, parse(location))
 def setUp(self):
     """Prepare the CONSYN package helper and the parsed sample record."""
     self.els = ElsevierPackage(journal_mappings=journal_mappings,
                                CONSYN=True)
     record_location = pkg_resources.resource_filename(
         'harvestingkit.tests',
         os.path.join('data', 'sample_consyn_record.xml'))
     self.document = parse(record_location)
 def setUp(self):
     """Prepare the no-harvest package helper and both parsed samples."""
     self.els = ElsevierPackage(no_harvest=True)

     main_sample = pkg_resources.resource_filename(
         'harvestingkit.tests',
         os.path.join('data', 'sample_elsevier_document_output.xml'))
     self.document = parse(main_sample)

     sample_540 = pkg_resources.resource_filename(
         'harvestingkit.tests',
         os.path.join('data', 'sample_elsevier_540_document_output.xml'))
     self.document540 = parse(sample_540)
示例#8
0
def _parse_boolean_flag(value, name, default):
    """Turn a 'True'/'False' value (any letter case) into a bool.

    Any other value is reported through ``write_message`` and replaced by
    *default*. *name* is the parameter name used in the warning text.
    """
    text = str(value).lower()
    if text == 'true':
        return True
    if text == 'false':
        return False
    write_message(
        'Warning %s parameter is not a valid Boolean (True/False)\n' % name
        + 'the default value \'%s\' has been used!\n' % default)
    return default


def bst_consyn_harvest(feed=None,
                       package=None,
                       package_list=None,
                       batch_size='500',
                       delete_zip='False',
                       upload_FTP='True'):
    """ Task to convert xml files from consyn.elsevier.com to Marc xml files.
    There are three execution modes:
    1. Download from an atom feed.
    2. Extract a zip package.
    3. Extract a list of zip packages.

    The atom feed mode is used when neither ``package`` nor
    ``package_list`` is given; ``package`` takes precedence over
    ``package_list`` when both are given.

    :param feed: The URL of the atom feed to download.
    :type feed: string

    :param package: A path to a zip package
    :type package: string

    :param package_list: A path to a file with a list of paths to zip packages
    :type package_list: string

    :param batch_size: The number of records contained in each output file;
                       falls back to 500 (with a warning) when it cannot
                       be converted to an integer
    :type batch_size: string representation of an integer

    :param delete_zip: Flag to indicate if the downloaded zip files
                       should be kept on the disk or not; falls back to
                       False (with a warning) when not 'True'/'False'
    :type delete_zip: string representation of a boolean

    :param upload_FTP: Flag to indicate whether the result files
                       should be uploaded to the FTP server; falls back to
                       True (with a warning) when not 'True'/'False'
    :type upload_FTP: string representation of a boolean
    """
    if not feed:
        feed = "https://consyn.elsevier.com/batch/atom?key=%s" % \
               (CFG_CONSYN_ATOM_KEY,)
    new_files = []
    new_sources = []

    try:
        batch_size = int(batch_size)
    except (TypeError, ValueError):
        batch_size = 500
        write_message('Warning batch_size parameter is not a valid integer\n' +
                      'the default value \'500\' has been used!\n')
    delete_zip = _parse_boolean_flag(delete_zip, 'delete_zip', False)
    upload_FTP = _parse_boolean_flag(upload_FTP, 'upload_FTP', True)

    if not exists(CFG_CONSYN_OUT_DIRECTORY):
        # NOTE(review): the folder returned by create_work_folder is removed
        # right away; presumably this is only done so that the output
        # directory itself gets created -- confirm against the helper.
        rmdir(create_work_folder(CFG_CONSYN_OUT_DIRECTORY))
    out_folder = CFG_CONSYN_OUT_DIRECTORY
    els = ElsevierPackage(CONSYN=True)

    consyn_files = join(out_folder, "consyn-files")
    consyn_files = consyn_files.lstrip()

    if not package and not package_list:
        download_feed(feed, batch_size, delete_zip, new_sources, out_folder)
        task_update_progress("Converting files 2/3...")
        task_sleep_now_if_required(can_stop_too=True)
        fetch_xml_files(consyn_files, els, new_files)
        task_sleep_now_if_required(can_stop_too=False)
    else:
        if package:
            xml_files = extract_package(package, batch_size, delete_zip,
                                        out_folder)
        else:
            xml_files = extract_multiple_packages(package_list, batch_size,
                                                  delete_zip, new_sources,
                                                  out_folder)
        # The conversion has to run for both package modes; it used to be
        # nested under the package_list branch, so the files extracted from
        # a single package were never converted.
        task_update_progress("Converting files 2/3...")
        results = convert_files(xml_files, els, prefix=consyn_files)

        for dummy, (status_code, result) in results.iteritems():
            if status_code == StatusCodes.OK:
                new_files.append(result)
    task_update_progress("Compiling output 3/3...")
    create_collection(batch_size, new_files, new_sources, out_folder,
                      upload_FTP)
class ElsevierPackageTests(unittest.TestCase):

    """Checks for the metadata extracted from a sample CONSYN record."""

    def setUp(self):
        """Create the package helper and parse the sample CONSYN record."""
        self.els = ElsevierPackage(CONSYN=True,
                                   journal_mappings=journal_mappings)
        sample_path = pkg_resources.resource_filename(
            'harvestingkit.tests',
            os.path.join('data', 'sample_consyn_record.xml')
        )
        self.document = parse(sample_path)

    def test_doi(self):
        """The DOI extracted from the sample matches the expected value."""
        extracted = self.els._get_doi(self.document)
        self.assertEqual(extracted, '10.1016/0370-2693(88)91603-6')

    def test_title(self):
        """The title extracted from the sample matches the expected value."""
        extracted = self.els.get_title(self.document)
        self.assertEqual(extracted, 'Toward classification of conformal theories')

    def test_doctype(self):
        """The document type extracted from the sample is 'fla'."""
        extracted = self.els.get_doctype(self.document)
        self.assertEqual(extracted, 'fla')

    def test_abstract(self):
        """The abstract extracted from the sample matches the expected text."""
        abstract = ('By studying the representations of the mapping class groups '
                    'which arise in 2D conformal theories we derive some restrictions '
                    'on the value of the conformal dimension h i of operators and the '
                    'central charge c of the Virasoro algebra. As a simple application '
                    'we show that when there are a finite number of operators in the '
                    'conformal algebra, the h i and c are all rational.')
        extracted = self.els.get_abstract(self.document)
        self.assertEqual(extracted, abstract)

    def test_keywords(self):
        """The keywords extracted from the sample match the expected list."""
        extracted = self.els.get_keywords(self.document)
        self.assertEqual(
            extracted,
            ['Heavy quarkonia', 'Quark gluon plasma', 'Mott effect', 'X(3872)'])

    def test_add_orcids(self):
        """An orcid attribute of ce:author ends up in the author dictionary.

        According to "Tag by Tag The Elsevier DTD 5 Family of XML DTDs" orcids will be
        distributed as an attribute in the ce:author tag.
        """
        author_node = Element('ce:author')
        author_node.setAttribute('orcid', '1234-5678-4321-8765')
        authors = [{}]

        # The authors list is modified in place by _add_orcids.
        self.els._add_orcids(authors, [author_node])

        expected = [{'orcid': 'ORCID:1234-5678-4321-8765'}]
        self.assertEqual(authors, expected)

    def test_authors(self):
        """The authors extracted from the sample match the expected list."""
        authors = [{'affiliation': ['Lyman Laboratory of Physics, Harvard University, Cambridge, MA 02138, USA'], 'surname': 'Vafa', 'given_name': 'Cumrun', 'orcid': 'ORCID:1234-5678-4321-8765'}]
        extracted = self.els.get_authors(self.document)
        self.assertEqual(extracted, authors)

    def test_copyright(self):
        """The copyright statement extracted from the sample is as expected."""
        extracted = self.els.get_copyright(self.document)
        self.assertEqual(extracted, 'Copyright unknown. Published by Elsevier B.V.')

    def test_publication_information(self):
        """The publication information tuple of the sample is as expected."""
        publication_information = (
            'Phys.Lett.',
            '0370-2693',
            'B206',
            '3',
            '421',
            '426',
            '1988',
            '1988-05-26',
            '10.1016/0370-2693(88)91603-6',
        )
        extracted = self.els.get_publication_information(self.document)
        self.assertEqual(extracted, publication_information)

    def test_publication_date_oa(self):
        """The date is taken from oa:openAccessEffective when it is present."""
        data = """
        <doc xmlns:oa="http://vtw.elsevier.com/data/ns/properties/OpenAccess-1/">
        <oa:openAccessInformation>
          <oa:openAccessStatus xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
            http://vtw.elsevier.com/data/voc/oa/OpenAccessStatus#Full
          </oa:openAccessStatus>
          <oa:openAccessEffective xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">2014-11-11T08:38:44Z</oa:openAccessEffective>
          <oa:sponsor xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
            <oa:sponsorName>SCOAP&#xB3; - Sponsoring Consortium for Open Access Publishing in Particle Physics</oa:sponsorName>
            <oa:sponsorType>http://vtw.elsevier.com/data/voc/oa/SponsorType#FundingBody</oa:sponsorType>
          </oa:sponsor>
          <oa:userLicense xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">http://creativecommons.org/licenses/by/3.0/</oa:userLicense>
        </oa:openAccessInformation>
        </doc>"""
        parsed = parseString(data)
        extracted = self.els.get_publication_date(parsed)
        self.assertEqual(extracted, "2014-11-11")

    def test_publication_date_cover_display(self):
        """A month-and-year coverDisplayDate yields a year-month date."""
        data = """
        <doc xmlns:prism="http://vtw.elsevier.com/data/ns/properties/OpenAccess-1/">
          <prism:coverDisplayDate>December 2014</prism:coverDisplayDate>
        </doc>"""
        parsed = parseString(data)
        extracted = self.els.get_publication_date(parsed)
        self.assertEqual(extracted, "2014-12")

    def test_publication_date_cover_display_full(self):
        """A day-month-year coverDisplayDate yields a full date."""
        data = """
        <doc xmlns:prism="http://vtw.elsevier.com/data/ns/properties/OpenAccess-1/">
          <prism:coverDisplayDate>1 December 2014</prism:coverDisplayDate>
        </doc>"""
        parsed = parseString(data)
        extracted = self.els.get_publication_date(parsed)
        self.assertEqual(extracted, "2014-12-01")

    def test_publication_date_cover(self):
        """The coverDate value is returned when both cover elements exist."""
        data = """
        <doc xmlns:prism="http://vtw.elsevier.com/data/ns/properties/OpenAccess-1/">
          <prism:coverDisplayDate>April 2011</prism:coverDisplayDate>
          <prism:coverDate>2011-04-01</prism:coverDate>
        </doc>"""
        parsed = parseString(data)
        extracted = self.els.get_publication_date(parsed)
        self.assertEqual(extracted, "2011-04-01")

    def test_references(self):
        """Every reference extracted from the sample is an expected one."""
        references = [('[1]', ['Belavin, A.A.', 'Polyakov, A.M.', 'Zamolodchikov, A.B.'], '', 'Nucl. Phys. B 241 1984', '333', '', '241', '1984', [], None, True, '', 'Nucl. Phys. B', '', [], '', []),
                      ('[2]', ['Friedan, D.', 'Qiu, Z.', 'Shenker, S.H.'], '', 'Phys. Rev. Lett. 52 1984', '1575', '', '52', '1984', [], None, True, '', 'Phys. Rev. Lett.', '', [], '', []),
                      ('[3]', ['Cardy, J.L.'], '', 'Nucl. Phys. B 270 1986', '186', '', '270', '1986', [], None, True, '[FS16]', 'Nucl. Phys. B', '', [], '', []),
                      ('[3]', ['Capelli, A.', 'Itzykson, C.', 'Zuber, J.-B.'], '', 'Nucl. Phys. B 280 1987', '445', '', '280', '1987', [], None, True, '[FS 18]', 'Nucl. Phys. B', '', [], '', []),
                      ('[3]', ['Capelli, A.', 'Itzykson, C.', 'Zuber, J.-B.'], '', 'Commun. Math. Phys. 113 1987', '1', '', '113', '1987', [], None, True, '', 'Commun. Math. Phys.', '', [], '', []),
                      ('[3]', ['Gepner, D.'], '', 'Nucl. Phys. B 287 1987', '111', '', '287', '1987', [], None, True, '', 'Nucl. Phys. B', '', [], '', []),
                      ('[4]', [], '', '', '', '', '', '', 'G. Anderson and G. Moore, IAS preprint IASSNS-HEP-87/69.', None, [], '', '', '', [], '', []),
                      ('[5]', ['Friedan, D.', 'Shenker, S.'], '', 'Phys. Lett. B 175 1986', '287', '', '175', '1986', [], None, True, '', 'Phys. Lett. B', '', [], '', []),
                      ('[5]', ['Friedan, D.', 'Shenker, S.'], '', 'Nucl. Phys. B 281 1987', '509', '', '281', '1987', [], None, True, '', 'Nucl. Phys. B', '', [], '', []),
                      ('[6]', [], '', '', '', '', '', '', 'E. Martinec and S. Shenker, unpublished.', None, [], '', '', '', [], '', []),
                      ('[7]', ['Vafa, C.'], '', 'Phys. Lett. B 199 1987', '195', '', '199', '1987', [], None, True, '', 'Phys. Lett. B', '', [], '', []),
                      ('[8]', ['Harer, J.'], '', 'Inv. Math. 72 1983', '221', '', '72', '1983', [], None, True, '', 'Inv. Math.', '', [], '', []),
                      ('[9]', ['Tsuchiya, A.', 'Kanie, Y.'], '', 'Lett. Math. Phys. 13 1987', '303', '', '13', '1987', [], None, True, '', 'Lett. Math. Phys.', '', [], '', []),
                      ('[10]', [], '', '', '', '', '', '', 'E. Verlinde, to be published.', None, [], '', '', '', [], '', []),
                      ('[11]', ['Dehn, M.'], '', 'Acta Math. 69 1938', '135', '', '69', '1938', [], None, True, '', 'Acta Math.', '', [], '', []),
                      ('[12]', [], '', '', '', '', '', '', 'D. Friedan and S. Shenker, unpublished.', None, [], '', '', '', [], '', []),
                      ('[13]', [], '', '', '', '', '', '', 'J. Harvey, G. Moore, and C. Vafa, Nucl. Phys. B, to be published', None, [], '', '', '', [], '', []),
                      ('[14]', [], '', '', '', '', '', '', 'D. Kastor, E. Martinec and Z. Qiu, E. Fermi Institute preprint EFI-87-58.', None, [], '', '', '', [], '', []),
                      ('[15]', ['Adeva, B.'], '', 'Phys. Rev. D 58 1998', '112001', '', '58', '1998', [], None, True, '', 'Phys. Rev. D', '', [], '', [])]
        # Only membership is checked: order and completeness are not.
        for extracted_reference in self.els.get_references(self.document):
            self.assertTrue(extracted_reference in references)

    def test_get_record(self):
        """The generated record equals the stored output, ignoring outer whitespace."""
        data_file = lambda file_name: pkg_resources.resource_filename(
            'harvestingkit.tests',
            os.path.join('data', file_name)
        )
        source_file = data_file('sample_consyn_record.xml')
        marc_file = data_file('sample_consyn_output.xml')
        xml = self.els.get_record(source_file, test=True)
        with open(marc_file) as marc:
            expected = marc.read()
        self.assertEqual(xml.strip(), expected.strip())
 def setUp(self):
     """Create the CONSYN package helper and parse the test record."""
     self.els = ElsevierPackage(CONSYN=True,
                                journal_mappings=journal_mappings)
     record_path = join(dirname(folder), consyn_test_record)
     self.document = parse(record_path)
class ElsevierPackageTests(unittest.TestCase):

    """Checks for the metadata extracted from the CONSYN test record."""

    def setUp(self):
        """Create the package helper and parse the CONSYN test record."""
        self.els = ElsevierPackage(CONSYN=True,
                                   journal_mappings=journal_mappings)
        self.document = parse(join(dirname(folder), consyn_test_record))

    def test_doi(self):
        """The DOI extracted from the record matches the expected value."""
        self.assertEqual(self.els._get_doi(self.document), '10.1016/0370-2693(88)91603-6')

    def test_title(self):
        """The title extracted from the record matches the expected value."""
        self.assertEqual(self.els.get_title(self.document), 'Toward classification of conformal theories')

    def test_doctype(self):
        """The document type extracted from the record is 'fla'."""
        self.assertEqual(self.els.get_doctype(self.document), 'fla')

    def test_abstract(self):
        """The abstract extracted from the record matches the expected text."""
        abstract = 'By studying the representations of the mapping class groups '\
                   'which arise in 2D conformal theories we derive some restrictions '\
                   'on the value of the conformal dimension h i of operators and the '\
                   'central charge c of the Virasoro algebra. As a simple application '\
                   'we show that when there are a finite number of operators in the '\
                   'conformal algebra, the h i and c are all rational.'
        self.assertEqual(self.els.get_abstract(self.document), abstract)

    def test_keywords(self):
        """The keywords extracted from the record match the expected list."""
        keywords = ['Heavy quarkonia', 'Quark gluon plasma', 'Mott effect', 'X(3872)']
        self.assertEqual(self.els.get_keywords(self.document), keywords)

    def test_authors(self):
        """The authors extracted from the record match the expected list."""
        authors = [{'affiliation': ['Lyman Laboratory of Physics, Harvard University, Cambridge, MA 02138, USA'], 'surname': 'Vafa', 'given_name': 'Cumrun'}]
        self.assertEqual(self.els.get_authors(self.document), authors)

    # Renamed from the misspelled ``test_copyritght``.
    def test_copyright(self):
        """The copyright statement extracted from the record is as expected."""
        self.assertEqual(self.els.get_copyright(self.document), 'Copyright unknown. Published by Elsevier B.V.')

    def test_publication_information(self):
        """The publication information tuple of the record is as expected."""
        publication_information = ('Phys.Lett.',
                                   '0370-2693',
                                   'B206',
                                   '3',
                                   '421',
                                   '426',
                                   '1988',
                                   '1988-05-26',
                                   '10.1016/0370-2693(88)91603-6')
        self.assertEqual(self.els.get_publication_information(self.document), publication_information)

    def test_references(self):
        """Every reference extracted from the record is an expected one.

        Only membership is checked: order and completeness are not.
        """
        references = [('[1]', ['Belavin, A.A.', 'Polyakov, A.M.', 'Zamolodchikov, A.B.'], '', 'Nucl. Phys. B 241 1984', '333', '', '241', '1984', [], None, True, '', 'Nucl. Phys. B', '', [], ''),
                      ('[2]', ['Friedan, D.', 'Qiu, Z.', 'Shenker, S.H.'], '', 'Phys. Rev. Lett. 52 1984', '1575', '', '52', '1984', [], None, True, '', 'Phys. Rev. Lett.', '', [], ''),
                      ('[3]', ['Cardy, J.L.'], '', 'Nucl. Phys. B 270 1986', '186', '', '270', '1986', [], None, True, '[FS16]', 'Nucl. Phys. B', '', [], ''),
                      ('[3]', ['Capelli, A.', 'Itzykson, C.', 'Zuber, J.-B.'], '', 'Nucl. Phys. B 280 1987', '445', '', '280', '1987', [], None, True, '[FS 18]', 'Nucl. Phys. B', '', [], ''),
                      ('[3]', ['Capelli, A.', 'Itzykson, C.', 'Zuber, J.-B.'], '', 'Commun. Math. Phys. 113 1987', '1', '', '113', '1987', [], None, True, '', 'Commun. Math. Phys.', '', [], ''),
                      ('[3]', ['Gepner, D.'], '', 'Nucl. Phys. B 287 1987', '111', '', '287', '1987', [], None, True, '', 'Nucl. Phys. B', '', [], ''),
                      ('[4]', [], '', '', '', '', '', '', 'G. Anderson and G. Moore, IAS preprint IASSNS-HEP-87/69.', None, [], '', '', '', [], ''),
                      ('[5]', ['Friedan, D.', 'Shenker, S.'], '', 'Phys. Lett. B 175 1986', '287', '', '175', '1986', [], None, True, '', 'Phys. Lett. B', '', [], ''),
                      ('[5]', ['Friedan, D.', 'Shenker, S.'], '', 'Nucl. Phys. B 281 1987', '509', '', '281', '1987', [], None, True, '', 'Nucl. Phys. B', '', [], ''),
                      ('[6]', [], '', '', '', '', '', '', 'E. Martinec and S. Shenker, unpublished.', None, [], '', '', '', [], ''),
                      ('[7]', ['Vafa, C.'], '', 'Phys. Lett. B 199 1987', '195', '', '199', '1987', [], None, True, '', 'Phys. Lett. B', '', [], ''),
                      ('[8]', ['Harer, J.'], '', 'Inv. Math. 72 1983', '221', '', '72', '1983', [], None, True, '', 'Inv. Math.', '', [], ''),
                      ('[9]', ['Tsuchiya, A.', 'Kanie, Y.'], '', 'Lett. Math. Phys. 13 1987', '303', '', '13', '1987', [], None, True, '', 'Lett. Math. Phys.', '', [], ''),
                      ('[10]', [], '', '', '', '', '', '', 'E. Verlinde, to be published.', None, [], '', '', '', [], ''),
                      ('[11]', ['Dehn, M.'], '', 'Acta Math. 69 1938', '135', '', '69', '1938', [], None, True, '', 'Acta Math.', '', [], ''),
                      ('[12]', [], '', '', '', '', '', '', 'D. Friedan and S. Shenker, unpublished.', None, [], '', '', '', [], ''),
                      ('[13]', [], '', '', '', '', '', '', 'J. Harvey, G. Moore, and C. Vafa, Nucl. Phys. B, to be published', None, [], '', '', '', [], ''),
                      ('[14]', [], '', '', '', '', '', '', 'D. Kastor, E. Martinec and Z. Qiu, E. Fermi Institute preprint EFI-87-58.', None, [], '', '', '', [], '')]
        for ref in self.els.get_references(self.document):
            self.assertTrue(ref in references)

    def test_get_record(self):
        """The generated record is identical to the stored output file."""
        source_file = join(dirname(folder), consyn_test_record)
        marc_file = join(dirname(folder), consyn_output)
        with open(marc_file) as marc:
            result = marc.read()
        self.assertEqual(self.els.get_record(source_file, test=True), result)
示例#12
0
def bst_elsevier():
    """Create an ElsevierPackage and pass it, with its delivery data, to ``run``.

    ``run`` receives the package object, the connection's
    ``packages_delivery`` and the package's ``doi_package_name_mapping``.
    """
    package = ElsevierPackage()
    deliveries = package.conn.packages_delivery
    doi_mapping = package.doi_package_name_mapping
    run(package, deliveries, doi_mapping)
class ElsevierScoap3PackageTests(unittest.TestCase):

    """Test extraction of Elsevier records in SCOAP3."""

    def setUp(self):
        """Setup initial document."""
        self.els = ElsevierPackage(no_harvest=True)
        self.document = parse(pkg_resources.resource_filename(
            'harvestingkit.tests',
            os.path.join('data', 'sample_elsevier_document_output.xml')
        ))
        self.document540 = parse(pkg_resources.resource_filename(
            'harvestingkit.tests',
            os.path.join('data', 'sample_elsevier_540_document_output.xml')
        ))

    ## tests for documents
    def test_doi(self):
         """Test that doi is good."""
         self.assertEqual(self.els._get_doi(self.document), '10.1016/j.nuclphysb.2015.07.011')

    def test_540_doi(self):
         """Test that doi is good."""
         self.assertEqual(self.els._get_doi(self.document540), '10.1016/j.cell.2015.03.041')

    def test_title(self):
        """Test that title is good."""
        self.assertEqual(self.els.get_title(self.document), 'F-theory vacua with <math altimg="si1.gif" xmlns="http://www.w3.org/1998/Math/MathML"><msub><mrow><mi mathvariant="double-struck">Z</mi></mrow><mrow><mn>3</mn></mrow></msub></math> gauge symmetry')

    def test_540_title(self):
        """Test that title is good."""
        self.assertEqual(self.els.get_title(self.document540), 'Bending Gradients: How the Intestinal Stem Cell Gets Its Home')

    def test_doctype(self):
        """Test that doctype is good."""
        self.assertEqual(self.els.get_doctype(self.document), '')

    def test_540_doctype(self):
        """Test that doctype is good."""
        self.assertEqual(self.els.get_doctype(self.document540), '')

    def test_abstract(self):
        """Test that abstract is good."""
        abstract = 'Discrete gauge groups naturally arise in F-theory compactifications on genus-one fibered '\
                   'Calabi\xe2\x80\x93Yau manifolds. Such geometries appear in families that are parameterized '\
                   'by the Tate\xe2\x80\x93Shafarevich group of the genus-one fibration. While the F-theory '\
                   'compactification on any element of this family gives rise to the same physics, the corresponding '\
                   'M-theory compactifications on these geometries differ and are obtained by a fluxed circle '\
                   'reduction of the former. In this note, we focus on an element of order three in the '\
                   'Tate\xe2\x80\x93Shafarevich group of the general cubic. We discuss how the different M-theory '\
                   'vacua and the associated discrete gauge groups can be obtained by Higgsing of a pair of '\
                   'five-dimensional U(1) symmetries. The Higgs fields arise from vanishing cycles in '\
                   '<math altimg="si2.gif" xmlns="http://www.w3.org/1998/Math/MathML"><msub><mrow><mi>I</mi></mrow>'\
                   '<mrow><mn>2</mn></mrow></msub></math> -fibers that appear at certain codimension two loci in the '\
                   'base. We explicitly identify all three curves that give rise to the corresponding Higgs fields. '\
                   'In this analysis the investigation of different resolved phases of the underlying geometry plays '\
                   'a crucial r\xc3\xb4le.'

        self.assertEqual(self.els.get_abstract(self.document), abstract)

    def test_540_abstract(self):
        """Test that abstract is good."""
        abstract = 'We address the mechanism by which adult intestinal stem cells (ISCs) become localized to the '\
                   'base of each villus during embryonic development. We find that, early in gut development, '\
                   'proliferating progenitors expressing ISC markers are evenly distributed throughout the '\
                   'epithelium, in both the chick and mouse. However, as the villi form, the putative stem cells '\
                   'become restricted to the base of the villi. This shift in the localization is driven by '\
                   'mechanically influenced reciprocal signaling between the epithelium and underlying mesenchyme. '\
                   'Buckling forces physically distort the shape of the morphogenic field, causing local maxima of '\
                   'epithelial signals, in particular Shh, at the tip of each villus. This induces a suite\xc2\xa0of '\
                   'high-threshold response genes in the underlying mesenchyme to form a signaling center called '\
                   'the\xc2\xa0\xe2\x80\x9cvillus cluster.\xe2\x80\x9d Villus cluster signals, notably Bmp4, feed '\
                   'back on the overlying epithelium to ultimately restrict the stem cells to the base of each villus.'

        self.assertEqual(self.els.get_abstract(self.document540), abstract)

    def test_keywords(self):
        """Test that keywords are good."""
        keywords = []
        self.assertEqual(self.els.get_keywords(self.document), keywords)

    def test_540_keywords(self):
        """Test that keywords are good."""
        keywords = []
        self.assertEqual(self.els.get_keywords(self.document540), keywords)

    def test_add_orcids(self):
        """Test that orcids are good.

        According to "Tag by Tag The Elsevier DTD 5 Family of XML DTDs" orcids will be
        distributed as an attribute in the ce:author tag.
        """
        xml_author = Element('ce:author')
        xml_author.setAttribute('orcid', '1234-5678-4321-8765')
        authors = [{}]

        # _add_orcids will alter the authors list
        self.els._add_orcids(authors, [xml_author])

        self.assertEqual(authors, [{'orcid': 'ORCID:1234-5678-4321-8765'}])

    def test_authors(self):
        """Test that authors are good."""
        authors = [{'affiliation': ['Department of Physics and Astronomy, University of Pennsylvania, Philadelphia, PA, 19104-6396, USA'],
                    'cross_ref': ['aff0010'],
                    'surname': 'Cveti\xc4\x8d',
                    'given_name': 'Mirjam',
                    'email': '*****@*****.**'},
                   {'affiliation': ['Department of Physics and Astronomy, University of Pennsylvania, Philadelphia, PA, 19104-6396, USA',
                                    'Department of Mathematics, University of Pennsylvania, Philadelphia, PA, 19104-6396, USA'],
                    'cross_ref': ['aff0010', 'aff0020', 'cr0010'],
                    'surname': 'Donagi',
                    'given_name': 'Ron',
                    'email': '*****@*****.**'},
                   {'affiliation': ['Theory Group, Physics Department, CERN, Geneva 23, CH-1211, Switzerland'], 'cross_ref': ['aff0030'], 'surname': 'Klevers', 'given_name': 'Denis', 'email': '*****@*****.**'},
                   {'affiliation': ['Department of Physics and Astronomy, University of Pennsylvania, Philadelphia, PA, 19104-6396, USA'], 'cross_ref': ['aff0010'], 'surname': 'Piragua', 'given_name': 'Hernan', 'email': '*****@*****.**'},
                   {'affiliation': ['Department of Physics and Astronomy, University of Pennsylvania, Philadelphia, PA, 19104-6396, USA'], 'cross_ref': ['aff0010'], 'surname': 'Poretschkin', 'given_name': 'Maximilian', 'email': '*****@*****.**'}]
        self.assertEqual(self.els.get_authors(self.document), authors)

    def test_540_authors(self):
        """Check the author list extracted from the 540 sample document."""
        # Four of the five expected authors share this single affiliation.
        genetics = 'Department of Genetics, Harvard Medical School, Boston, MA 02115, USA'
        mahadevan_affiliations = [
            'School of Engineering and Applied Sciences, Harvard University, Cambridge, MA 02138, USA',
            'Department of Organismic and Evolutionary Biology, Harvard University, Cambridge, MA 02138, USA',
            'Department of Physics, Harvard University, Cambridge, MA 02138, USA',
            'Wyss Institute for Biologically Inspired Engineering, Harvard University, Cambridge, MA 02138, USA',
            'Kavli Institute for Nanobio Science and Technology, Harvard University, Cambridge, MA 02138, USA',
            'Department of Systems Biology, Harvard Medical School, Boston, MA 02115, USA',
        ]
        expected = [
            {
                'surname': 'Shyer',
                'given_name': 'Amy\xc2\xa0E.',
                'affiliation': [genetics],
                'cross_ref': ['aff1', 'fn1'],
            },
            {
                'surname': 'Huycke',
                'given_name': 'Tyler\xc2\xa0R.',
                'affiliation': [genetics],
                'cross_ref': ['aff1'],
            },
            {
                'surname': 'Lee',
                'given_name': 'ChangHee',
                'affiliation': [genetics],
                'cross_ref': ['aff1'],
            },
            {
                'surname': 'Mahadevan',
                'given_name': 'L.',
                'affiliation': mahadevan_affiliations,
                'cross_ref': ['aff2', 'aff3', 'aff4', 'aff5', 'aff6', 'aff7'],
            },
            {
                # Only this author carries an e-mail address in the expected data.
                'surname': 'Tabin',
                'given_name': 'Clifford\xc2\xa0J.',
                'affiliation': [genetics],
                'cross_ref': ['aff1', 'cor1'],
                'email': '*****@*****.**',
            },
        ]
        extracted = self.els.get_authors(self.document540)
        self.assertEqual(extracted, expected)

    def test_copyright(self):
        """Expect an empty copyright string for the default sample document."""
        holder = self.els.get_copyright(self.document)
        self.assertEqual(holder, '')

    def test_540_copyright(self):
        """Expect 'Elsevier Inc.' as copyright of the 540 sample document."""
        holder = self.els.get_copyright(self.document540)
        self.assertEqual(holder, 'Elsevier Inc.')

    # A better example package is needed for the DTD 5.2 version: this one
    # does not have an issue.xml, so the issue file and the main file used
    # below do not match.
    @unittest.skip("Issue and main xml are not matching")
    def test_publication_information(self):
        """Compare the publication info tuple of the default sample document."""
        issue_path = pkg_resources.resource_filename(
            'harvestingkit.tests',
            os.path.join('data', 'sample_elsevier_issue'))
        # The DOI mapping is rebuilt from the registered issue before the
        # publication information is requested.
        self.els._found_issues = [issue_path]
        self.els._build_doi_mapping()
        expected = (
            'Phys.Lett.',
            '0370-2693',
            'B206',
            '3',
            '421',
            '426',
            '1988',
            '1988-05-26',
            '10.1016/j.nuclphysb.2015.07.011',
        )
        extracted = self.els.get_publication_information(self.document)
        self.assertEqual(extracted, expected)

    def test_540_publication_information(self):
        """Compare the publication info tuple of the 540 sample document."""
        issue_path = pkg_resources.resource_filename(
            'harvestingkit.tests',
            os.path.join('data', 'sample_elsevier_540_issue'))
        # The DOI mapping is rebuilt from the registered issue before the
        # publication information is requested.
        self.els._found_issues = [issue_path]
        self.els._build_doi_mapping()
        expected = (
            'CELL',
            '0092-8674',
            '161',
            '3',
            '569',
            '580',
            '2015',
            '2015-04-23',
            '10.1016/j.cell.2015.03.041',
        )
        extracted = self.els.get_publication_information(self.document540)
        self.assertEqual(extracted, expected)

    @unittest.skip("Not done yet")
    def test_references(self):
        """Check that every extracted reference is one of the expected tuples.

        Only membership is verified: each reference returned by
        get_references must appear in the expected list. The order and the
        number of returned references are not checked.
        """
        expected_references = [
            ('[1]', ['Belavin, A.A.', 'Polyakov, A.M.', 'Zamolodchikov, A.B.'], '', 'Nucl. Phys. B 241 1984', '333', '', '241', '1984', [], None, True, '', 'Nucl. Phys. B', '', [], ''),
            ('[2]', ['Friedan, D.', 'Qiu, Z.', 'Shenker, S.H.'], '', 'Phys. Rev. Lett. 52 1984', '1575', '', '52', '1984', [], None, True, '', 'Phys. Rev. Lett.', '', [], ''),
            ('[3]', ['Cardy, J.L.'], '', 'Nucl. Phys. B 270 1986', '186', '', '270', '1986', [], None, True, '[FS16]', 'Nucl. Phys. B', '', [], ''),
            ('[3]', ['Capelli, A.', 'Itzykson, C.', 'Zuber, J.-B.'], '', 'Nucl. Phys. B 280 1987', '445', '', '280', '1987', [], None, True, '[FS 18]', 'Nucl. Phys. B', '', [], ''),
            ('[3]', ['Capelli, A.', 'Itzykson, C.', 'Zuber, J.-B.'], '', 'Commun. Math. Phys. 113 1987', '1', '', '113', '1987', [], None, True, '', 'Commun. Math. Phys.', '', [], ''),
            ('[3]', ['Gepner, D.'], '', 'Nucl. Phys. B 287 1987', '111', '', '287', '1987', [], None, True, '', 'Nucl. Phys. B', '', [], ''),
            ('[4]', [], '', '', '', '', '', '', 'G. Anderson and G. Moore, IAS preprint IASSNS-HEP-87/69.', None, [], '', '', '', [], ''),
            ('[5]', ['Friedan, D.', 'Shenker, S.'], '', 'Phys. Lett. B 175 1986', '287', '', '175', '1986', [], None, True, '', 'Phys. Lett. B', '', [], ''),
            ('[5]', ['Friedan, D.', 'Shenker, S.'], '', 'Nucl. Phys. B 281 1987', '509', '', '281', '1987', [], None, True, '', 'Nucl. Phys. B', '', [], ''),
            ('[6]', [], '', '', '', '', '', '', 'E. Martinec and S. Shenker, unpublished.', None, [], '', '', '', [], ''),
            ('[7]', ['Vafa, C.'], '', 'Phys. Lett. B 199 1987', '195', '', '199', '1987', [], None, True, '', 'Phys. Lett. B', '', [], ''),
            ('[8]', ['Harer, J.'], '', 'Inv. Math. 72 1983', '221', '', '72', '1983', [], None, True, '', 'Inv. Math.', '', [], ''),
            ('[9]', ['Tsuchiya, A.', 'Kanie, Y.'], '', 'Lett. Math. Phys. 13 1987', '303', '', '13', '1987', [], None, True, '', 'Lett. Math. Phys.', '', [], ''),
            ('[10]', [], '', '', '', '', '', '', 'E. Verlinde, to be published.', None, [], '', '', '', [], ''),
            ('[11]', ['Dehn, M.'], '', 'Acta Math. 69 1938', '135', '', '69', '1938', [], None, True, '', 'Acta Math.', '', [], ''),
            ('[12]', [], '', '', '', '', '', '', 'D. Friedan and S. Shenker, unpublished.', None, [], '', '', '', [], ''),
            ('[13]', [], '', '', '', '', '', '', 'J. Harvey, G. Moore, and C. Vafa, Nucl. Phys. B, to be published', None, [], '', '', '', [], ''),
            ('[14]', [], '', '', '', '', '', '', 'D. Kastor, E. Martinec and Z. Qiu, E. Fermi Institute preprint EFI-87-58.', None, [], '', '', '', [], ''),
        ]
        for reference in self.els.get_references(self.document):
            # assertIn reports the offending reference on failure, whereas
            # assertTrue(x in y) only says "False is not true".
            self.assertIn(reference, expected_references)

    @unittest.skip("Not done yet")
    def test_get_record(self):
        """Compare the generated record with the stored sample record file.

        Both texts are stripped of surrounding whitespace before comparison.
        """
        def sample(name):
            # Path of a sample file below the package's 'data' folder.
            return pkg_resources.resource_filename(
                'harvestingkit.tests', os.path.join('data', name))

        document_path = sample('sample_elsevier_document_output.xml')
        expected_path = sample('sample_elsevier_record.xml')

        # Register the sample issue and rebuild the DOI mapping before the
        # record is generated.
        self.els._found_issues = [sample('sample_elsevier_issue')]
        self.els._build_doi_mapping()

        generated = self.els.get_record(document_path, test=True, no_pdf=True)
        with open(expected_path) as handle:
            expected = handle.read()
        self.assertEqual(generated.strip(), expected.strip())
def bst_consyn_harvest(feed=None, package=None, package_list=None,
                       batch_size='500', delete_zip='False'):
    """
    Task to convert xml files from consyn.elsevier.com to marc xml files.
    There are three execution modes:
    1. Download from an atom feed (used when neither package nor
       package_list is given).
    2. Extract a zip package.
    3. Extract a list of zip packages.

    @param feed: The URL of the atom feed to download. When empty, the
                 default consyn URL built from CFG_CONSYN_ATOM_KEY is used.
    @type feed: string

    @param package: A path to a zip package
    @type package: string

    @param package_list: A path to a file with a list of paths to zip packages
    @type package_list: string

    @param batch_size: The number of records contained in each output file.
                       Anything that int() cannot convert falls back to 500
                       with a warning.
    @type batch_size: int or string representation of an integer

    @param delete_zip: Flag to indicate if the downloaded zip files
                       should be kept on the disk or not. The strings
                       'true'/'false' are accepted in any letter case;
                       any other non-boolean value falls back to False
                       with a warning.
    @type delete_zip: boolean or string representation of a boolean
    """
    if not feed:
        feed = "https://consyn.elsevier.com/batch/atom?key=%s" % \
               (CFG_CONSYN_ATOM_KEY,)
    new_files = []
    new_sources = []

    try:
        batch_size = int(batch_size)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric values such as None.
        batch_size = 500
        write_message('Warning batch_size parameter is not a valid integer\n' +
                      'the default value \'500\' has been used!\n')

    # The docstring advertises a boolean, so a real bool is accepted as is
    # instead of failing on the missing .lower() method.
    if not isinstance(delete_zip, bool):
        flag = delete_zip.lower() if hasattr(delete_zip, 'lower') else None
        if flag not in ('true', 'false'):
            write_message('Warning delete_zip parameter is not a valid Boolean (True/False)\n' +
                          'the default value \'False\' has been used!\n')
        delete_zip = flag == 'true'

    out_folder = create_work_folder(CFG_CONSYN_OUT_DIRECTORY)

    try:
        run_sql("SELECT filename FROM CONSYNHARVEST")
    except ProgrammingError:
        # Table missing, create it.
        run_sql("CREATE TABLE CONSYNHARVEST ("
                "filename VARCHAR(100) NOT NULL PRIMARY KEY,"
                "date VARCHAR(50),"
                "size VARCHAR(30) );")

    # A single package takes precedence over a package list; the feed is
    # only downloaded when neither of them is given.
    if not package and not package_list:
        download_feed(feed, batch_size, delete_zip, new_sources, out_folder)
    elif package:
        extract_package(package, batch_size, delete_zip, out_folder)
    elif package_list:
        extract_multiple_packages(package_list, batch_size,
                                  delete_zip, new_sources,
                                  out_folder)

    task_sleep_now_if_required(can_stop_too=True)
    consyn_files = join(out_folder, "consyn-files")
    consyn_files = consyn_files.lstrip()
    els = ElsevierPackage(path="whatever", CONSYN=True)
    task_update_progress("Converting files 2/2...")
    fetch_xml_files(consyn_files, els, new_files)
    task_sleep_now_if_required(can_stop_too=False)
    create_collection(batch_size, new_files, new_sources, out_folder)
示例#15
0
def _parse_boolean_flag(value, name):
    """Turn a boolean tasklet argument into a real bool.

    The flags are documented as string representations of booleans, so
    'true' and 'false' are matched case-insensitively. A real bool is
    returned unchanged. Any other value yields False and a warning that
    names the offending parameter.

    :param value: the raw argument value.
    :param name: the parameter name to mention in the warning.
    :type name: string.

    :return: the parsed flag.
    :rtype: bool.
    """
    if isinstance(value, bool):
        return value
    normalized = value.lower() if hasattr(value, 'lower') else None
    if normalized == 'true':
        return True
    if normalized != 'false':
        write_message(('Warning %s parameter is not'
                       ' a valid Boolean (True/False)\n'
                       'the default value \'False\' has been used!\n') % name)
    return False


def bst_consyn_harvest(feed_url=None,
                       package=None,
                       feed_file=None,
                       package_list_file=None,
                       batch_size='500',
                       delete_zip='False',
                       submit='False',
                       threshold_date=None):
    """ Task to convert xml files from consyn.elsevier.com to Marc xml files.
    There are four execution modes:
    1. Download from an atom feed url.
    2. Extract and convert a zip package.
    3. Download from an atom feed file.
    4. Extract and convert a list of zip packages.

    When several inputs are given, package is used first, then
    package_list_file, then feed_file; the feed url is only downloaded
    when none of them is set.

    The feed is stored to the file system under the folder feeds.
    If no errors occur during the execution of the tasklet the feed
    is deleted. Records may be recovered running the tasklet again with
    the modes 2, 3 or 4.

    :param feed_url: A URL to the atom feed. When empty, the default
                     consyn URL built from CFG_CONSYN_ATOM_KEY is used.
    :type feed_url: string.

    :param package: A path to a zip package.
    :type package: string.

    :param feed_file: A path to an atom feed file.
    :type feed_file: string.

    :param package_list_file: A path to a file with a list of paths
                              to zip packages. The file must contain
                              the path to each package in a different
                              line.
    :type package_list_file: string.

    :param batch_size: The number of records contained in each output file.
    :type batch_size: string representation of an integer.

    :param delete_zip: Flag to indicate if the downloaded zip files
                       should be kept on the disk or not.
    :type delete_zip: string representation of a boolean.

    :param submit: Flag to indicate whether the result files
                       should be submited by email and uploaded
                       to FTP server.
    :type submit: string representation of a boolean.
    :param threshold_date: threshold date only converts records that they were
                      published after threshold_date
    :type threshold_date: string in the format YYYY-MM-DD
    """
    if not feed_url:
        feed_url = "https://consyn.elsevier.com/batch/atom?key=%s" % \
                   (CFG_CONSYN_ATOM_KEY,)
    new_files = []
    new_sources = []
    feed_location = ''

    try:
        batch_size = int(batch_size)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric values such as None.
        batch_size = 500
        write_message('Warning batch_size parameter is not a valid integer\n'
                      'the default value \'500\' has been used!\n')
    delete_zip = _parse_boolean_flag(delete_zip, 'delete_zip')
    # The warning now names the real parameter; it used to mention a
    # non-existent 'upload_FTP' parameter.
    submit = _parse_boolean_flag(submit, 'submit')

    if threshold_date:
        date_format = "%Y-%m-%d"
        try:
            # Parsing and re-formatting validates the value and normalizes
            # it to zero-padded YYYY-MM-DD.
            threshold_date = datetime.strptime(
                threshold_date, date_format).strftime(date_format)
        except ValueError:
            write_message('Error threshold_date parameter is not '
                          'in the right format. It should be in '
                          'form "YYYY-MM-DD".')
            task_update_status("ERROR")
            return

    if not exists(CFG_CONSYN_OUT_DIRECTORY):
        makedirs(CFG_CONSYN_OUT_DIRECTORY)
    out_folder = CFG_CONSYN_OUT_DIRECTORY
    journal_mappings = get_kbs()['journals'][1]
    els = ElsevierPackage(CONSYN=True, journal_mappings=journal_mappings)

    consyn_files = join(out_folder, "consyn-files")
    consyn_files = consyn_files.lstrip()

    if package:
        xml_files = extract_package(package, delete_zip, out_folder,
                                    new_sources)
    elif package_list_file:
        package_list = []
        with open(package_list_file, 'r') as package_file:
            for line in package_file:
                line = line.strip()
                if line:
                    package_list.append(line)
        xml_files = extract_multiple_packages(package_list, delete_zip,
                                              new_sources, out_folder)
    elif feed_file:
        # Each feed entry provides the download link at index 0 and the
        # package file name at index 1.
        available_packages = []
        for entry in parse_feed(feed_file):
            task_sleep_now_if_required()
            package_path = join(CFG_CONSYN_OUT_DIRECTORY, entry[1])
            if not exists(package_path):
                link = entry[0].replace(' ', '%20')
                try:
                    message = ("Downloading %s to %s\n" % (link, package_path))
                    write_message(message)
                    download_url(link, "zip", package_path, 5, 60.0)
                except InvenioFileDownloadError as err:
                    message = "URL could not be opened: " + link
                    write_message(message)
                    write_message(str(err))
                    write_message(traceback.format_exc()[:-1])
                    task_update_status("CERROR")
                    # A package that could not be fetched is left out of
                    # the extraction below.
                    continue
            if package_path not in available_packages:
                available_packages.append(package_path)
        # Extract once, after all downloads. The call used to sit inside
        # the loop, which re-extracted every package on each iteration and
        # left xml_files undefined for a feed without entries.
        xml_files = extract_multiple_packages(available_packages, delete_zip,
                                              new_sources, out_folder)
    else:
        feeds_folder = join(CFG_CONSYN_OUT_DIRECTORY, 'feeds')
        if not exists(feeds_folder):
            makedirs(feeds_folder)
        date = datetime.now().strftime("%Y.%m.%d")
        feed_location = "feed-%s.xml" % date
        feed_location = join(feeds_folder, feed_location)
        xml_files = download_feed(feed_url, delete_zip, new_sources,
                                  out_folder, feed_location)
    task_update_progress("Converting files 2/3...")
    task_sleep_now_if_required()
    results = convert_files(xml_files,
                            els,
                            prefix=consyn_files,
                            threshold_date=threshold_date)
    # Only the (status_code, result) pairs are needed, not the keys.
    for status_code, result in results.values():
        if status_code == StatusCodes.OK:
            new_files.append(result)
    task_update_progress("Compiling output 3/3...")
    task_sleep_now_if_required()
    create_collection(batch_size, new_files, new_sources, out_folder, submit)
    # The stored feed is only removed after an error-free run, so that a
    # failed run can be repeated from the feed file.
    if feed_location and not _errors_detected:
        remove(feed_location)
    for error in _errors_detected:
        write_message(str(error))
示例#16
0
class ElsevierScoap3PackageTests(unittest.TestCase):

    """Test extraction of Elsevier records in SCOAP3."""

    def _sample_path(self, name):
        """Return the path of a sample below the 'data' folder of harvestingkit.tests."""
        return pkg_resources.resource_filename(
            'harvestingkit.tests',
            os.path.join('data', name)
        )

    def _load_issue(self, issue_name):
        """Register one sample issue on the package and rebuild its DOI mapping."""
        self.els._found_issues = [self._sample_path(issue_name)]
        self.els._build_doi_mapping()

    def setUp(self):
        """Create the package under test and parse every sample document."""
        self.els = ElsevierPackage(no_harvest=True)
        self.document = parse(
            self._sample_path('sample_elsevier_document_output.xml'))
        self.document540 = parse(
            self._sample_path('sample_elsevier_540_document_output.xml'))
        self.document550 = parse(
            self._sample_path('sample_elsevier_550_document_output.xml'))
        self.document560 = parse(
            self._sample_path('sample_elsevier_560_document_output.xml'))
        self.document560_1 = parse(
            self._sample_path('sample_elsevier_560_document_output_1.xml'))

    ## tests for documents
    def test_doi(self):
        """Test that the doi of the default document is good."""
        self.assertEqual(self.els._get_doi(self.document),
                         '10.1016/j.nuclphysb.2015.07.011')

    def test_540_doi(self):
        """Test that the doi of the 540 document is good."""
        self.assertEqual(self.els._get_doi(self.document540),
                         '10.1016/j.cell.2015.03.041')

    def test_550_doi(self):
        """Test that the doi of the 550 document is good."""
        self.assertEqual(self.els._get_doi(self.document550),
                         '10.1016/j.nuclphysb.2018.12.001')

    def test_560_doi(self):
        """Test that the doi of the 560 document is good."""
        self.assertEqual(self.els._get_doi(self.document560),
                         '10.1016/j.physletb.2019.06.020')

    def test_title(self):
        """Test that the title of the default document is good."""
        title = 'F-theory vacua with <math altimg="si1.gif" xmlns="http://www.w3.org/1998/Math/MathML"><msub><mrow><mi mathvariant="double-struck">Z</mi></mrow><mrow><mn>3</mn></mrow></msub></math> gauge symmetry'
        self.assertEqual(self.els.get_title(self.document), title)

    def test_540_title(self):
        """Test that the title of the 540 document is good."""
        self.assertEqual(self.els.get_title(self.document540),
                         'Bending Gradients: How the Intestinal Stem Cell Gets Its Home')

    def test_550_title(self):
        """Test that the title of the 550 document is good."""
        self.assertEqual(self.els.get_title(self.document550),
                         'Revisiting RGEs for general gauge theories')

    def test_560_title(self):
        """Test that the title of the 560 document is good."""
        self.assertEqual(self.els.get_title(self.document560),
                         'A two Higgs doublet model for dark matter and neutrino masses')

    def test_doctype(self):
        """Test that the doctype of the default document is empty."""
        self.assertEqual(self.els.get_doctype(self.document), '')

    def test_540_doctype(self):
        """Test that the doctype of the 540 document is empty."""
        self.assertEqual(self.els.get_doctype(self.document540), '')

    def test_550_doctype(self):
        """Test that the doctype of the 550 document is empty."""
        self.assertEqual(self.els.get_doctype(self.document550), '')

    def test_560_doctype(self):
        """Test that the doctype of the 560 document is empty."""
        self.assertEqual(self.els.get_doctype(self.document560), '')

    def test_abstract(self):
        """Test that the abstract of the default document is good."""
        abstract = (
            'Discrete gauge groups naturally arise in F-theory compactifications on genus-one fibered '
            'Calabi\xe2\x80\x93Yau manifolds. Such geometries appear in families that are parameterized '
            'by the Tate\xe2\x80\x93Shafarevich group of the genus-one fibration. While the F-theory '
            'compactification on any element of this family gives rise to the same physics, the corresponding '
            'M-theory compactifications on these geometries differ and are obtained by a fluxed circle '
            'reduction of the former. In this note, we focus on an element of order three in the '
            'Tate\xe2\x80\x93Shafarevich group of the general cubic. We discuss how the different M-theory '
            'vacua and the associated discrete gauge groups can be obtained by Higgsing of a pair of '
            'five-dimensional U(1) symmetries. The Higgs fields arise from vanishing cycles in '
            '<math altimg="si2.gif" xmlns="http://www.w3.org/1998/Math/MathML"><msub><mrow><mi>I</mi></mrow>'
            '<mrow><mn>2</mn></mrow></msub></math> -fibers that appear at certain codimension two loci in the '
            'base. We explicitly identify all three curves that give rise to the corresponding Higgs fields. '
            'In this analysis the investigation of different resolved phases of the underlying geometry plays '
            'a crucial r\xc3\xb4le.'
        )
        self.assertEqual(self.els.get_abstract(self.document), abstract)

    def test_540_abstract(self):
        """Test that the abstract of the 540 document is good."""
        abstract = (
            'We address the mechanism by which adult intestinal stem cells (ISCs) become localized to the '
            'base of each villus during embryonic development. We find that, early in gut development, '
            'proliferating progenitors expressing ISC markers are evenly distributed throughout the '
            'epithelium, in both the chick and mouse. However, as the villi form, the putative stem cells '
            'become restricted to the base of the villi. This shift in the localization is driven by '
            'mechanically influenced reciprocal signaling between the epithelium and underlying mesenchyme. '
            'Buckling forces physically distort the shape of the morphogenic field, causing local maxima of '
            'epithelial signals, in particular Shh, at the tip of each villus. This induces a suite\xc2\xa0of '
            'high-threshold response genes in the underlying mesenchyme to form a signaling center called '
            'the\xc2\xa0\xe2\x80\x9cvillus cluster.\xe2\x80\x9d Villus cluster signals, notably Bmp4, feed '
            'back on the overlying epithelium to ultimately restrict the stem cells to the base of each villus.'
        )
        self.assertEqual(self.els.get_abstract(self.document540), abstract)

    def test_keywords(self):
        """Test that the default document yields no keywords."""
        keywords = []
        self.assertEqual(self.els.get_keywords(self.document), keywords)

    def test_540_keywords(self):
        """Test that the 540 document yields no keywords."""
        keywords = []
        self.assertEqual(self.els.get_keywords(self.document540), keywords)

    def test_add_orcids(self):
        """Test that orcids are good.

        According to "Tag by Tag The Elsevier DTD 5 Family of XML DTDs" orcids will be
        distributed as an attribute in the ce:author tag.
        """
        xml_author = Element('ce:author')
        xml_author.setAttribute('orcid', '1234-5678-4321-8765')
        authors = [{}]

        # _add_orcids will alter the authors list
        self.els._add_orcids(authors, [xml_author])

        self.assertEqual(authors, [{'orcid': 'ORCID:1234-5678-4321-8765'}])

    def test_authors(self):
        """Test that the authors of the default document are good."""
        authors = [
            {'affiliation': ['Department of Physics and Astronomy, University of Pennsylvania, Philadelphia, PA, 19104-6396, USA'],
             'cross_ref': ['aff0010'],
             'surname': 'Cveti\xc4\x8d',
             'given_name': 'Mirjam',
             'email': '*****@*****.**'},
            {'affiliation': ['Department of Physics and Astronomy, University of Pennsylvania, Philadelphia, PA, 19104-6396, USA',
                             'Department of Mathematics, University of Pennsylvania, Philadelphia, PA, 19104-6396, USA'],
             'cross_ref': ['aff0010', 'aff0020', 'cr0010'],
             'surname': 'Donagi',
             'given_name': 'Ron',
             'email': '*****@*****.**'},
            {'affiliation': ['Theory Group, Physics Department, CERN, Geneva 23, CH-1211, Switzerland'],
             'cross_ref': ['aff0030'],
             'surname': 'Klevers',
             'given_name': 'Denis',
             'email': '*****@*****.**'},
            {'affiliation': ['Department of Physics and Astronomy, University of Pennsylvania, Philadelphia, PA, 19104-6396, USA'],
             'cross_ref': ['aff0010'],
             'surname': 'Piragua',
             'given_name': 'Hernan',
             'email': '*****@*****.**'},
            {'affiliation': ['Department of Physics and Astronomy, University of Pennsylvania, Philadelphia, PA, 19104-6396, USA'],
             'cross_ref': ['aff0010'],
             'surname': 'Poretschkin',
             'given_name': 'Maximilian',
             'email': '*****@*****.**'},
        ]
        self.assertEqual(self.els.get_authors(self.document), authors)

    def test_540_authors(self):
        """Test that the authors of the 540 document are good."""
        authors = [
            {'affiliation': ['Department of Genetics, Harvard Medical School, Boston, MA 02115, USA'],
             'surname': 'Shyer',
             'given_name': 'Amy\xc2\xa0E.',
             'cross_ref': ['aff1', 'fn1']},
            {'affiliation': ['Department of Genetics, Harvard Medical School, Boston, MA 02115, USA'],
             'surname': 'Huycke',
             'given_name': 'Tyler\xc2\xa0R.',
             'cross_ref': ['aff1']},
            {'affiliation': ['Department of Genetics, Harvard Medical School, Boston, MA 02115, USA'],
             'surname': 'Lee',
             'given_name': 'ChangHee',
             'cross_ref': ['aff1']},
            {'affiliation': ['School of Engineering and Applied Sciences, Harvard University, Cambridge, MA 02138, USA',
                             'Department of Organismic and Evolutionary Biology, Harvard University, Cambridge, MA 02138, USA',
                             'Department of Physics, Harvard University, Cambridge, MA 02138, USA',
                             'Wyss Institute for Biologically Inspired Engineering, Harvard University, Cambridge, MA 02138, USA',
                             'Kavli Institute for Nanobio Science and Technology, Harvard University, Cambridge, MA 02138, USA',
                             'Department of Systems Biology, Harvard Medical School, Boston, MA 02115, USA'],
             'surname': 'Mahadevan',
             'given_name': 'L.',
             'cross_ref': ['aff2', 'aff3', 'aff4', 'aff5', 'aff6', 'aff7']},
            {'affiliation': ['Department of Genetics, Harvard Medical School, Boston, MA 02115, USA'],
             'cross_ref': ['aff1', 'cor1'],
             'surname': 'Tabin',
             'given_name': 'Clifford\xc2\xa0J.',
             'email': '*****@*****.**'},
        ]
        self.assertEqual(self.els.get_authors(self.document540), authors)

    def test_copyright(self):
        """Test that the copyright of the default document is empty."""
        self.assertEqual(self.els.get_copyright(self.document), '')

    def test_540_copyright(self):
        """Test that the copyright of the 540 document is good."""
        self.assertEqual(self.els.get_copyright(self.document540), 'Elsevier Inc.')

    def test_550_copyright(self):
        """Test that the copyright of the 550 document is good."""
        self.assertEqual(self.els.get_copyright(self.document550), 'The Authors')

    def test_560_copyright(self):
        """Test that the copyright of the 560 document is empty."""
        self.assertEqual(self.els.get_copyright(self.document560), '')

    # A better example package is needed for the DTD 5.2 version: this one
    # does not have an issue.xml, so the issue file and the main file used
    # below do not match.
    @unittest.skip("Issue and main xml are not matching")
    def test_publication_information(self):
        """Test that pubinfo is good."""
        self._load_issue('sample_elsevier_issue')
        publication_information = ('Phys.Lett.',
                                   '0370-2693',
                                   'B206',
                                   '3',
                                   '421',
                                   '426',
                                   '1988',
                                   '1988-05-26',
                                   '10.1016/j.nuclphysb.2015.07.011')
        self.assertEqual(self.els.get_publication_information(self.document),
                         publication_information)

    def test_540_publication_information(self):
        """Test that pubinfo 540 is good."""
        self._load_issue('sample_elsevier_540_issue')
        publication_information = ('CELL',
                                   '0092-8674',
                                   '161',
                                   '3',
                                   '569',
                                   '580',
                                   '2015',
                                   '2015-04-23',
                                   '10.1016/j.cell.2015.03.041')
        self.assertEqual(self.els.get_publication_information(self.document540),
                         publication_information)

    def test_550_publication_information(self):
        """Test that pubinfo 550 is good."""
        self._load_issue('sample_elsevier_550_issue')
        publication_information = ('Nuclear Physics B',
                                   '0550-3213',
                                   '939',
                                   '',
                                   '1',
                                   '48',
                                   '2019',
                                   '2019-02',
                                   '10.1016/j.nuclphysb.2018.12.001')
        self.assertEqual(self.els.get_publication_information(self.document550),
                         publication_information)

    def test_560_publication_information(self):
        """Test that pubinfo 560 is good."""
        self._load_issue('sample_elsevier_560_issue')
        publication_information = ('Physics letters B',
                                   '0370-2693',
                                   '795',
                                   '',
                                   '1',
                                   '6',
                                   '2019',
                                   '2019-08-10',
                                   '10.1016/j.physletb.2019.05.043')
        # This check uses the second 560 sample (document560_1), not
        # document560.
        self.assertEqual(self.els.get_publication_information(self.document560_1),
                         publication_information)

    @unittest.skip("Not done yet")
    def test_references(self):
        """Test that every extracted reference is one of the expected tuples.

        Only membership is verified; the order and the number of returned
        references are not checked.
        """
        references = [
            ('[1]', ['Belavin, A.A.', 'Polyakov, A.M.', 'Zamolodchikov, A.B.'], '', 'Nucl. Phys. B 241 1984', '333', '', '241', '1984', [], None, True, '', 'Nucl. Phys. B', '', [], ''),
            ('[2]', ['Friedan, D.', 'Qiu, Z.', 'Shenker, S.H.'], '', 'Phys. Rev. Lett. 52 1984', '1575', '', '52', '1984', [], None, True, '', 'Phys. Rev. Lett.', '', [], ''),
            ('[3]', ['Cardy, J.L.'], '', 'Nucl. Phys. B 270 1986', '186', '', '270', '1986', [], None, True, '[FS16]', 'Nucl. Phys. B', '', [], ''),
            ('[3]', ['Capelli, A.', 'Itzykson, C.', 'Zuber, J.-B.'], '', 'Nucl. Phys. B 280 1987', '445', '', '280', '1987', [], None, True, '[FS 18]', 'Nucl. Phys. B', '', [], ''),
            ('[3]', ['Capelli, A.', 'Itzykson, C.', 'Zuber, J.-B.'], '', 'Commun. Math. Phys. 113 1987', '1', '', '113', '1987', [], None, True, '', 'Commun. Math. Phys.', '', [], ''),
            ('[3]', ['Gepner, D.'], '', 'Nucl. Phys. B 287 1987', '111', '', '287', '1987', [], None, True, '', 'Nucl. Phys. B', '', [], ''),
            ('[4]', [], '', '', '', '', '', '', 'G. Anderson and G. Moore, IAS preprint IASSNS-HEP-87/69.', None, [], '', '', '', [], ''),
            ('[5]', ['Friedan, D.', 'Shenker, S.'], '', 'Phys. Lett. B 175 1986', '287', '', '175', '1986', [], None, True, '', 'Phys. Lett. B', '', [], ''),
            ('[5]', ['Friedan, D.', 'Shenker, S.'], '', 'Nucl. Phys. B 281 1987', '509', '', '281', '1987', [], None, True, '', 'Nucl. Phys. B', '', [], ''),
            ('[6]', [], '', '', '', '', '', '', 'E. Martinec and S. Shenker, unpublished.', None, [], '', '', '', [], ''),
            ('[7]', ['Vafa, C.'], '', 'Phys. Lett. B 199 1987', '195', '', '199', '1987', [], None, True, '', 'Phys. Lett. B', '', [], ''),
            ('[8]', ['Harer, J.'], '', 'Inv. Math. 72 1983', '221', '', '72', '1983', [], None, True, '', 'Inv. Math.', '', [], ''),
            ('[9]', ['Tsuchiya, A.', 'Kanie, Y.'], '', 'Lett. Math. Phys. 13 1987', '303', '', '13', '1987', [], None, True, '', 'Lett. Math. Phys.', '', [], ''),
            ('[10]', [], '', '', '', '', '', '', 'E. Verlinde, to be published.', None, [], '', '', '', [], ''),
            ('[11]', ['Dehn, M.'], '', 'Acta Math. 69 1938', '135', '', '69', '1938', [], None, True, '', 'Acta Math.', '', [], ''),
            ('[12]', [], '', '', '', '', '', '', 'D. Friedan and S. Shenker, unpublished.', None, [], '', '', '', [], ''),
            ('[13]', [], '', '', '', '', '', '', 'J. Harvey, G. Moore, and C. Vafa, Nucl. Phys. B, to be published', None, [], '', '', '', [], ''),
            ('[14]', [], '', '', '', '', '', '', 'D. Kastor, E. Martinec and Z. Qiu, E. Fermi Institute preprint EFI-87-58.', None, [], '', '', '', [], ''),
        ]
        for ref in self.els.get_references(self.document):
            # assertIn reports the offending reference on failure, whereas
            # assertTrue(x in y) only says "False is not true".
            self.assertIn(ref, references)

    @unittest.skip("Not done yet")
    def test_get_record(self):
        """Test that the whole record is correct."""
        source_file = self._sample_path('sample_elsevier_document_output.xml')
        marc_file = self._sample_path('sample_elsevier_record.xml')
        self._load_issue('sample_elsevier_issue')
        xml = self.els.get_record(source_file, test=True, no_pdf=True)
        with open(marc_file) as marc:
            result = marc.read()
        # Surrounding whitespace is ignored on both sides.
        self.assertEqual(xml.strip(), result.strip())
示例#17
0
class ElsevierPackageTests(unittest.TestCase):
    """Test extraction of Elsevier records."""
    def setUp(self):
        """Create the package under test and parse the sample CONSYN record."""
        self.els = ElsevierPackage(
            CONSYN=True, journal_mappings=journal_mappings)
        sample_path = pkg_resources.resource_filename(
            'harvestingkit.tests',
            os.path.join('data', 'sample_consyn_record.xml'))
        self.document = parse(sample_path)

    def test_doi(self):
        """Test that doi is good."""
        self.assertEqual(self.els._get_doi(self.document),
                         '10.1016/0370-2693(88)91603-6')

    def test_title(self):
        """Test that title is good."""
        self.assertEqual(self.els.get_title(self.document),
                         'Toward classification of conformal theories')

    def test_doctype(self):
        """Test that doctype is good."""
        self.assertEqual(self.els.get_doctype(self.document), 'fla')

    def test_abstract(self):
        """Test that abstract is good."""
        abstract = 'By studying the representations of the mapping class groups '\
                   'which arise in 2D conformal theories we derive some restrictions '\
                   'on the value of the conformal dimension h i of operators and the '\
                   'central charge c of the Virasoro algebra. As a simple application '\
                   'we show that when there are a finite number of operators in the '\
                   'conformal algebra, the h i and c are all rational.'
        self.assertEqual(self.els.get_abstract(self.document), abstract)

    def test_keywords(self):
        """Test that keywords are good."""
        keywords = [
            'Heavy quarkonia', 'Quark gluon plasma', 'Mott effect', 'X(3872)'
        ]
        self.assertEqual(self.els.get_keywords(self.document), keywords)

    def test_add_orcids(self):
        """Test that orcids are good.

        According to "Tag by Tag The Elsevier DTD 5 Family of XML DTDs" orcids will be
        distributed as an attribute in the ce:author tag.
        """
        xml_author = Element('ce:author')
        xml_author.setAttribute('orcid', '1234-5678-4321-8765')
        authors = [{}]

        # _add_orcids will alter the authors list
        self.els._add_orcids(authors, [xml_author])

        self.assertEqual(authors, [{'orcid': 'ORCID:1234-5678-4321-8765'}])

    def test_authors(self):
        """Test that authors are good."""
        authors = [{
            'affiliation': [
                'Lyman Laboratory of Physics, Harvard University, Cambridge, MA 02138, USA'
            ],
            'surname':
            'Vafa',
            'given_name':
            'Cumrun',
            'orcid':
            'ORCID:1234-5678-4321-8765'
        }]
        self.assertEqual(self.els.get_authors(self.document), authors)

    def test_copyright(self):
        """Test that copyright is good."""
        self.assertEqual(self.els.get_copyright(self.document),
                         'Copyright unknown. Published by Elsevier B.V.')

    def test_publication_information(self):
        """Test that pubinfo is good."""
        publication_information = ('Phys.Lett.', '0370-2693', 'B206', '3',
                                   '421', '426', '1988', '1988-05-26',
                                   '10.1016/0370-2693(88)91603-6')
        self.assertEqual(self.els.get_publication_information(self.document),
                         publication_information)

    def test_publication_date_oa(self):
        """Test that date is good from openAccessEffective."""
        data = """
        <doc xmlns:oa="http://vtw.elsevier.com/data/ns/properties/OpenAccess-1/">
        <oa:openAccessInformation>
          <oa:openAccessStatus xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
            http://vtw.elsevier.com/data/voc/oa/OpenAccessStatus#Full
          </oa:openAccessStatus>
          <oa:openAccessEffective xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">2014-11-11T08:38:44Z</oa:openAccessEffective>
          <oa:sponsor xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
            <oa:sponsorName>SCOAP&#xB3; - Sponsoring Consortium for Open Access Publishing in Particle Physics</oa:sponsorName>
            <oa:sponsorType>http://vtw.elsevier.com/data/voc/oa/SponsorType#FundingBody</oa:sponsorType>
          </oa:sponsor>
          <oa:userLicense xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">http://creativecommons.org/licenses/by/3.0/</oa:userLicense>
        </oa:openAccessInformation>
        </doc>"""
        doc = parseString(data)
        self.assertEqual(self.els.get_publication_date(doc), "2014-11-11")

    def test_publication_date_cover_display(self):
        """Test that date is good from coverDisplayDate."""
        data = """
        <doc xmlns:prism="http://vtw.elsevier.com/data/ns/properties/OpenAccess-1/">
          <prism:coverDisplayDate>December 2014</prism:coverDisplayDate>
        </doc>"""
        doc = parseString(data)
        self.assertEqual(self.els.get_publication_date(doc), "2014-12")

    def test_publication_date_cover_display_full(self):
        """Test that date is good from coverDisplayDate."""
        data = """
        <doc xmlns:prism="http://vtw.elsevier.com/data/ns/properties/OpenAccess-1/">
          <prism:coverDisplayDate>1 December 2014</prism:coverDisplayDate>
        </doc>"""
        doc = parseString(data)
        self.assertEqual(self.els.get_publication_date(doc), "2014-12-01")

    def test_publication_date_cover(self):
        """Test that date is good."""
        data = """
        <doc xmlns:prism="http://vtw.elsevier.com/data/ns/properties/OpenAccess-1/">
          <prism:coverDisplayDate>April 2011</prism:coverDisplayDate>
          <prism:coverDate>2011-04-01</prism:coverDate>
        </doc>"""
        doc = parseString(data)
        self.assertEqual(self.els.get_publication_date(doc), "2011-04-01")

    def test_references(self):
        """Test that references is good."""
        references = [
            ('[1]', ['Belavin, A.A.', 'Polyakov, A.M.', 'Zamolodchikov, A.B.'
                     ], '', 'Nucl. Phys. B 241 1984', '333', '', '241', '1984',
             [], None, True, '', 'Nucl. Phys. B', '', [], '', []),
            ('[2]', ['Friedan, D.', 'Qiu, Z.', 'Shenker, S.H.'
                     ], '', 'Phys. Rev. Lett. 52 1984', '1575', '', '52',
             '1984', [], None, True, '', 'Phys. Rev. Lett.', '', [], '', []),
            ('[3]', ['Cardy, J.L.'
                     ], '', 'Nucl. Phys. B 270 1986', '186', '', '270', '1986',
             [], None, True, '[FS16]', 'Nucl. Phys. B', '', [], '', []),
            ('[3]', ['Capelli, A.', 'Itzykson, C.', 'Zuber, J.-B.'
                     ], '', 'Nucl. Phys. B 280 1987', '445', '', '280', '1987',
             [], None, True, '[FS 18]', 'Nucl. Phys. B', '', [], '', []),
            ('[3]', ['Capelli, A.', 'Itzykson, C.', 'Zuber, J.-B.'], '',
             'Commun. Math. Phys. 113 1987', '1', '', '113', '1987', [], None,
             True, '', 'Commun. Math. Phys.', '', [], '', []),
            ('[3]', ['Gepner, D.'
                     ], '', 'Nucl. Phys. B 287 1987', '111', '', '287', '1987',
             [], None, True, '', 'Nucl. Phys. B', '', [], '', []),
            ('[4]', [], '', '', '', '', '', '',
             'G. Anderson and G. Moore, IAS preprint IASSNS-HEP-87/69.', None,
             [], '', '', '', [], '', []),
            ('[5]', ['Friedan, D.', 'Shenker, S.'
                     ], '', 'Phys. Lett. B 175 1986', '287', '', '175', '1986',
             [], None, True, '', 'Phys. Lett. B', '', [], '', []),
            ('[5]', ['Friedan, D.', 'Shenker, S.'
                     ], '', 'Nucl. Phys. B 281 1987', '509', '', '281', '1987',
             [], None, True, '', 'Nucl. Phys. B', '', [], '', []),
            ('[6]', [], '', '', '', '', '', '',
             'E. Martinec and S. Shenker, unpublished.', None, [], '', '', '',
             [], '', []),
            ('[7]', ['Vafa, C.'
                     ], '', 'Phys. Lett. B 199 1987', '195', '', '199', '1987',
             [], None, True, '', 'Phys. Lett. B', '', [], '', []),
            ('[8]', ['Harer, J.'], '', 'Inv. Math. 72 1983', '221', '', '72',
             '1983', [], None, True, '', 'Inv. Math.', '', [], '', []),
            ('[9]', ['Tsuchiya, A.', 'Kanie, Y.'
                     ], '', 'Lett. Math. Phys. 13 1987', '303', '', '13',
             '1987', [], None, True, '', 'Lett. Math. Phys.', '', [], '', []),
            ('[10]', [], '', '', '', '', '', '',
             'E. Verlinde, to be published.', None, [], '', '', '', [], '',
             []),
            ('[11]', ['Dehn, M.'], '', 'Acta Math. 69 1938', '135', '', '69',
             '1938', [], None, True, '', 'Acta Math.', '', [], '', []),
            ('[12]', [], '', '', '', '', '', '',
             'D. Friedan and S. Shenker, unpublished.', None, [], '', '', '',
             [], '', []),
            ('[13]', [], '', '', '', '', '', '',
             'J. Harvey, G. Moore, and C. Vafa, Nucl. Phys. B, to be published',
             None, [], '', '', '', [], '', []),
            ('[14]', [], '', '', '', '', '', '',
             'D. Kastor, E. Martinec and Z. Qiu, E. Fermi Institute preprint EFI-87-58.',
             None, [], '', '', '', [], '', []),
            ('[15]', ['Adeva, B.'], '', 'Phys. Rev. D 58 1998', '112001', '',
             '58', '1998', [], None, True, '', 'Phys. Rev. D', '', [], '', [])
        ]
        for ref in self.els.get_references(self.document):
            self.assertTrue(ref in references)

    def test_get_record(self):
        """Compare the record built from the sample CONSYN file with the fixture.

        The XML returned by ``get_record`` must equal the content of
        ``sample_consyn_output.xml`` once leading and trailing whitespace is
        stripped from both sides.
        """
        package = 'harvestingkit.tests'
        source_file = pkg_resources.resource_filename(
            package, os.path.join('data', 'sample_consyn_record.xml'))
        marc_file = pkg_resources.resource_filename(
            package, os.path.join('data', 'sample_consyn_output.xml'))

        generated = self.els.get_record(source_file, test=True)
        with open(marc_file) as handle:
            expected = handle.read()
        self.assertEqual(generated.strip(), expected.strip())