示例#1
0
    def test_10_process_record_03_aam_no_licence(self):
        """Run process_record against full text whose licence has been removed.

        The EPMC metadata and full-text lookups on ``workflow`` are replaced
        with fixture-backed stand-ins; the full-text stand-in drops the first
        ``//license`` match from the fixture XML before wrapping it. The DOAJ
        lookup is stubbed to return True, and the embargo and CORE steps are
        stubbed to do nothing.

        The assertions expect the identifiers from the metadata fixture,
        ``aam`` and ``aam_from_xml`` set to True, no licence type or source,
        a journal type of "oa", and a single PMCID entry in the OAG register.

        NOTE(review): the ``workflow`` attributes are overwritten here and are
        not restored inside this method; presumably the test class's
        setUp/tearDown takes care of that -- confirm.
        """
        def mock_get_md(*args, **kwargs):
            # Use a context manager so the fixture file handle is closed
            # instead of being left for the garbage collector.
            with open(EPMC_MD, "r") as f:
                md = epmcmod.EPMCMetadata(json.load(f))
            return md, 1.0

        def mock_get_ft(*args, **kwargs):
            with open(EPMC_FT, "r") as f:
                data = f.read()
            xml = etree.fromstring(data)
            # Strip the first licence element so the full text carries none.
            licences = xml.xpath("//license")
            licences[0].getparent().remove(licences[0])
            return epmc.EPMCFullText(etree.tostring(xml))

        def mock_doaj(*args, **kwargs):
            return True

        def mock_romeo(*args, **kwargs):
            pass

        def mock_core(*args, **kwargs):
            pass

        workflow.get_epmc_md = mock_get_md
        workflow.get_epmc_fulltext = mock_get_ft
        workflow.doaj_lookup = mock_doaj
        workflow.embargo = mock_romeo
        workflow.ou_core = mock_core

        record = models.Record()
        record.pmcid = "PMC4219345"
        record.id = record.makeid()
        oag = []
        msg = workflow.WorkflowMessage(record=record, oag_register=oag)
        workflow.process_record(msg)

        assert record.confidence == 1.0
        assert record.pmcid == "PMC4219345"
        assert record.pmid == "24279897"
        assert record.doi == "10.1186/1471-2121-14-52"
        assert record.in_epmc is True
        assert record.is_oa is False
        assert len(record.issn) == 1
        assert "1471-2121" in record.issn
        assert record.id is not None  # implies it has been saved
        assert record.has_ft_xml is True
        assert record.aam is True
        assert record.aam_from_xml is True
        assert record.licence_type is None
        assert record.licence_source is None
        assert record.journal_type == "oa"
        # With no licence found, exactly one entry is expected in the register.
        assert len(oag) == 1
        assert oag[0]["id"] == "PMC4219345"
        assert oag[0]["type"] == "pmcid"
示例#2
0
    def test_10_process_record_02_no_md(self):
        """Check process_record when the metadata lookup yields nothing.

        The EPMC metadata lookup on ``workflow`` is swapped for a stand-in
        that returns ``(None, None)``. Afterwards the record is expected to
        have no confidence value, exactly one provenance entry, and an empty
        OAG register.
        """
        # Stand-in for the metadata lookup: no metadata, no confidence.
        workflow.get_epmc_md = lambda *args, **kwargs: (None, None)

        oag_entries = []
        rec = models.Record()
        rec.pmcid = "PMC4219345"
        rec.id = rec.makeid()

        message = workflow.WorkflowMessage(record=rec, oag_register=oag_entries)
        workflow.process_record(message)

        assert rec.confidence is None
        assert len(rec.provenance) == 1
        assert oag_entries == []
示例#3
0
    def test_10_process_record_02_no_md(self):
        """Exercise process_record with a metadata lookup that finds nothing.

        ``workflow.get_epmc_md`` is replaced by a stand-in returning
        ``(None, None)``. The test then expects the record's confidence to be
        None, a single provenance entry on the record, and no entries in the
        OAG register list.
        """
        # The stand-in ignores its arguments and reports "no metadata".
        workflow.get_epmc_md = lambda *args, **kwargs: (None, None)

        register = []
        rec = models.Record()
        rec.pmcid = "PMC4219345"
        rec.id = rec.makeid()

        workflow.process_record(
            workflow.WorkflowMessage(record=rec, oag_register=register)
        )

        assert rec.confidence is None
        assert len(rec.provenance) == 1
        assert register == []
示例#4
0
    def test_10_process_record_01_everything(self):
        """Run process_record with both metadata and unmodified full text.

        The EPMC metadata and full-text lookups on ``workflow`` are replaced
        with stand-ins that load the fixture files as-is. The DOAJ lookup is
        stubbed to return False, and the embargo and CORE steps are stubbed
        to do nothing.

        The assertions expect the identifiers from the metadata fixture,
        ``aam`` and ``aam_from_xml`` set to True, a "cc-by" licence sourced
        from "epmc_xml", a journal type of "hybrid", and an empty OAG
        register.

        NOTE(review): the ``workflow`` attributes are overwritten here and are
        not restored inside this method; presumably the test class's
        setUp/tearDown takes care of that -- confirm.
        """
        def mock_get_md(*args, **kwargs):
            # Use a context manager so the fixture file handle is closed
            # instead of being left for the garbage collector.
            with open(EPMC_MD, "r") as f:
                md = epmcmod.EPMCMetadata(json.load(f))
            return md, 1.0

        def mock_get_ft(*args, **kwargs):
            with open(EPMC_FT, "r") as f:
                data = f.read()
            return epmc.EPMCFullText(data)

        def mock_doaj(*args, **kwargs):
            return False

        def mock_romeo(*args, **kwargs):
            pass

        def mock_core(*args, **kwargs):
            pass

        workflow.get_epmc_md = mock_get_md
        workflow.get_epmc_fulltext = mock_get_ft
        workflow.doaj_lookup = mock_doaj
        workflow.embargo = mock_romeo
        workflow.ou_core = mock_core

        record = models.Record()
        record.pmcid = "PMC4219345"
        record.id = record.makeid()
        oag = []
        msg = workflow.WorkflowMessage(record=record, oag_register=oag)
        workflow.process_record(msg)

        assert record.confidence == 1.0
        assert record.pmcid == "PMC4219345"
        assert record.pmid == "24279897"
        assert record.doi == "10.1186/1471-2121-14-52"
        assert record.in_epmc is True
        assert record.is_oa is False
        assert len(record.issn) == 1
        assert "1471-2121" in record.issn
        assert record.id is not None  # implies it has been saved
        assert record.has_ft_xml is True
        assert record.aam is True
        assert record.aam_from_xml is True
        assert record.licence_type == "cc-by"
        assert record.licence_source == "epmc_xml"
        assert record.journal_type == "hybrid"
        # A licence was found, so nothing is expected in the register.
        assert len(oag) == 0
示例#5
0
    def test_10_process_record_03_aam_no_licence(self):
        """Run process_record against full text whose licence has been removed.

        The EPMC metadata and full-text lookups on ``workflow`` are replaced
        with fixture-backed stand-ins; the full-text stand-in drops the first
        ``//license`` match from the fixture XML before wrapping it. The DOAJ
        lookup is stubbed to return True.

        The assertions expect the identifiers from the metadata fixture,
        ``aam`` and ``aam_from_xml`` set to True, no licence type or source,
        a journal type of "oa", and a single PMCID entry in the OAG register.

        NOTE(review): the ``workflow`` attributes are overwritten here and are
        not restored inside this method; presumably the test class's
        setUp/tearDown takes care of that -- confirm.
        """
        def mock_get_md(*args, **kwargs):
            # Use a context manager so the fixture file handle is closed
            # instead of being left for the garbage collector.
            with open(EPMC_MD, "r") as f:
                md = epmcmod.EPMCMetadata(json.load(f))
            return md, 1.0

        def mock_get_ft(*args, **kwargs):
            with open(EPMC_FT, "r") as f:
                data = f.read()
            xml = etree.fromstring(data)
            # Strip the first licence element so the full text carries none.
            licences = xml.xpath("//license")
            licences[0].getparent().remove(licences[0])
            return epmc.EPMCFullText(etree.tostring(xml))

        def mock_doaj(*args, **kwargs):
            return True

        workflow.get_epmc_md = mock_get_md
        workflow.get_epmc_fulltext = mock_get_ft
        workflow.doaj_lookup = mock_doaj

        record = models.Record()
        record.pmcid = "PMC4219345"
        record.id = record.makeid()
        oag = []
        msg = workflow.WorkflowMessage(record=record, oag_register=oag)
        workflow.process_record(msg)

        assert record.confidence == 1.0
        assert record.pmcid == "PMC4219345"
        assert record.pmid == "24279897"
        assert record.doi == "10.1186/1471-2121-14-52"
        assert record.in_epmc is True
        assert record.is_oa is False
        assert len(record.issn) == 1
        assert "1471-2121" in record.issn
        assert record.id is not None  # implies it has been saved
        assert record.has_ft_xml is True
        assert record.aam is True
        assert record.aam_from_xml is True
        assert record.licence_type is None
        assert record.licence_source is None
        assert record.journal_type == "oa"
        # With no licence found, exactly one entry is expected in the register.
        assert len(oag) == 1
        assert oag[0]["id"] == "PMC4219345"
        assert oag[0]["type"] == "pmcid"
示例#6
0
    def test_10_process_record_04_licence_no_aam(self):
        """Run process_record against full text with its manuscript id removed.

        The EPMC metadata and full-text lookups on ``workflow`` are replaced
        with fixture-backed stand-ins; the full-text stand-in drops the first
        ``article-id`` element whose ``pub-id-type`` is "manuscript" before
        wrapping the XML. The DOAJ lookup is stubbed to return True.

        The assertions expect the identifiers from the metadata fixture,
        ``aam`` False with ``aam_from_xml`` True, a "cc-by" licence sourced
        from "epmc_xml", a journal type of "oa", and an empty OAG register.

        NOTE(review): the ``workflow`` attributes are overwritten here and are
        not restored inside this method; presumably the test class's
        setUp/tearDown takes care of that -- confirm.
        """
        def mock_get_md(*args, **kwargs):
            # Use a context manager so the fixture file handle is closed
            # instead of being left for the garbage collector.
            with open(EPMC_MD, "r") as f:
                md = epmcmod.EPMCMetadata(json.load(f))
            return md, 1.0

        def mock_get_ft(*args, **kwargs):
            with open(EPMC_FT, "r") as f:
                data = f.read()
            xml = etree.fromstring(data)
            # Remove the manuscript article-id from the fixture XML.
            aids = xml.xpath("//article-id[@pub-id-type='manuscript']")
            aids[0].getparent().remove(aids[0])
            return epmc.EPMCFullText(etree.tostring(xml))

        def mock_doaj(*args, **kwargs):
            return True

        workflow.get_epmc_md = mock_get_md
        workflow.get_epmc_fulltext = mock_get_ft
        workflow.doaj_lookup = mock_doaj

        record = models.Record()
        record.pmcid = "PMC4219345"
        record.id = record.makeid()
        oag = []
        msg = workflow.WorkflowMessage(record=record, oag_register=oag)
        workflow.process_record(msg)

        assert record.confidence == 1.0
        assert record.pmcid == "PMC4219345"
        assert record.pmid == "24279897"
        assert record.doi == "10.1186/1471-2121-14-52"
        assert record.in_epmc is True
        assert record.is_oa is False
        assert len(record.issn) == 1
        assert "1471-2121" in record.issn
        assert record.id is not None  # implies it has been saved
        assert record.has_ft_xml is True
        assert record.aam is False
        assert record.aam_from_xml is True
        assert record.licence_type == "cc-by"
        assert record.licence_source == "epmc_xml"
        assert record.journal_type == "oa"
        # A licence was found, so nothing is expected in the register.
        assert len(oag) == 0
示例#7
0
    def test_10_process_record_05_no_ft(self):
        """Run process_record when metadata exists but no full text is found.

        The EPMC metadata lookup on ``workflow`` is replaced with a
        fixture-backed stand-in, the full-text lookup with one that returns
        None, and the DOAJ lookup with one that returns False.

        The assertions expect the identifiers from the metadata fixture,
        ``has_ft_xml`` False, ``aam`` None with ``aam_from_xml`` False, no
        licence type or source, a journal type of "hybrid", and a single
        PMCID entry in the OAG register.

        NOTE(review): the ``workflow`` attributes are overwritten here and are
        not restored inside this method; presumably the test class's
        setUp/tearDown takes care of that -- confirm.
        """
        def mock_get_md(*args, **kwargs):
            # Use a context manager so the fixture file handle is closed
            # instead of being left for the garbage collector.
            with open(EPMC_MD, "r") as f:
                md = epmcmod.EPMCMetadata(json.load(f))
            return md, 1.0

        def mock_get_ft(*args, **kwargs):
            return None

        def mock_doaj(*args, **kwargs):
            return False

        workflow.get_epmc_md = mock_get_md
        workflow.get_epmc_fulltext = mock_get_ft
        workflow.doaj_lookup = mock_doaj

        record = models.Record()
        record.pmcid = "PMC4219345"
        record.id = record.makeid()
        oag = []
        msg = workflow.WorkflowMessage(record=record, oag_register=oag)
        workflow.process_record(msg)

        assert record.confidence == 1.0
        assert record.pmcid == "PMC4219345"
        assert record.pmid == "24279897"
        assert record.doi == "10.1186/1471-2121-14-52"
        assert record.in_epmc is True
        assert record.is_oa is False
        assert len(record.issn) == 1
        assert "1471-2121" in record.issn
        assert record.id is not None  # implies it has been saved
        assert record.has_ft_xml is False
        assert record.aam is None
        assert record.aam_from_xml is False
        assert record.licence_type is None
        assert record.licence_source is None
        assert record.journal_type == "hybrid"
        # With no licence found, exactly one entry is expected in the register.
        assert len(oag) == 1
        assert oag[0]["id"] == "PMC4219345"
        assert oag[0]["type"] == "pmcid"
示例#8
0
    def test_10_process_record_05_no_ft(self):
        """Run process_record when metadata exists but no full text is found.

        The EPMC metadata lookup on ``workflow`` is replaced with a
        fixture-backed stand-in, the full-text lookup with one that returns
        None, and the DOAJ lookup with one that returns False.

        The assertions expect the identifiers from the metadata fixture,
        ``has_ft_xml`` False, ``aam`` None with ``aam_from_xml`` False, no
        licence type or source, a journal type of "hybrid", and a single
        PMCID entry in the OAG register.

        NOTE(review): the ``workflow`` attributes are overwritten here and are
        not restored inside this method; presumably the test class's
        setUp/tearDown takes care of that -- confirm.
        """
        def mock_get_md(*args, **kwargs):
            # Use a context manager so the fixture file handle is closed
            # instead of being left for the garbage collector.
            with open(EPMC_MD, "r") as f:
                md = epmcmod.EPMCMetadata(json.load(f))
            return md, 1.0

        def mock_get_ft(*args, **kwargs):
            return None

        def mock_doaj(*args, **kwargs):
            return False

        workflow.get_epmc_md = mock_get_md
        workflow.get_epmc_fulltext = mock_get_ft
        workflow.doaj_lookup = mock_doaj

        record = models.Record()
        record.pmcid = "PMC4219345"
        record.id = record.makeid()
        oag = []
        msg = workflow.WorkflowMessage(record=record, oag_register=oag)
        workflow.process_record(msg)

        assert record.confidence == 1.0
        assert record.pmcid == "PMC4219345"
        assert record.pmid == "24279897"
        assert record.doi == "10.1186/1471-2121-14-52"
        assert record.in_epmc is True
        assert record.is_oa is False
        assert len(record.issn) == 1
        assert "1471-2121" in record.issn
        assert record.id is not None  # implies it has been saved
        assert record.has_ft_xml is False
        assert record.aam is None
        assert record.aam_from_xml is False
        assert record.licence_type is None
        assert record.licence_source is None
        assert record.journal_type == "hybrid"
        # With no licence found, exactly one entry is expected in the register.
        assert len(oag) == 1
        assert oag[0]["id"] == "PMC4219345"
        assert oag[0]["type"] == "pmcid"
示例#9
0
    # Link the record to the job by id and give it position 1.
    record.upload_id = job.id
    record.upload_pos = 1

    # Store the identifier on the field matching the requested type
    # (compared case-insensitively). If the type is none of these three,
    # no identifier field is set here.
    if args.type.lower() == "pmcid":
        record.pmcid = args.identifier
    elif args.type.lower() == "pmid":
        record.pmid = args.identifier
    elif args.type.lower() == "doi":
        record.doi = args.identifier
    record.save()

    # NOTE(review): presumably this pause lets the saved record become
    # visible in the backing store before processing -- confirm.
    time.sleep(2)

    # Run the record through the workflow, then hand whatever was added to
    # the register list over to the OAG step.
    oag_register = []
    msg = workflow.WorkflowMessage(job, record, oag_register)
    workflow.process_record(msg)
    workflow.process_oag(oag_register, job)

    time.sleep(2)

    # Poll every two seconds until the job reports 100 percent complete,
    # printing the iteration count and current percentage each time.
    # NOTE(review): ``job`` is not re-fetched inside the loop, so this relies
    # on ``pc_complete`` being recomputed on each access -- confirm, otherwise
    # the loop cannot terminate unless it is already at 100.
    i = 0
    while True:
        i += 1
        pcc = job.pc_complete
        # Python 2 print statement; the trailing comma suppresses the newline.
        print i, job.pc_complete, "%",
        sys.stdout.flush()
        if int(pcc) == 100:
            break
        time.sleep(2)

    # Produce the CSV output for the finished job.
    out = workflow.output_csv(job)
示例#10
0
    # Link the record to the job by id and give it position 1.
    record.upload_id = job.id
    record.upload_pos = 1

    # Store the identifier on the field matching the requested type
    # (compared case-insensitively). If the type is none of these three,
    # no identifier field is set here.
    if args.type.lower() == "pmcid":
        record.pmcid = args.identifier
    elif args.type.lower() == "pmid":
        record.pmid = args.identifier
    elif args.type.lower() == "doi":
        record.doi = args.identifier
    record.save()

    # NOTE(review): presumably this pause lets the saved record become
    # visible in the backing store before processing -- confirm.
    time.sleep(2)

    # Run the record through the workflow, then hand whatever was added to
    # the register list over to the OAG step.
    oag_register = []
    msg = workflow.WorkflowMessage(job, record, oag_register)
    workflow.process_record(msg)
    workflow.process_oag(oag_register, job)

    time.sleep(2)

    # Poll every two seconds until the job reports 100 percent complete,
    # printing the iteration count and current percentage each time.
    # NOTE(review): ``job`` is not re-fetched inside the loop, so this relies
    # on ``pc_complete`` being recomputed on each access -- confirm, otherwise
    # the loop cannot terminate unless it is already at 100.
    i = 0
    while True:
        i += 1
        pcc = job.pc_complete
        # Python 2 print statement; the trailing comma suppresses the newline.
        print i, job.pc_complete, "%",
        sys.stdout.flush()
        if int(pcc) == 100:
            break
        time.sleep(2)

    # Produce the CSV output for the finished job.
    out = workflow.output_csv(job)