def test_01_full_synchronous(self):
    """Drive the whole workflow synchronously on the small test submission.

    Uploads the test CSV, runs the job processor to completion, then
    exports the resulting CSV to disk so it can be inspected manually.
    """
    # first pretend to do the file upload, using the test submission;
    # use a context manager so the source file handle is closed promptly
    # instead of being leaked
    with open(TEST_SUBMISSION, "r") as source:
        fh = FileHandle(source.read())
    job = workflow.csv_upload(fh, "test_submission.csv", "*****@*****.**")
    time.sleep(2)  # give the index time to register the upload

    # now call the overall job processor
    workflow.process_jobs()

    # once the job processor is finished, we can export the csv for the job we ran
    csvcontent = workflow.output_csv(job)
    # NOTE(review): the asynchronous sibling test opens this file with an
    # explicit "utf8" encoding — presumably this one should too; confirm
    # against the encoding of workflow.output_csv's return value
    with codecs.open(os.path.join(UPLOAD_DIR, "output.csv"), "wb") as f:
        f.write(csvcontent)
def test_02_full_asynchronous(self): # first pretend to do the file upload, using the test submission fh = FileHandle(open(FULL_SUBMISSION, "r").read()) job = workflow.csv_upload(fh, "full_submission.csv", "*****@*****.**") time.sleep(2) # now call the overall job processor workflow.process_jobs() # once the job processor returns, we must monitor the job itself for completeness for i in range(100): pc = job.pc_complete print i, pc if int(pc) == 100: break time.sleep(2) # once the job processor is finished, we can export the csv for the job we ran csvcontent = workflow.output_csv(job) with codecs.open(os.path.join(UPLOAD_DIR, "output.csv"), "wb", "utf8") as f: f.write(csvcontent)
def test_01_export(self):
    """Exercise CSV export for a job whose records vary in completeness."""
    # make a job - we don't much care about its content for this test
    job = models.SpreadsheetJob()
    job.save()

    now = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")

    # record with every field filled in correctly
    complete = models.Record()
    complete.pmcid = "PMC1234"
    complete.pmid = "1234"
    complete.doi = "10.1234"
    complete.title = "The Title"
    complete.has_ft_xml = True
    complete.in_epmc = True
    complete.aam = True
    complete.is_oa = True
    complete.licence_type = "CC0"
    complete.licence_source = "publisher"
    complete.journal_type = "hybrid"
    complete.confidence = 0.9
    complete.add_provenance("test", "provenance", now)
    complete.upload_id = job.id
    complete.upload_pos = 1
    complete.journal = "Journal of Science"
    complete.issn = ["1234-5678", "9876-5432"]
    complete.save()

    # record carrying only a PMCID
    pmcid_only = models.Record()
    pmcid_only.pmcid = "PMC9876"
    pmcid_only.upload_id = job.id
    pmcid_only.upload_pos = 2
    pmcid_only.save()

    # record carrying only a PMID, with explicitly empty/None fields and
    # two provenance entries
    pmid_only = models.Record()
    pmid_only.pmid = "9876"
    pmid_only.upload_id = job.id
    pmid_only.upload_pos = 3
    pmid_only.title = None
    pmid_only.licence_type = ""
    pmid_only.add_provenance("test", "provenance", now)
    pmid_only.add_provenance("test", "more", now)
    pmid_only.save()

    # refresh the index ready for querying
    models.SpreadsheetJob.refresh()
    models.Record.refresh()

    out = workflow.output_csv(job)
    rows = list(csv.reader(StringIO(out)))

    assert len(rows) == 4
    # header row ("Ouptut" reproduces the header the exporter actually emits)
    assert rows[0] == [
        'PMCID', 'PMID', 'DOI', "Journal title", "ISSN", 'Article title',
        "Fulltext in EPMC?", 'XML Fulltext?', 'AAM?', 'Open Access?',
        'Licence', 'Licence Source', 'Journal Type',
        'Correct Article Confidence', 'Standard Compliance?',
        'Deluxe Compliance?', 'Compliance Processing Ouptut'
    ]
    # fully populated record
    assert rows[1] == [
        'PMC1234', '1234', '10.1234', "Journal of Science",
        "1234-5678, 9876-5432", 'The Title', "True", 'True', 'True', 'True',
        'CC0', 'publisher', 'hybrid', '0.9', "True", "True",
        '[' + now + ' test] provenance'
    ]
    # sparse record: unknowns/empties where nothing was set
    assert rows[2] == [
        "PMC9876", "", "", "", "", "", "", "", "unknown", "", "unknown",
        "", "", "", "False", "False", ""
    ]
    # sparse record with two provenance entries joined by a blank line
    assert rows[3] == [
        "", "9876", "", "", "", "", "", "", "unknown", "", "unknown",
        "", "", "", "False", "False",
        '[' + now + ' test] provenance\n\n[' + now + ' test] more'
    ]
if args.type.lower() == "pmcid": record.pmcid = args.identifier elif args.type.lower() == "pmid": record.pmid = args.identifier elif args.type.lower() == "doi": record.doi = args.identifier record.save() time.sleep(2) oag_register = [] msg = workflow.WorkflowMessage(job, record, oag_register) workflow.process_record(msg) workflow.process_oag(oag_register, job) time.sleep(2) i = 0 while True: i += 1 pcc = job.pc_complete print i, job.pc_complete, "%", sys.stdout.flush() if int(pcc) == 100: break time.sleep(2) out = workflow.output_csv(job) print out
def test_01_export(self):
    """Check the exported CSV for a job with three differently-filled records."""
    # make a job - we don't much care about its content for this test
    job = models.SpreadsheetJob()
    job.save()

    now = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")

    # make a few records for it
    # all fields filled in correctly
    rec_full = models.Record()
    rec_full.pmcid = "PMC1234"
    rec_full.pmid = "1234"
    rec_full.doi = "10.1234"
    rec_full.title = "The Title"
    rec_full.has_ft_xml = True
    rec_full.in_epmc = True
    rec_full.aam = True
    rec_full.is_oa = True
    rec_full.licence_type = "CC0"
    rec_full.licence_source = "publisher"
    rec_full.journal_type = "hybrid"
    rec_full.confidence = 0.9
    rec_full.add_provenance("test", "provenance", now)
    rec_full.upload_id = job.id
    rec_full.upload_pos = 1
    rec_full.journal = "Journal of Science"
    rec_full.issn = ["1234-5678", "9876-5432"]
    rec_full.save()

    # only a PMCID
    rec_pmcid = models.Record()
    rec_pmcid.pmcid = "PMC9876"
    rec_pmcid.upload_id = job.id
    rec_pmcid.upload_pos = 2
    rec_pmcid.save()

    # only a PMID, plus explicit None/empty fields and double provenance
    rec_pmid = models.Record()
    rec_pmid.pmid = "9876"
    rec_pmid.upload_id = job.id
    rec_pmid.upload_pos = 3
    rec_pmid.title = None
    rec_pmid.licence_type = ""
    rec_pmid.add_provenance("test", "provenance", now)
    rec_pmid.add_provenance("test", "more", now)
    rec_pmid.save()

    # refresh the index ready for querying
    models.SpreadsheetJob.refresh()
    models.Record.refresh()

    out = workflow.output_csv(job)
    reader = csv.reader(StringIO(out))
    rows = [row for row in reader]

    expected_header = ['PMCID', 'PMID', 'DOI', "Journal title", "ISSN", 'Article title', "Fulltext in EPMC?", 'XML Fulltext?', 'AAM?', 'Open Access?', 'Licence', 'Licence Source', 'Journal Type', 'Correct Article Confidence', 'Standard Compliance?', 'Deluxe Compliance?', 'Compliance Processing Ouptut']
    expected_full = ['PMC1234', '1234', '10.1234', "Journal of Science", "1234-5678, 9876-5432", 'The Title', "True", 'True', 'True', 'True', 'CC0', 'publisher', 'hybrid', '0.9', "True", "True", '[' + now + ' test] provenance']
    expected_pmcid = ["PMC9876", "", "", "", "", "", "", "", "unknown", "", "unknown", "", "", "", "False", "False", ""]
    expected_pmid = ["", "9876", "", "", "", "", "", "", "unknown", "", "unknown", "", "", "", "False", "False", '[' + now + ' test] provenance\n\n[' + now + ' test] more']

    assert len(rows) == 4
    assert rows[0] == expected_header
    assert rows[1] == expected_full
    assert rows[2] == expected_pmcid
    assert rows[3] == expected_pmid