def test_get_library_name(self):
    """_get_library_name must hand back 'nasty<business>' untouched"""
    submission = EBISubmission('2', 'Study Title', 'Study Abstract',
                               investigation_type='Other',
                               new_investigation_type='metagenome')
    observed = submission._get_library_name("nasty<business>")
    expected = "nasty<business>"
    self.assertEqual(observed, expected)
def test_get_submission_alias(self):
    """The submission alias is '<organization prefix>_submission_2'"""
    submission = EBISubmission('2', 'Study Title', 'Study Abstract',
                               investigation_type='Other',
                               new_investigation_type='metagenome')
    observed = submission._get_submission_alias()
    prefix = qiita_config.ebi_organization_prefix
    self.assertEqual(observed, '%s_submission_2' % prefix)
def test_get_experiment_alias(self):
    """The experiment alias for sample 'foo' is '<prefix>_ppdid_2:foo'"""
    submission = EBISubmission('2', 'Study Title', 'Study Abstract',
                               investigation_type='Other',
                               new_investigation_type='metagenome')
    submission.add_sample('foo', '9606', 'h**o sapiens', 'desc1')
    prefix = qiita_config.ebi_organization_prefix
    expected = '%s_ppdid_2:foo' % prefix
    observed = submission._get_experiment_alias('foo')
    self.assertEqual(observed, expected)
def test_generate_spot_descriptor(self):
    """_generate_spot_descriptor fills the element to match GENSPOTDESC"""
    submission = EBISubmission('2', 'Study Title', 'Study Abstract',
                               'metagenome')
    design = ET.Element('design', {'foo': 'bar'})
    submission._generate_spot_descriptor(design, 'LS454')
    # the fixture is pretty-printed; compare with per-line whitespace removed
    expected = ''.join(line.strip() for line in GENSPOTDESC.splitlines())
    self.assertEqual(ET.tostring(design), expected)
def test_get_study_alias(self):
    """The study alias is '<organization prefix>_ppdid_2'"""
    submission = EBISubmission('2', 'Study Title', 'Study Abstract',
                               investigation_type='Other',
                               new_investigation_type='metagenome')
    prefix = qiita_config.ebi_organization_prefix
    expected = '%s_ppdid_2' % prefix
    observed = submission._get_study_alias()
    self.assertEqual(observed, expected)
def test_generate_library_descriptor(self):
    """_generate_library_descriptor fills the element to match GENLIBDESC"""
    submission = EBISubmission('2', 'Study Title', 'Study Abstract',
                               'metagenome')
    design = ET.Element('design', {'foo': 'bar'})
    submission._generate_library_descriptor(design, 'sample', 10,
                                            'libconsprot')
    # the fixture is pretty-printed; compare with per-line whitespace removed
    expected = ''.join(line.strip() for line in GENLIBDESC.splitlines())
    self.assertEqual(ET.tostring(design), expected)
def test_add_dict_as_tags_and_values(self):
    """_add_dict_as_tags_and_values output must match ADDDICTTEST"""
    submission = EBISubmission('2', 'Study Title', 'Study Abstract',
                               'metagenome')
    root = ET.Element('TESTING', {'foo': 'bar'})
    # one of the pairs carries '<' and '>' characters
    tags_and_values = {'x': 'y', '>x': '<y'}
    submission._add_dict_as_tags_and_values(root, 'foo', tags_and_values)
    observed = ET.tostring(root)
    expected = ''.join(chunk.strip() for chunk in ADDDICTTEST.splitlines())
    self.assertEqual(observed, expected)
def test__write_xml_file(self):
    """_write_xml_file writes the element and records the path attribute

    The generated file is read through a context manager so the handle is
    closed, and it is removed even when an assertion fails.
    """
    e = EBISubmission('2', 'Study Title', 'Study Abstract', 'metagenome')
    elm = ET.Element('TESTING', {'foo': 'bar'})
    e._write_xml_file(lambda: elm, 'thing', 'testfile')
    try:
        self.assertEqual(e.thing, 'testfile')
        with open('testfile') as xml_fh:
            obs = xml_fh.read()
        exp = ('<?xml version="1.0" encoding="UTF-8"?>\n'
               '<TESTING foo="bar"/>\n')
        self.assertEqual(obs, exp)
    finally:
        # do not leave the artifact behind in the working directory
        remove('testfile')
def test_add_samples_from_templates_bad_directory(self):
    """A directory that does not exist makes the template loader fail

    add_samples_from_templates is expected to raise IOError.
    """
    sample_fh = StringIO(EXP_SAMPLE_TEMPLATE)
    prep_fh = StringIO(EXP_PREP_TEMPLATE)
    ebi = EBISubmission('001', 'teststudy', 'test asbstract', 'metagenome')
    with self.assertRaises(IOError):
        ebi.add_samples_from_templates(sample_fh, [prep_fh],
                                       self.path + 'WILL-NOT-EXIST-BOOM')
def test_add_samples_from_templates_bad_directory(self):
    """IOError is expected when the fastq directory is missing"""
    templates = {
        'sample': StringIO(EXP_SAMPLE_TEMPLATE),
        'prep': StringIO(EXP_PREP_TEMPLATE),
    }
    ebi = EBISubmission('001', 'teststudy', 'test asbstract', 'metagenome')
    with self.assertRaises(IOError):
        ebi.add_samples_from_templates(
            templates['sample'], [templates['prep']],
            self.path + 'WILL-NOT-EXIST-BOOM')
def test_generate_study_xml(self):
    """generate_study_xml output, whitespace-stripped, equals STUDYXML"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract', 'metagenome')
    study_element = ebi.generate_study_xml()
    document = minidom.parseString(ET.tostring(study_element))
    pretty = document.toprettyxml(indent='  ', encoding='UTF-8')
    observed = ''.join(line.strip() for line in pretty.splitlines())
    expected = ''.join(line.strip() for line in STUDYXML.splitlines())
    self.assertEqual(observed, expected)
def test_add_sample(self):
    """Added samples are registered; adding a duplicate name is rejected

    Membership is checked with assertIn, once per sample, so a failure
    reports which sample is missing.
    """
    submission = EBISubmission('001', 'teststudy', 'test asbstract',
                               'metagenome')
    submission.add_sample('test1')
    submission.add_sample('test2')
    samples = submission.samples
    self.assertIn('test1', samples)
    self.assertIn('test2', samples)
    # re-using an existing sample name must fail
    with self.assertRaises(SampleAlreadyExistsError):
        submission.add_sample('test1')
def test_add_sample_prep_exception(self):
    """add_sample_prep with platform 'DOES-NOT-EXIST' raises ValueError"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract', 'metagenome')
    for sample_name in ('test1', 'test2'):
        ebi.add_sample(sample_name)
    with self.assertRaises(ValueError):
        ebi.add_sample_prep('test3', 'DOES-NOT-EXIST', 'fastq', self.path,
                            'experiment description', 'library protocol')
def test_add_sample(self):
    """Added samples are registered; adding a duplicate name is rejected

    Membership is checked with assertIn, once per sample, so a failure
    reports which sample is missing.
    """
    submission = EBISubmission('001', 'teststudy', 'test asbstract',
                               investigation_type='Other',
                               new_investigation_type='metagenome')
    submission.add_sample('test1', '9606', 'h**o sapiens', 'desc1')
    submission.add_sample('test2', '9606', 'h**o sapiens', 'desc2')
    samples = submission.samples
    self.assertIn('test1', samples)
    self.assertIn('test2', samples)
    # re-using an existing sample name must fail
    with self.assertRaises(SampleAlreadyExistsError):
        submission.add_sample('test1', '9606', 'h**o sapiens', 'desc1')
def test_write_study_xml(self):
    """write_study_xml writes a file whose stripped content is STUDYXML

    The temporary file is read through a context manager and removed in a
    ``finally`` clause, so neither the handle nor the file leaks when the
    write or the read fails.
    """
    submission = EBISubmission('001', 'teststudy', 'test asbstract',
                               'metagenome')
    fh, output = mkstemp()
    # only the path is needed; release the OS-level descriptor right away
    close(fh)
    try:
        submission.write_study_xml(output)
        with open(output) as xml_fh:
            obs_stripped = ''.join([line.strip() for line in xml_fh])
    finally:
        remove(output)
    exp_stripped = ''.join([line.strip() for line in STUDYXML.splitlines()])
    self.assertEqual(obs_stripped, exp_stripped)
def test_generate_sample_xml(self):
    """generate_sample_xml output, whitespace-stripped, equals SAMPLEXML"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract',
                        investigation_type='Other',
                        new_investigation_type='metagenome')
    ebi.add_sample('test1', '9606', 'h**o sapiens', 'desc1')
    ebi.add_sample('test2', '9606', 'h**o sapiens', 'desc2')
    sample_element = ebi.generate_sample_xml()
    document = minidom.parseString(ET.tostring(sample_element))
    pretty = document.toprettyxml(indent='  ', encoding='UTF-8')
    observed = ''.join(line.strip() for line in pretty.splitlines())
    expected = ''.join(line.strip() for line in SAMPLEXML.splitlines())
    self.assertEqual(observed, expected)
def test_init_exceptions(self):
    """The constructor raises ValueError for both argument combinations

    Case one pairs 'Other' with no new investigation type; case two uses
    the investigation type 'SASQUATCH SEQUENCING'.
    """
    rejected_combinations = (
        {'investigation_type': 'Other',
         'new_investigation_type': None},
        {'investigation_type': 'SASQUATCH SEQUENCING',
         'new_investigation_type': 'metagenome'},
    )
    for type_kwargs in rejected_combinations:
        with self.assertRaises(ValueError):
            EBISubmission('2', 'Study Title', 'Study Abstract',
                          **type_kwargs)
def test_generate_run_xml(self):
    """generate_run_xml output equals the filled-in RUNXML template"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract',
                        investigation_type='Other',
                        new_investigation_type='metagenome')
    ebi.add_sample('test1')
    ebi.add_sample_prep('test1', 'ILLUMINA', 'fastq',
                        join(self.path, '__init__.py'),
                        'experiment description', 'library protocol')
    run_element = ebi.generate_run_xml()
    document = minidom.parseString(ET.tostring(run_element))

    # insert the proper EBI directory, since it is a timestamp and hard
    # to predict
    template_values = {
        'study_alias': ebi._get_study_alias(),
        'ebi_dir': ebi.ebi_dir,
        'organization_prefix': qiita_config.ebi_organization_prefix,
    }
    expected_xml = RUNXML % template_values

    pretty = document.toprettyxml(indent='  ', encoding='UTF-8')
    observed = ''.join(line.strip() for line in pretty.splitlines())
    expected = ''.join(line.strip() for line in expected_xml.splitlines())
    self.assertEqual(observed, expected)
def test_generate_sample_xml(self):
    """generate_sample_xml output, whitespace-stripped, equals SAMPLEXML"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract',
                        investigation_type='Other',
                        new_investigation_type='metagenome')
    for sample_name in ('test1', 'test2'):
        ebi.add_sample(sample_name)
    sample_element = ebi.generate_sample_xml()
    document = minidom.parseString(ET.tostring(sample_element))
    pretty = document.toprettyxml(indent='  ', encoding='UTF-8')
    observed = ''.join(line.strip() for line in pretty.splitlines())
    expected = ''.join(line.strip() for line in SAMPLEXML.splitlines())
    self.assertEqual(observed, expected)
def test_write_sample_xml(self):
    """write_sample_xml writes a file whose stripped content is SAMPLEXML

    The temporary file is read through a context manager and removed in a
    ``finally`` clause, so neither the handle nor the file leaks when the
    write or the read fails.
    """
    submission = EBISubmission('001', 'teststudy', 'test asbstract',
                               investigation_type='Other',
                               new_investigation_type='metagenome')
    submission.add_sample('test1', '9606', 'h**o sapiens', 'desc1')
    submission.add_sample('test2', '9606', 'h**o sapiens', 'desc2')
    fh, output = mkstemp()
    # only the path is needed; release the OS-level descriptor right away
    close(fh)
    try:
        submission.write_sample_xml(output)
        with open(output) as xml_fh:
            obs_stripped = ''.join([line.strip() for line in xml_fh])
    finally:
        remove(output)
    exp_stripped = ''.join([line.strip() for line in SAMPLEXML.splitlines()])
    self.assertEqual(obs_stripped, exp_stripped)
def test_stringify_kwargs(self):
    """Extra keyword arguments end up, as strings, in additional_metadata

    The integer passed as ``impossible_field`` is expected back as "1".
    The XML filepath attributes are checked with assertIsNone, which tests
    identity with None rather than equality.
    """
    e = EBISubmission('2', 'Study Title', 'Study Abstract',
                      investigation_type='Other',
                      new_investigation_type='metagenome',
                      impossible_field=1, maybe_possible_field='BOOM')

    self.assertEqual(e.preprocessed_data_id, '2')
    self.assertEqual(e.study_title, 'Study Title')
    self.assertEqual(e.study_abstract, 'Study Abstract')
    self.assertEqual(e.investigation_type, 'Other')

    self.assertEqual(e.empty_value, 'no_data')

    # no XML file has been written yet
    self.assertIsNone(e.study_xml_fp)
    self.assertIsNone(e.sample_xml_fp)
    self.assertIsNone(e.experiment_xml_fp)
    self.assertIsNone(e.run_xml_fp)

    self.assertEqual(e.library_strategy, 'POOLCLONE')
    self.assertEqual(e.library_source, 'METAGENOMIC')
    self.assertEqual(e.library_selection, 'unspecified')

    self.assertEqual(e.additional_metadata, {
        "impossible_field": "1",
        "maybe_possible_field": "BOOM"
    })
def test_generate_submission_xml(self):
    """generate_submission_xml('VALIDATE') raises NoXMLError here

    No XML file has been written for the submission before the call.
    """
    ebi = EBISubmission('001', 'teststudy', 'test asbstract', 'metagenome')
    ebi.add_sample('test1')
    ebi.add_sample_prep('test1', 'ILLUMINA', 'fastq', '__init__.py',
                        'experiment description', 'library protocol')
    with self.assertRaises(NoXMLError):
        ebi.generate_submission_xml('VALIDATE')
def test_add_samples_from_templates(self):
    """Samples and their preps are loaded from the two templates

    Membership is checked with assertIn so a failure names the missing
    sample.
    """
    sample_template = StringIO(EXP_SAMPLE_TEMPLATE)
    prep_template = StringIO(EXP_PREP_TEMPLATE)
    submission = EBISubmission('001', 'teststudy', 'test asbstract',
                               'metagenome')
    submission.add_samples_from_templates(sample_template, prep_template,
                                          self.path)
    for sample_name in ('sample1', 'sample2', 'sample3'):
        self.assertIn(sample_name, submission.samples)

    first_prep = submission.samples['sample2']['preps'][0]
    self.assertEqual(first_prep['platform'], 'ILLUMINA')
    self.assertEqual(first_prep['file_path'],
                     self.path + '/sample2.fastq.gz')

    # unknown sample names are not silently created
    with self.assertRaises(KeyError):
        submission.samples['nothere']
def test_add_samples_from_templates_filter_samples(self):
    """With the filtered prep template, 'sample3' is not loaded

    Membership is checked with assertIn / assertNotIn so a failure names
    the offending sample.
    """
    sample_template = StringIO(EXP_SAMPLE_TEMPLATE)
    prep_template = StringIO(EXP_PREP_TEMPLATE_FILTERED)
    submission = EBISubmission('001', 'teststudy', 'test asbstract',
                               investigation_type='Other',
                               new_investigation_type='metagenome')
    submission.add_samples_from_templates(sample_template, prep_template,
                                          self.path)
    self.assertIn('sample1', submission.samples)
    self.assertIn('sample2', submission.samples)
    self.assertNotIn('sample3', submission.samples)

    prep_info = submission.samples['sample2']['prep']
    self.assertEqual(prep_info['platform'], 'ILLUMINA')
    self.assertEqual(prep_info['file_path'], self.sample2_fp)

    # unknown sample names are not silently created
    with self.assertRaises(KeyError):
        submission.samples['nothere']
def submit_EBI_from_files(study_id, sample_template, prep_template,
                          fastq_dir_fp, output_dir_fp, investigation_type,
                          action, send):
    """Build an EBI submission out of template files and write its XMLs

    Parameters
    ----------
    study_id : int
        The study id
    sample_template : File
        The file handler of the sample template file
    prep_template : File
        The file handler of the prep template file
    fastq_dir_fp : str
        The fastq filepath
    output_dir_fp : str
        The output directory; it is created here and must not exist yet
    investigation_type : str
        The investigation type string
    action : str
        The action to perform with this data, valid options are: %s
    send : bool
        True to actually send the files

    Raises
    ------
    ValueError
        If the output directory already exists
    """
    study = Study(study_id)
    study_id_str = str(study_id)

    # Every generated XML file lives directly inside the output directory
    study_fp = join(output_dir_fp, 'study.xml')
    sample_fp = join(output_dir_fp, 'sample.xml')
    experiment_fp = join(output_dir_fp, 'experiment.xml')
    run_fp = join(output_dir_fp, 'run.xml')
    submission_fp = join(output_dir_fp, 'submission.xml')

    # Refuse to write into a folder that is already there
    if isdir(output_dir_fp):
        raise ValueError('The output folder already exists: %s' %
                         output_dir_fp)
    makedirs(output_dir_fp)

    submission = EBISubmission.from_templates_and_per_sample_fastqs(
        study_id_str, study.title, study.info['study_abstract'],
        investigation_type, sample_template, prep_template, fastq_dir_fp)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp,
                                   run_fp, submission_fp, action)

    if not send:
        return
    submission.send_sequences()
    submission.send_xml()
def test_from_templates_and_per_sample_fastqs(self):
    """The factory builds a submission with the prep info of 'sample2'"""
    sample_fh = StringIO(EXP_SAMPLE_TEMPLATE)
    prep_fh = StringIO(EXP_PREP_TEMPLATE)
    ebi = EBISubmission.from_templates_and_per_sample_fastqs(
        '001', 'test study', 'abstract', 'Metagenomics', sample_fh,
        prep_fh, self.path)
    self.assertEqual(ebi.samples['sample2']['prep']['platform'],
                     'ILLUMINA')
    self.assertEqual(ebi.samples['sample2']['prep']['file_path'],
                     self.path + '/sample2.fastq.gz')
    # unknown sample names raise instead of being created
    with self.assertRaises(KeyError):
        ebi.samples['nothere']
def test_write_experiment_xml(self):
    """write_experiment_xml output, whitespace-stripped, is EXPERIMENTXML

    The temporary file is read through a context manager and removed in a
    ``finally`` clause, so neither the handle nor the file leaks when the
    write or the read fails.
    """
    submission = EBISubmission('001', 'teststudy', 'test asbstract',
                               'metagenome')
    submission.add_sample('test1')
    submission.add_sample_prep('test1', 'ILLUMINA', 'fastq', 'fakepath',
                               'experiment description', 'library protocol')
    fh, output = mkstemp()
    # only the path is needed; release the OS-level descriptor right away
    close(fh)
    try:
        submission.write_experiment_xml(output)
        with open(output) as xml_fh:
            obs_stripped = ''.join([line.strip() for line in xml_fh])
    finally:
        remove(output)
    exp_stripped = ''.join([line.strip()
                            for line in EXPERIMENTXML.splitlines()])
    self.assertEqual(obs_stripped, exp_stripped)
def test_generate_curl_command(self):
    """generate_curl_command output with and without the '-k' flag

    The call must raise NoXMLError while run_xml_fp is unset.
    """
    sample_fh = StringIO(EXP_SAMPLE_TEMPLATE)
    prep_fh = StringIO(EXP_PREP_TEMPLATE)
    ebi = EBISubmission.from_templates_and_per_sample_fastqs(
        '001', 'test study', 'abstract', 'Metagenomics', sample_fh,
        prep_fh, self.path)

    # Set these artificially since the function depends only on these fps
    ebi.submission_xml_fp = 'submission.xml'
    ebi.experiment_xml_fp = 'experiment.xml'
    ebi.study_xml_fp = 'study.xml'
    ebi.sample_xml_fp = 'sample.xml'

    # this should fail since we have not yet set the run.xml fp
    with self.assertRaises(NoXMLError):
        ebi.generate_curl_command('1', '2', '3', '4')

    ebi.run_xml_fp = 'run.xml'

    # NOTE(review): '******' and '[email protected]' look like redacted or
    # mangled literals; they are kept verbatim -- confirm against upstream
    user = '******'
    access_key = 'ebi_access_key'
    dropbox_url = 'ebi_dropbox_url'

    # everything after the curl executable and its flags is shared
    shared_tail = ('-F "[email protected]" '
                   '-F "[email protected]" '
                   '-F "[email protected]" '
                   '-F "[email protected]" '
                   '-F "[email protected]" '
                   '"ebi_dropbox_url/?auth=ERA%20ebi_seq_xfer_user'
                   '%20ebi_access_key%3D"')

    # first without, then with curl certificate authentication
    for skip_cert, curl_prefix in ((True, 'curl -k '), (False, 'curl ')):
        observed = ebi.generate_curl_command(user, access_key, skip_cert,
                                             dropbox_url)
        self.assertEqual(observed, curl_prefix + shared_tail)
def test_generate_curl_command(self):
    """generate_curl_command output with and without the '-k' flag

    The call must raise NoXMLError while run_xml_fp is unset.
    """
    sample_fh = StringIO(EXP_SAMPLE_TEMPLATE)
    prep_fh = StringIO(EXP_PREP_TEMPLATE)
    ebi = EBISubmission.from_templates_and_per_sample_fastqs(
        '001', 'test study', 'abstract', 'type', sample_fh, prep_fh,
        self.path)

    # Set these artificially since the function depends only on these fps
    ebi.submission_xml_fp = 'submission.xml'
    ebi.experiment_xml_fp = 'experiment.xml'
    ebi.study_xml_fp = 'study.xml'
    ebi.sample_xml_fp = 'sample.xml'

    # this should fail since we have not yet set the run.xml fp
    with self.assertRaises(NoXMLError):
        ebi.generate_curl_command('1', '2', '3', '4')

    ebi.run_xml_fp = 'run.xml'

    # NOTE(review): '******' and '[email protected]' look like redacted or
    # mangled literals; they are kept verbatim -- confirm against upstream
    user = '******'
    access_key = 'ebi_access_key'
    dropbox_url = 'ebi_dropbox_url'

    # everything after the curl executable and its flags is shared
    shared_tail = ('-F "[email protected]" '
                   '-F "[email protected]" '
                   '-F "[email protected]" '
                   '-F "[email protected]" '
                   '-F "[email protected]" '
                   '"ebi_dropbox_url/?auth=ERA%20ebi_seq_xfer_user'
                   '%20ebi_access_key%3D"')

    # Without curl certificate authentication
    without_cert = ebi.generate_curl_command(user, access_key, True,
                                             dropbox_url)
    self.assertEqual(without_cert, 'curl -k ' + shared_tail)

    # With curl certificate authentication
    with_cert = ebi.generate_curl_command(user, access_key, False,
                                          dropbox_url)
    self.assertEqual(with_cert, 'curl ' + shared_tail)
def test_generate_study_xml(self):
    """generate_study_xml matches STUDYXML, and STUDYXML_PMIDS with pmids"""
    def flatten_lines(lines):
        # drop per-line surrounding whitespace and glue everything together
        return ''.join(line.strip() for line in lines)

    def rendered(ebi):
        # pretty-print the generated study element, then flatten it
        document = minidom.parseString(
            ET.tostring(ebi.generate_study_xml()))
        pretty = document.toprettyxml(indent='  ', encoding='UTF-8')
        return flatten_lines(pretty.splitlines())

    plain = EBISubmission('001', 'teststudy', 'test asbstract',
                          investigation_type='Other',
                          new_investigation_type='metagenome')
    self.assertEqual(rendered(plain),
                     flatten_lines(STUDYXML.splitlines()))

    with_pmids = EBISubmission(
        '001', 'teststudy', 'test asbstract', 'Other',
        new_investigation_type='Amplicon Sequencing', pmids=[12, 15])
    self.assertEqual(rendered(with_pmids),
                     flatten_lines(STUDYXML_PMIDS.splitlines()))
def test_from_templates_and_per_sample_fastqs(self):
    """The factory builds a submission whose 'sample2' has one ILLUMINA prep"""
    sample_fh = StringIO(EXP_SAMPLE_TEMPLATE)
    prep_fh = StringIO(EXP_PREP_TEMPLATE)
    ebi = EBISubmission.from_templates_and_per_sample_fastqs(
        '001', 'test study', 'abstract', 'type', sample_fh, prep_fh,
        self.path)
    self.assertEqual(ebi.samples['sample2']['preps'][0]['platform'],
                     'ILLUMINA')
    self.assertEqual(ebi.samples['sample2']['preps'][0]['file_path'],
                     self.path + '/sample2.fastq.gz')
    # unknown sample names raise instead of being created
    with self.assertRaises(KeyError):
        ebi.samples['nothere']
def test_add_sample_prep(self):
    """add_sample_prep stores the prep; an unknown sample raises KeyError"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract', 'metagenome')
    for sample_name in ('test1', 'test2'):
        ebi.add_sample(sample_name)

    ebi.add_sample_prep('test1', 'ILLUMINA', 'fastq', self.path,
                        'experiment description', 'library protocol')
    stored_prep = ebi.samples['test1']['preps'][0]
    self.assertEqual(stored_prep['platform'], 'ILLUMINA')
    self.assertEqual(stored_prep['file_path'], self.path)

    # 'test3' was never added as a sample
    with self.assertRaises(KeyError):
        ebi.add_sample_prep('test3', 'ILLUMINA', 'fastq', self.path,
                            'experiment description', 'library protocol')
def test_add_sample_prep_exception(self):
    """Bad platform raises ValueError; unknown sample raises KeyError"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract',
                        investigation_type='Other',
                        new_investigation_type='metagenome')
    for sample_name in ('test1', 'test2'):
        ebi.add_sample(sample_name)

    # known sample, platform 'DOES-NOT-EXIST'
    with self.assertRaises(ValueError):
        ebi.add_sample_prep('test2', 'DOES-NOT-EXIST', 'fastq', self.path,
                            'experiment description', 'library protocol')

    # 'test3' was never added as a sample
    with self.assertRaises(KeyError):
        ebi.add_sample_prep('test3', 'DOES-NOT-EXIST', 'fastq', self.path,
                            'experiment description', 'library protocol')
def test_generate_experiment_xml(self):
    """generate_experiment_xml output, stripped, equals EXPERIMENTXML"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract', 'metagenome')
    ebi.add_sample('test1')
    ebi.add_sample_prep('test1', 'ILLUMINA', 'fastq', 'fakepath',
                        'experiment description', 'library protocol')
    experiment_element = ebi.generate_experiment_xml()
    document = minidom.parseString(ET.tostring(experiment_element))
    pretty = document.toprettyxml(indent='  ', encoding='UTF-8')
    observed = ''.join(line.strip() for line in pretty.splitlines())
    expected = ''.join(line.strip()
                       for line in EXPERIMENTXML.splitlines())
    self.assertEqual(observed, expected)
def test_add_sample_prep_exception(self):
    """Bad platform raises ValueError; unknown sample raises KeyError"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract',
                        investigation_type='Other',
                        new_investigation_type='metagenome')
    for sample_name in ('test1', 'test2'):
        ebi.add_sample(sample_name, '9606', 'h**o sapiens', 'desc1')

    # known sample, platform 'DOES-NOT-EXIST'
    with self.assertRaises(ValueError):
        ebi.add_sample_prep('test2', 'DOES-NOT-EXIST', 'fastq',
                            self.sample1_fp, 'experiment description',
                            'library protocol')

    # 'test3' was never added as a sample
    with self.assertRaises(KeyError):
        ebi.add_sample_prep('test3', 'DOES-NOT-EXIST', 'fastq',
                            self.sample3_fp, 'experiment description',
                            'library protocol')
def test_generate_run_xml(self):
    """generate_run_xml output, whitespace-stripped, equals RUNXML"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract', 'metagenome')
    ebi.add_sample('test1')
    ebi.add_sample_prep('test1', 'ILLUMINA', 'fastq',
                        join(self.path, '__init__.py'),
                        'experiment description', 'library protocol')
    run_element = ebi.generate_run_xml()
    document = minidom.parseString(ET.tostring(run_element))
    pretty = document.toprettyxml(indent='  ', encoding='UTF-8')
    observed = ''.join(line.strip() for line in pretty.splitlines())
    expected = ''.join(line.strip() for line in RUNXML.splitlines())
    self.assertEqual(observed, expected)
def test_write_experiment_xml(self):
    """write_experiment_xml output equals the filled-in EXPERIMENTXML

    The temporary file is read through a context manager and removed in a
    ``finally`` clause, so neither the handle nor the file leaks when the
    write or the read fails.
    """
    submission = EBISubmission('001', 'teststudy', 'test asbstract',
                               investigation_type='Other',
                               new_investigation_type='metagenome')
    submission.add_sample('test1', '9606', 'h**o sapiens', 'desc1')
    submission.add_sample_prep('test1', 'ILLUMINA', 'fastq',
                               self.sample1_fp, 'experiment description',
                               'library protocol')
    fh, output = mkstemp()
    # only the path is needed; release the OS-level descriptor right away
    close(fh)
    try:
        submission.write_experiment_xml(output)
        with open(output) as xml_fh:
            obs_stripped = ''.join([line.strip() for line in xml_fh])
    finally:
        remove(output)
    # the template carries the fastq path and the organization prefix
    exp = EXPERIMENTXML % {
        'path': self.sample1_fp,
        'organization_prefix': qiita_config.ebi_organization_prefix}
    exp_stripped = ''.join([line.strip() for line in exp.splitlines()])
    self.assertEqual(obs_stripped, exp_stripped)
def test_init(self):
    """The constructor stores its arguments and sets the defaults

    The XML filepath attributes are checked with assertIsNone, which tests
    identity with None rather than equality.
    """
    e = EBISubmission('2', 'Study Title', 'Study Abstract', 'metagenome')

    self.assertEqual(e.study_id, '2')
    self.assertEqual(e.study_title, 'Study Title')
    self.assertEqual(e.study_abstract, 'Study Abstract')
    self.assertEqual(e.investigation_type, 'metagenome')

    self.assertEqual(e.empty_value, 'no_data')

    # no XML file has been written yet
    self.assertIsNone(e.study_xml_fp)
    self.assertIsNone(e.sample_xml_fp)
    self.assertIsNone(e.experiment_xml_fp)
    self.assertIsNone(e.run_xml_fp)

    self.assertEqual(e.library_strategy, 'POOLCLONE')
    self.assertEqual(e.library_source, 'METAGENOMIC')
    self.assertEqual(e.library_selection, 'unspecified')

    self.assertEqual(e.additional_metadata, {})
def test_add_sample_prep(self):
    """add_sample_prep stores the prep; an unknown sample raises KeyError"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract',
                        investigation_type='Other',
                        new_investigation_type='metagenome')
    for sample_name in ('test1', 'test2'):
        ebi.add_sample(sample_name, '9606', 'h**o sapiens', 'desc1')

    ebi.add_sample_prep('test1', 'ILLUMINA', 'fastq', self.sample1_fp,
                        'experiment description', 'library protocol')
    stored_prep = ebi.samples['test1']['prep']
    self.assertEqual(stored_prep['platform'], 'ILLUMINA')
    self.assertEqual(stored_prep['file_path'], self.sample1_fp)

    # 'test3' was never added as a sample
    with self.assertRaises(KeyError):
        ebi.add_sample_prep('test3', 'ILLUMINA', 'fastq', self.sample3_fp,
                            'experiment description', 'library protocol')
def test_write_sample_xml(self):
    """write_sample_xml writes a file whose stripped content is SAMPLEXML

    The temporary file is read through a context manager and removed in a
    ``finally`` clause, so neither the handle nor the file leaks when the
    write or the read fails.
    """
    submission = EBISubmission('001', 'teststudy', 'test asbstract',
                               investigation_type='Other',
                               new_investigation_type='metagenome')
    submission.add_sample('test1')
    submission.add_sample('test2')
    fh, output = mkstemp()
    # only the path is needed; release the OS-level descriptor right away
    close(fh)
    try:
        submission.write_sample_xml(output)
        with open(output) as xml_fh:
            obs_stripped = ''.join([line.strip() for line in xml_fh])
    finally:
        remove(output)
    exp_stripped = ''.join([line.strip() for line in SAMPLEXML.splitlines()])
    self.assertEqual(obs_stripped, exp_stripped)
def test_generate_experiment_xml(self):
    """generate_experiment_xml equals the filled-in EXPERIMENTXML"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract',
                        investigation_type='Other',
                        new_investigation_type='metagenome')
    ebi.add_sample('test1', '9606', 'h**o sapiens', 'desc1')
    ebi.add_sample_prep('test1', 'ILLUMINA', 'fastq', self.sample1_fp,
                        'experiment description', 'library protocol')
    experiment_element = ebi.generate_experiment_xml()
    document = minidom.parseString(ET.tostring(experiment_element))
    pretty = document.toprettyxml(indent='  ', encoding='UTF-8')
    observed = ''.join(line.strip() for line in pretty.splitlines())

    # the template carries the fastq path and the organization prefix
    template_values = {
        'path': self.sample1_fp,
        'organization_prefix': qiita_config.ebi_organization_prefix,
    }
    expected_xml = EXPERIMENTXML % template_values
    expected = ''.join(line.strip() for line in expected_xml.splitlines())
    self.assertEqual(observed, expected)
def test_generate_run_xml(self):
    """generate_run_xml output equals the filled-in RUNXML template"""
    ebi = EBISubmission('001', 'teststudy', 'test asbstract',
                        investigation_type='Other',
                        new_investigation_type='metagenome')
    ebi.add_sample('test1', '9606', 'h**o sapiens', 'desc1')
    ebi.add_sample_prep('test1', 'ILLUMINA', 'fastq', self.sample1_fp,
                        'experiment description', 'library protocol')
    run_element = ebi.generate_run_xml()
    document = minidom.parseString(ET.tostring(run_element))

    # insert the proper EBI directory, since it is a timestamp and hard
    # to predict
    template_values = {
        'study_alias': ebi._get_study_alias(),
        'ebi_dir': ebi.ebi_dir,
        'organization_prefix': qiita_config.ebi_organization_prefix,
    }
    expected_xml = RUNXML % template_values

    pretty = document.toprettyxml(indent='  ', encoding='UTF-8')
    observed = ''.join(line.strip() for line in pretty.splitlines())
    expected = ''.join(line.strip() for line in expected_xml.splitlines())
    self.assertEqual(observed, expected)
def test_generate_study_xml(self):
    """generate_study_xml matches STUDYXML, and STUDYXML_PMIDS with pmids"""
    def stripped(text):
        # remove per-line surrounding whitespace and join the pieces
        return ''.join(line.strip() for line in text.splitlines())

    # (submission, expected fixture) pairs, checked in order
    without_pmids = EBISubmission('001', 'teststudy', 'test asbstract',
                                  investigation_type='Other',
                                  new_investigation_type='metagenome')
    study_element = without_pmids.generate_study_xml()
    document = minidom.parseString(ET.tostring(study_element))
    pretty = document.toprettyxml(indent='  ', encoding='UTF-8')
    self.assertEqual(stripped(pretty), stripped(STUDYXML))

    with_pmids = EBISubmission(
        '001', 'teststudy', 'test asbstract', 'Other',
        new_investigation_type='Amplicon Sequencing', pmids=[12, 15])
    study_element = with_pmids.generate_study_xml()
    document = minidom.parseString(ET.tostring(study_element))
    pretty = document.toprettyxml(indent='  ', encoding='UTF-8')
    self.assertEqual(stripped(pretty), stripped(STUDYXML_PMIDS))
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple
        (study_accession, submission_accession); both are None when `send`
        is False

    Raises
    ------
    ValueError
        If the current INSDC status is 'submitting' or 'success', or if the
        investigation type of the prep template is in neither term list of
        the ENA ontology
    IOError
        If the output folder already exists
    ComputeError
        If the files were sent and either accession came back as None
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        # NOTE(review): any exception raised below before the final status
        # update leaves the status at 'submitting', which the check above
        # then treats as non-resubmittable -- confirm this is intended
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        # the offending value is interpolated so the message is actionable
        raise ValueError("Unrecognized investigation type: '%s'. This term "
                         "is neither one of the official terms nor one of the "
                         "user-defined terms in the ENA ontology"
                         % (current_type,))

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it

        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == 'preprocessed_demux'][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str, study.title,
            study.info['study_abstract'], investigation_type, st, pt,
            fastq_dir_fp, new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp,
                                   submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        # a missing accession is treated as a failed submission
        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')

            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success', study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
def submit_EBI(artifact_id, action, send, test=False, test_size=False):
    """Submit an artifact to EBI

    Parameters
    ----------
    artifact_id : int
        The artifact id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    test : bool
        If True some restrictions will be ignored, only used in
        parse_EBI_reply
    test_size : bool
        If True the EBI-ENA restriction size will be changed to 5000

    Returns
    -------
    tuple
        ``(st_acc, sa_acc, bio_acc, ex_acc, run_acc)`` as produced by
        ``EBISubmission.parse_EBI_reply``. All five values are None when
        `send` is False, or when the reply is not parsed (`action` is not
        'ADD' and `test` is False).

    Raises
    ------
    ComputeError
        If the submission is still larger than the allowed size after
        cleaning the sample metadata, if any of the system calls used to
        send the sequences or the XMLs returns a non-zero status, or if
        the EBI reply cannot be parsed.
    """
    # step 1: init and validate
    ebi_submission = EBISubmission(artifact_id, action)

    # step 2: generate demux fastq files
    try:
        ebi_submission.generate_demultiplexed_fastq()
    except Exception:
        # log the traceback and remove the submission directory (if it was
        # created) before propagating the original error
        error_msg = format_exc()
        if isdir(ebi_submission.full_ebi_dir):
            rmtree(ebi_submission.full_ebi_dir)
        LogEntry.create('Runtime', error_msg,
                        info={'ebi_submission': artifact_id})
        raise

    # step 3: generate and write xml files
    ebi_submission.generate_xml_files()

    # before we continue let's check the size of the submission
    to_review = [ebi_submission.study_xml_fp,
                 ebi_submission.sample_xml_fp,
                 ebi_submission.experiment_xml_fp,
                 ebi_submission.run_xml_fp,
                 ebi_submission.submission_xml_fp]
    total_size = sum(stat(tr).st_size for tr in to_review if tr is not None)
    # note that the max for EBI is 10M but let's play it safe
    # NOTE(review): the limit applied here is exactly 10e+6 bytes, there is
    # no safety margin below the 10M mentioned above - confirm intent
    max_size = 10e+6 if not test_size else 5000
    if total_size > max_size:
        LogEntry.create(
            'Runtime', 'The submission: %d is larger than allowed (%d), will '
            'try to fix: %d' % (artifact_id, max_size, total_size))

        # transform current metadata to dataframe for easier curation
        rows = {k: dict(v) for k, v in ebi_submission.samples.items()}
        df = pd.DataFrame.from_dict(rows, orient='index')

        # number of distinct values per metadata column
        nunique = df.apply(pd.Series.nunique)
        nsamples = len(df.index)

        # remove unique columns and same value in all columns
        constant_or_unique = (nunique == 1) | (nunique == nsamples)
        # maximize deletion by removing also columns that are almost all the
        # same or almost all unique
        nearly_constant_or_unique = (
            (nunique <= int(nsamples * .01)) |
            (nunique >= int(nsamples * .5)))
        # both criteria are combined: the second one alone misses the
        # single-valued columns when there are fewer than 100 samples,
        # because int(nsamples * .01) is then 0
        cols_to_drop = set(
            nunique[constant_or_unique | nearly_constant_or_unique].index)
        # these columns are never removed from the sample XML
        cols_to_drop -= {'taxon_id', 'scientific_name', 'description'}

        # only the samples without an EBI sample accession are regenerated
        all_samples = ebi_submission.sample_template.ebi_sample_accessions
        samples = [k for k in ebi_submission.samples
                   if all_samples[k] is None]
        if samples:
            ebi_submission.write_xml_file(
                ebi_submission.generate_sample_xml(samples, cols_to_drop),
                ebi_submission.sample_xml_fp)

        # now let's recalculate the size to make sure it's fine
        new_total_size = sum(
            stat(tr).st_size for tr in to_review if tr is not None)
        LogEntry.create(
            'Runtime', 'The submission: %d after cleaning is %d and was %d' % (
                artifact_id, new_total_size, total_size))
        if new_total_size > max_size:
            raise ComputeError(
                'Even after cleaning the submission: %d is too large. Before '
                'cleaning: %d, after: %d' % (
                    artifact_id, total_size, new_total_size))

    st_acc, sa_acc, bio_acc, ex_acc, run_acc = None, None, None, None, None
    if send:
        # getting aspera's password: an already exported value wins over the
        # one stored in the configuration
        old_ascp_pass = environ.get('ASPERA_SCP_PASS', '')
        if old_ascp_pass == '':
            environ['ASPERA_SCP_PASS'] = qiita_config.ebi_seq_xfer_pass
        ascp_passwd = environ['ASPERA_SCP_PASS']

        # step 4: sending sequences
        try:
            if action != 'MODIFY':
                LogEntry.create('Runtime',
                                ("Submitting sequences for pre_processed_id: "
                                 "%d" % artifact_id))
                for cmd in ebi_submission.generate_send_sequences_cmd():
                    stdout, stderr, rv = system_call(cmd)
                    if rv != 0:
                        error_msg = ("ASCP Error:\nStd output:%s\nStd error:%s"
                                     % (stdout, stderr))
                        raise ComputeError(error_msg)
                    # keep the output of every transfer in the reply file
                    with open(ebi_submission.ascp_reply, 'a') as reply_fh:
                        reply_fh.write('stdout:\n%s\n\nstderr: %s'
                                       % (stdout, stderr))
                # only report success once every transfer has finished
                LogEntry.create('Runtime',
                                ('Submission of sequences of pre_processed_id: '
                                 '%d completed successfully' % artifact_id))
        finally:
            # put the environment back as it was, whatever happened above
            environ['ASPERA_SCP_PASS'] = old_ascp_pass

        # step 5: sending xml
        xmls_cmds = ebi_submission.generate_curl_command(
            ebi_seq_xfer_pass=ascp_passwd)
        LogEntry.create('Runtime',
                        ("Submitting XMLs for pre_processed_id: "
                         "%d" % artifact_id))
        xml_content, stderr, rv = system_call(xmls_cmds)
        if rv != 0:
            error_msg = ("Error:\nStd output:%s\nStd error:%s"
                         % (xml_content, stderr))
            raise ComputeError(error_msg)
        # NOTE(review): this entry is written after the XML submission but
        # its text talks about sequences - confirm the intended wording
        LogEntry.create('Runtime',
                        ('Submission of sequences of pre_processed_id: '
                         '%d completed successfully' % artifact_id))
        with open(ebi_submission.curl_reply, 'w') as reply_fh:
            reply_fh.write('stdout:\n%s\n\nstderr: %s'
                           % (xml_content, stderr))

        # parsing answer / only if adding
        if action == 'ADD' or test:
            try:
                st_acc, sa_acc, bio_acc, ex_acc, run_acc = \
                    ebi_submission.parse_EBI_reply(xml_content, test=test)
            except EBISubmissionError as e:
                error = str(e)
                le = LogEntry.create(
                    'Fatal', "Command: %s\nError: %s\n" % (xml_content, error),
                    info={'ebi_submission': artifact_id})
                raise ComputeError(
                    "EBI Submission failed! Log id: %d\n%s" % (le.id, error))

            # store only the accessions that were actually returned
            if st_acc:
                ebi_submission.study.ebi_study_accession = st_acc
            if sa_acc:
                ebi_submission.sample_template.ebi_sample_accessions = sa_acc
            if bio_acc:
                ebi_submission.sample_template.biosample_accessions = bio_acc
            if ex_acc:
                ebi_submission.prep_template.ebi_experiment_accessions = ex_acc
            ebi_submission.artifact.ebi_run_accessions = run_acc

    return st_acc, sa_acc, bio_acc, ex_acc, run_acc
def submit_EBI(artifact_id, action, send, test=False):
    """Submit an artifact to EBI

    Parameters
    ----------
    artifact_id : int
        The artifact id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    test : bool
        If True some restrictions will be ignored, only used in
        parse_EBI_reply

    Returns
    -------
    tuple
        ``(st_acc, sa_acc, bio_acc, ex_acc, run_acc)`` as produced by
        ``EBISubmission.parse_EBI_reply``, or five None values when `send`
        is False.

    Raises
    ------
    ComputeError
        If any of the system calls used to send the sequences or the XMLs
        returns a non-zero status, or if the EBI reply cannot be parsed.
    """
    # step 1: init and validate
    ebi_submission = EBISubmission(artifact_id, action)

    # step 2: generate demux fastq files
    try:
        ebi_submission.generate_demultiplexed_fastq()
    except Exception:
        # log the traceback and remove the submission directory (if it was
        # created) before propagating the original error
        error_msg = format_exc()
        if isdir(ebi_submission.full_ebi_dir):
            rmtree(ebi_submission.full_ebi_dir)
        LogEntry.create('Runtime', error_msg,
                        info={'ebi_submission': artifact_id})
        raise

    # step 3: generate and write xml files
    ebi_submission.generate_xml_files()

    if not send:
        # nothing is transferred, so there are no accessions to report
        return None, None, None, None, None

    # getting aspera's password: an already exported value wins over the one
    # stored in the configuration
    old_ascp_pass = environ.get('ASPERA_SCP_PASS', '')
    if old_ascp_pass == '':
        environ['ASPERA_SCP_PASS'] = qiita_config.ebi_seq_xfer_pass
    ascp_passwd = environ['ASPERA_SCP_PASS']

    # step 4: sending sequences
    try:
        if action != 'MODIFY':
            LogEntry.create('Runtime',
                            ("Submitting sequences for pre_processed_id: "
                             "%d" % artifact_id))
            for cmd in ebi_submission.generate_send_sequences_cmd():
                stdout, stderr, rv = system_call(cmd)
                if rv != 0:
                    error_msg = ("ASCP Error:\nStd output:%s\nStd error:%s"
                                 % (stdout, stderr))
                    raise ComputeError(error_msg)
                # keep the output of every transfer in the reply file
                with open(ebi_submission.ascp_reply, 'a') as reply_fh:
                    reply_fh.write('stdout:\n%s\n\nstderr: %s'
                                   % (stdout, stderr))
            # only report success once every transfer has finished
            LogEntry.create('Runtime',
                            ('Submission of sequences of pre_processed_id: '
                             '%d completed successfully' % artifact_id))
    finally:
        # put the environment back as it was, whatever happened above
        environ['ASPERA_SCP_PASS'] = old_ascp_pass

    # step 5: sending xml and parsing answer
    xmls_cmds = ebi_submission.generate_curl_command(
        ebi_seq_xfer_pass=ascp_passwd)
    LogEntry.create('Runtime',
                    ("Submitting XMLs for pre_processed_id: "
                     "%d" % artifact_id))
    xml_content, stderr, rv = system_call(xmls_cmds)
    if rv != 0:
        error_msg = ("Error:\nStd output:%s\nStd error:%s"
                     % (xml_content, stderr))
        raise ComputeError(error_msg)
    # NOTE(review): this entry is written after the XML submission but its
    # text talks about sequences - confirm the intended wording
    LogEntry.create('Runtime',
                    ('Submission of sequences of pre_processed_id: '
                     '%d completed successfully' % artifact_id))
    with open(ebi_submission.curl_reply, 'w') as reply_fh:
        reply_fh.write('stdout:\n%s\n\nstderr: %s' % (xml_content, stderr))

    try:
        st_acc, sa_acc, bio_acc, ex_acc, run_acc = \
            ebi_submission.parse_EBI_reply(xml_content, test=test)
    except EBISubmissionError as e:
        error = str(e)
        le = LogEntry.create(
            'Fatal', "Command: %s\nError: %s\n" % (xml_content, error),
            info={'ebi_submission': artifact_id})
        raise ComputeError(
            "EBI Submission failed! Log id: %d\n%s" % (le.id, error))

    # the accessions are stored only when adding (or testing); they are
    # returned to the caller in every case
    if action == 'ADD' or test:
        if st_acc:
            ebi_submission.study.ebi_study_accession = st_acc
        if sa_acc:
            ebi_submission.sample_template.ebi_sample_accessions = sa_acc
        if bio_acc:
            ebi_submission.sample_template.biosample_accessions = bio_acc
        if ex_acc:
            ebi_submission.prep_template.ebi_experiment_accessions = ex_acc
        ebi_submission.artifact.ebi_run_accessions = run_acc

    return st_acc, sa_acc, bio_acc, ex_acc, run_acc
def test_get_experiment_alias(self):
    """Check the experiment alias built for sample 'foo' and index 0.

    A submission created with id '2' that holds the sample 'foo' must
    produce the alias 'qiime_study_2:foo:0'.
    """
    submission = EBISubmission('2', 'Study Title', 'Study Abstract',
                               'metagenome')
    submission.add_sample('foo')

    observed = submission._get_experiment_alias('foo', 0)
    expected = 'qiime_study_2:foo:0'
    self.assertEqual(observed, expected)