def test_get_study_assemblies_should_filter_fields(self):
    """Assemblies requested with two field names come back as two-entry dicts."""
    requested_fields = 'analysis_accession,study_accession'
    handler = ena_handler.EnaApiHandler()
    records = handler.get_study_assemblies('ERP112609', fields=requested_fields)
    for record in records:
        # One entry per requested field name.
        assert len(record) == 2
        assert isinstance(record, dict)
def test_authentication_not_set(self, mocked_class):
    """Check that ``auth`` is ``None`` when no ENA credentials are in the environment.

    :param mocked_class: mock object injected by a decorator outside this
        block; it is not used directly here.
    """
    # pop() with a default removes the variable when present and is a no-op
    # when it is absent. Indexing os.environ raises KeyError for an unset
    # variable, which is precisely the state this test wants to reach.
    for variable in ('ENA_API_USER', 'ENA_API_PASSWORD'):
        os.environ.pop(variable, None)

    ena = ena_handler.EnaApiHandler()
    assert ena.auth is None
def test_get_study_secondary_accession_should_retrieve_study_filtered_fields(
        self, accession_arg):
    """A study fetched with ``fields='study_accession'`` holds only that key.

    :param accession_arg: mapping expanded as keyword arguments to
        ``get_study`` (supplied from outside this block).
    """
    handler = ena_handler.EnaApiHandler()
    result = handler.get_study(fields='study_accession', **accession_arg)
    assert isinstance(result, dict)
    returned_keys = result.keys()
    assert len(returned_keys) == 1
    assert 'study_accession' in result
def test_get_study_runs_should_have_all_fields(self):
    """Study SRP125161 yields four runs, each a dict with 21 entries."""
    handler = ena_handler.EnaApiHandler()
    study_runs = handler.get_study_runs('SRP125161')
    assert len(study_runs) == 4
    for record in study_runs:
        assert len(record) == 21
        assert isinstance(record, dict)
def test_get_study_assemblies_should_filter_accessions(self):
    """Assemblies returned under an accession filter are 28-entry dicts."""
    wanted_accessions = ['GCA_001751075']
    handler = ena_handler.EnaApiHandler()
    records = handler.get_study_assemblies(
        'ERP112609', filter_accessions=wanted_accessions)
    for record in records:
        assert len(record) == 28
        assert isinstance(record, dict)
def test_get_study_runs_should_have_filter_run_accessions(self):
    """Filtering SRP125161 by one run accession returns exactly that one run."""
    wanted_accessions = ['SRR6301444']
    handler = ena_handler.EnaApiHandler()
    study_runs = handler.get_study_runs(
        'SRP125161', filter_accessions=wanted_accessions)
    assert len(study_runs) == 1
    for record in study_runs:
        assert len(record) == 21
        assert isinstance(record, dict)
def test_get_study_runs_should_return_all_accessions_including_amplicon(self):
    """With ``filter_assembly_runs=False`` study ERP001506 yields 24 run accessions."""
    handler = ena_handler.EnaApiHandler()
    study_runs = handler.get_study_runs('ERP001506', filter_assembly_runs=False)
    # Indexing each record also verifies that 'run_accession' is present.
    accessions = [record['run_accession'] for record in study_runs]
    assert len(accessions) == 24
def test_get_study_from_accessions_should_retrieve_default_fields(
        self, accession_arg):
    """
        A study fetched without a ``fields`` argument has one key per default field.

        Runs once per accession case supplied from outside this block, covering
        each accession type individually and together.

    :param accession_arg: mapping expanded as keyword arguments to ``get_study``
    :return:
    """
    handler = ena_handler.EnaApiHandler()
    result = handler.get_study(**accession_arg)
    assert isinstance(result, dict)
    actual_count = len(result.keys())
    # STUDY_DEFAULT_FIELDS is a comma-separated string of field names.
    expected_count = len(ena_handler.STUDY_DEFAULT_FIELDS.split(','))
    assert actual_count == expected_count
def test_get_study_runs_should_not_fetch_size_if_private(
        self, mock_json_load, mock_post_request):
    """With the request and JSON loading mocked, the run has no ``raw_data_size``.

    :param mock_json_load: mock whose return value is set to ``MOCKED_RUNS``.
    :param mock_post_request: mock whose return value is a bare 200 response.
    """
    handler = ena_handler.EnaApiHandler()

    ok_response = requests.Response()
    ok_response.status_code = 200
    mock_post_request.return_value = ok_response
    mock_json_load.return_value = MOCKED_RUNS

    # private=True was removed
    study_runs = handler.get_study_runs(
        'SRP125161', filter_accessions=['SRR6301444'])

    assert len(study_runs) == 1
    for record in study_runs:
        assert len(record) == 20
        assert isinstance(record, dict)
        assert record['raw_data_size'] is None
def main(raw_args=sys.argv[1:]):
    """Run one synchronisation pass over studies, runs and assemblies.

    Parses ``raw_args``, configures file logging, resolves the cutoff date
    (the command-line value, else the value from ``load_cutoff_date()``, else
    ``'1970-01-01'``), calls ``sync.sync_studies``, ``sync.sync_runs`` and
    ``sync.sync_assemblies``, and logs the created/updated counts.

    Error handling after the sync:

    * more than 10 errors: a warning is logged and all errors are written to
      ``error.log`` in the current directory, one per line;
    * 1 to 10 errors: each one is logged at ERROR level;
    * no errors: today's date is stored through ``save_cutoff_date``.

    :param raw_args: argument list handed to ``parse_args``; the default is
        the command-line arguments captured when the function is defined.
    :raises SyntaxError: when run on a Python major version below 3.
    """
    if sys.version_info.major < 3:
        raise SyntaxError("Must be using Python 3")

    args = parse_args(raw_args)

    # logging.INFO is the numeric level. logging.info is the logging
    # *function*, which basicConfig rejects as a level.
    log_level = logging.DEBUG if args.verbose else logging.INFO
    # os.path.join does not expand '~'; without expanduser the log would be
    # looked up under a literal './~' directory.
    log_path = os.path.join(
        os.path.expanduser('~'), 'backlog-populator', 'backlog.log')
    logging.basicConfig(filename=log_path, level=log_level)

    cutoff = args.cutoffdate or load_cutoff_date() or '1970-01-01'

    # Setup ENA API module
    ena = ena_handler.EnaApiHandler()
    mgnify = mgnify_handler.MgnifyHandler(args.db)

    studies_created, studies_updated, study_errors = sync.sync_studies(
        ena, mgnify, cutoff)
    runs_created, runs_updated, run_errors = sync.sync_runs(
        ena, mgnify, cutoff)
    assem_created, assem_updated, assem_errors = sync.sync_assemblies(
        ena, mgnify, cutoff)

    logging.info('Created {} studies'.format(studies_created))
    logging.info('Updated {} studies'.format(studies_updated))
    logging.info('Created {} runs'.format(runs_created))
    logging.info('Updated {} runs'.format(runs_updated))
    logging.info('Created {} assemblies'.format(assem_created))
    logging.info('Updated {} assemblies'.format(assem_updated))

    errors = study_errors + run_errors + assem_errors
    if len(errors) > 10:
        logging.warning(
            'More than 10 update errors occured, see error.log for details')
        with open('error.log', 'w') as f:
            # writelines() adds no separators, so each entry carries its own
            # newline; format() also tolerates non-string error values.
            f.writelines(
                '{}: {}\n'.format(accession, error)
                for accession, error in errors)
    elif errors:
        for accession, error in errors:
            # The first argument to logging.error is the format string; the
            # accession must be passed as data, not as the template.
            logging.error('%s: %s', accession, error)
    else:
        # Only advance the cutoff when the whole sync was error-free.
        save_cutoff_date(datetime.today().strftime('%Y-%m-%d'))
def test_download_runs(self, tmpdir):
    """Download both FASTQ files of a run and check they appear in ``tmpdir``.

    The test switches the working directory to ``tmpdir`` before calling
    ``download_runs`` and always switches back, so a failed assertion or a
    connection error cannot leak the changed directory into later tests.

    :param tmpdir: temporary-directory fixture exposing ``strpath``.
    """
    target_dir = tmpdir.strpath
    original_dir = os.getcwd()
    run = {
        'fastq_ftp': 'ftp.sra.ebi.ac.uk/vol1/fastq/ERR866/ERR866589/ERR866589_1.fastq.gz;'
                     'ftp.sra.ebi.ac.uk/vol1/fastq/ERR866/ERR866589/ERR866589_2.fastq.gz'
    }

    os.chdir(target_dir)
    try:
        ena = ena_handler.EnaApiHandler()
        try:
            ena.download_runs([run])
        except requests.exceptions.ConnectionError:
            # If Max retries exceeded with url then there is no way of
            # running this test successfully
            return

        fs = os.listdir(target_dir)
        assert 2 == len(fs)
        assert 'ERR866589_1.fastq.gz' in fs
        assert 'ERR866589_2.fastq.gz' in fs
    finally:
        # Restore the working directory on every exit path.
        os.chdir(original_dir)
import logging from django.db.models import Q from django.utils import timezone from django.core.exceptions import ObjectDoesNotExist from ena_portal_api import ena_handler from emgapi import models as emg_models from emgapianns.management.commands.import_publication import lookup_publication_by_pubmed_id, \ update_or_create_publication from emgapianns.management.lib import utils from emgena import models as ena_models from emgena.models import RunStudy, AssemblyStudy ena = ena_handler.EnaApiHandler() class StudyImporter: """ Creates a new study object in EMG or updates an existing one. """ def __init__(self, study_accession, study_dir, lineage, ena_db, emg_db): self.study_accession = study_accession self.study_dir = study_dir self.lineage = lineage self.ena_db = ena_db self.emg_db = emg_db def run(self): logging.info("Creating or updating study {}".format(
def test_get_study_runs_should_not_return_amplicons(self):
    """A default ``get_study_runs`` call on ERP001506 yields 18 run accessions."""
    handler = ena_handler.EnaApiHandler()
    study_runs = handler.get_study_runs('ERP001506')
    accessions = [record['run_accession'] for record in study_runs]
    assert len(accessions) == 18
def test_get_study_runs_should_return_all_accessions(self):
    """Study ERP000339 yields exactly the run accessions ERR019477 and ERR019478."""
    expected = {'ERR019477', 'ERR019478'}
    handler = ena_handler.EnaApiHandler()
    found = {
        record['run_accession']
        for record in handler.get_study_runs('ERP000339')
    }
    assert found == expected
def test_get_assembly_invalid_accession(self):
    """``get_assembly`` raises ``ValueError`` for an accession it cannot resolve."""
    bad_accession = 'Invalid_accession'
    handler = ena_handler.EnaApiHandler()
    with pytest.raises(ValueError):
        handler.get_assembly(bad_accession)
def test_get_run_should_retrieve_run_filtered_fields(self):
    """A run fetched with ``fields='run_accession'`` holds only that key."""
    only_field = 'run_accession'
    handler = ena_handler.EnaApiHandler()
    record = handler.get_run('ERR1701760', fields=only_field)
    assert isinstance(record, dict)
    assert len(record) == 1
    assert only_field in record
def test_get_study_runs_should_return_empty_list_if_study_contains_no_runs(self):
    """``get_study_runs`` returns a zero-length result for study ERP105889."""
    handler = ena_handler.EnaApiHandler()
    study_runs = handler.get_study_runs('ERP105889')
    assert len(study_runs) == 0
def test_get_assembly_should_have_all_fields(self):
    """Assembly ERZ1645181 fetched without a field filter is a 28-entry dict."""
    handler = ena_handler.EnaApiHandler()
    record = handler.get_assembly('ERZ1645181')
    # Size is checked before type, as in the other assembly tests.
    assert len(record) == 28
    assert isinstance(record, dict)
def test_get_study_api_no_results(self):
    """``get_study`` raises ``ValueError`` when the request returns a 204 with no text."""
    handler = ena_handler.EnaApiHandler()

    def empty_post(r):
        # Stand-in for post_request: status 204 and no body text.
        return MockResponse(204, text=None)

    handler.post_request = empty_post
    with pytest.raises(ValueError):
        handler.get_study('ERP001736')
def test_get_run_should_retrieve_run_all_fields(self):
    """Run ERR1701760 fetched without a field filter is a 21-entry dict."""
    handler = ena_handler.EnaApiHandler()
    record = handler.get_run('ERR1701760')
    assert isinstance(record, dict)
    assert len(record) == 21
def test_get_study_assemblies_should_have_all_fields(self):
    """Every assembly of study ERP112609 is a dict with 28 entries."""
    handler = ena_handler.EnaApiHandler()
    records = handler.get_study_assemblies('ERP112609')
    for record in records:
        assert len(record) == 28
        assert isinstance(record, dict)
def test_get_study_runs_api_unavailable(self):
    """``get_study_runs`` raises ``ValueError`` when the request returns status 500."""
    handler = ena_handler.EnaApiHandler()

    def failing_post(r):
        # Stand-in for post_request that always answers with status 500.
        return MockResponse(500)

    handler.post_request = failing_post
    with pytest.raises(ValueError):
        handler.get_study_runs('SRP125161')
def test_get_study_assembly_accessions_should_return_all_accessions(self):
    """Study ERP124518 yields exactly the four expected analysis accessions."""
    expected = {'ERZ1645181', 'ERZ1645182', 'ERZ1645183', 'ERZ1645184'}
    handler = ena_handler.EnaApiHandler()
    found = {
        record['analysis_accession']
        for record in handler.get_study_assemblies('ERP124518')
    }
    assert found == expected
def test_get_assembly_should_filter_fields(self):
    """An assembly requested with two field names comes back as a two-entry dict."""
    requested_fields = 'analysis_accession,study_accession'
    handler = ena_handler.EnaApiHandler()
    record = handler.get_assembly('ERZ1645181', fields=requested_fields)
    assert len(record) == 2
    assert isinstance(record, dict)
def test_get_study_assembly_accessions_should_return_empty_list_if_study_contains_no_assemblies(
        self):
    """``get_study_assemblies`` returns a zero-length result for PRJEB2280."""
    handler = ena_handler.EnaApiHandler()
    records = handler.get_study_assemblies('PRJEB2280')
    assert len(records) == 0
def test_get_assembly_api_unavailable(self):
    """``get_assembly`` raises ``ValueError`` when the request returns status 500."""
    handler = ena_handler.EnaApiHandler()

    def failing_post(r):
        # Stand-in for post_request that always answers with status 500.
        return MockResponse(500)

    handler.post_request = failing_post
    with pytest.raises(ValueError):
        handler.get_assembly('ERZ795049')
def test_authentication_set(self):
    """The handler exposes the configured credentials as a (user, password) tuple."""
    expected_auth = ('username', 'password')
    handler = ena_handler.EnaApiHandler()
    assert handler.auth == expected_auth
def test_get_run_api_unavailable(self):
    """``get_run`` raises ``ValueError`` when the request returns status 500."""
    handler = ena_handler.EnaApiHandler()

    def failing_post(r):
        # Stand-in for post_request that always answers with status 500.
        return MockResponse(500)

    handler.post_request = failing_post
    with pytest.raises(ValueError):
        handler.get_run('ERR1701760')
def test_get_study_assemblies_invalid_accession(self):
    """``get_study_assemblies`` returns an empty list for an invalid accession."""
    handler = ena_handler.EnaApiHandler()
    records = handler.get_study_assemblies('Invalid accession')
    assert [] == records
def __init__(self, db):
    """Store ``db``, create an ENA API handler and call ``update_taggable_studies``.

    :param db: value kept on the instance as ``self.db``.
    """
    self.db = db
    self.ena = ena_handler.EnaApiHandler()
    # Start with an empty list; update_taggable_studies() is called next and
    # presumably fills it (its body is not visible here - confirm).
    self.studies = []
    self.update_taggable_studies()