def test_get_study_assemblies_should_filter_fields(self):
     """Every assembly of study ERP112609 is a dict with only the two requested fields."""
     requested_fields = 'analysis_accession,study_accession'
     handler = ena_handler.EnaApiHandler()
     results = handler.get_study_assemblies('ERP112609',
                                            fields=requested_fields)
     for entry in results:
         assert len(entry) == 2
         assert isinstance(entry, dict)
 def test_authentication_not_set(self, mocked_class):
     """Without ENA credentials in the environment ``auth`` must be None.

     :param mocked_class: injected from outside this block (presumably by a
         ``patch`` decorator); it is not used in the body
     """
     credential_vars = ('ENA_API_USER', 'ENA_API_PASSWORD')
     # pop() with a default does not raise KeyError when a variable is
     # already absent, unlike indexing os.environ directly.
     saved = {name: os.environ.pop(name, None) for name in credential_vars}
     try:
         ena = ena_handler.EnaApiHandler()
         assert ena.auth is None
     finally:
         # Put back whatever was set so later tests see the same environment.
         for name, value in saved.items():
             if value is not None:
                 os.environ[name] = value
 def test_get_study_secondary_accession_should_retrieve_study_filtered_fields(
         self, accession_arg):
     """Fetch a study restricted to ``study_accession`` and check the result.

     :param accession_arg: keyword arguments forwarded to ``get_study``
     """
     handler = ena_handler.EnaApiHandler()
     result = handler.get_study(fields='study_accession', **accession_arg)
     assert isinstance(result, dict)
     assert len(result) == 1
     assert 'study_accession' in result
 def test_get_study_runs_should_have_all_fields(self):
     """Study SRP125161 yields four runs, each a dict with 21 entries."""
     handler = ena_handler.EnaApiHandler()
     study_runs = handler.get_study_runs('SRP125161')
     assert len(study_runs) == 4
     for entry in study_runs:
         assert len(entry) == 21
         assert isinstance(entry, dict)
 def test_get_study_assemblies_should_filter_accessions(self):
     """Assemblies of ERP112609 filtered by accession are dicts with 28 entries."""
     wanted_accessions = ['GCA_001751075']
     handler = ena_handler.EnaApiHandler()
     results = handler.get_study_assemblies(
         'ERP112609', filter_accessions=wanted_accessions)
     for entry in results:
         assert len(entry) == 28
         assert isinstance(entry, dict)
 def test_get_study_runs_should_have_filter_run_accessions(self):
     """Filtering SRP125161 by one run accession returns exactly one 21-entry dict."""
     wanted_accessions = ['SRR6301444']
     handler = ena_handler.EnaApiHandler()
     study_runs = handler.get_study_runs(
         'SRP125161', filter_accessions=wanted_accessions)
     assert len(study_runs) == 1
     for entry in study_runs:
         assert len(entry) == 21
         assert isinstance(entry, dict)
 def test_get_study_runs_should_return_all_accessions_including_amplicon(
         self):
     """With ``filter_assembly_runs=False`` study ERP001506 lists 24 run accessions."""
     handler = ena_handler.EnaApiHandler()
     study_runs = handler.get_study_runs('ERP001506',
                                         filter_assembly_runs=False)
     accessions = [entry['run_accession'] for entry in study_runs]
     assert len(accessions) == 24
 def test_get_study_from_accessions_should_retrieve_default_fields(
         self, accession_arg):
     """
         Fetch a study without a ``fields`` filter and check that the result
         has one entry per name in ``STUDY_DEFAULT_FIELDS``.
     :param accession_arg: keyword arguments forwarded to ``get_study``
         (presumably supplied by a parametrisation outside this block)
     :return:
     """
     handler = ena_handler.EnaApiHandler()
     result = handler.get_study(**accession_arg)
     assert isinstance(result, dict)
     actual_count = len(result.keys())
     expected_count = len(ena_handler.STUDY_DEFAULT_FIELDS.split(','))
     assert actual_count == expected_count
    def test_get_study_runs_should_not_fetch_size_if_private(
            self, mock_json_load, mock_post_request):
        """Stub the request and JSON loading, then check the single run returned.

        :param mock_json_load: mock whose return value is set to MOCKED_RUNS
        :param mock_post_request: mock whose return value is a 200 response
        Both mocks are presumably injected by ``patch`` decorators outside
        this block.
        """
        handler = ena_handler.EnaApiHandler()
        fake_response = requests.Response()
        fake_response.status_code = 200
        mock_post_request.return_value = fake_response
        mock_json_load.return_value = MOCKED_RUNS

        wanted_accessions = ['SRR6301444']
        # private=True was removed
        study_runs = handler.get_study_runs(
            'SRP125161', filter_accessions=wanted_accessions)
        assert len(study_runs) == 1
        for entry in study_runs:
            assert len(entry) == 20
            assert isinstance(entry, dict)
            assert entry['raw_data_size'] is None
def main(raw_args=sys.argv[1:]):
    """Sync studies, runs and assemblies from ENA and log the outcome.

    The cutoff date comes from ``args.cutoffdate`` when given, otherwise from
    ``load_cutoff_date()``, falling back to '1970-01-01'. It is passed to each
    sync step. Today's date is saved as the new cutoff only when all three
    sync steps finish without errors.

    :param raw_args: command-line arguments handed to ``parse_args``; the
        default is the process arguments captured when this function was
        defined
    :raises SyntaxError: when run on an interpreter older than Python 3
    """
    if sys.version_info.major < 3:
        raise SyntaxError("Must be using Python 3")
    args = parse_args(raw_args)

    # logging.INFO is the level constant; logging.info is the logging
    # function and is rejected by basicConfig.
    log_level = logging.DEBUG if args.verbose else logging.INFO
    # '~' is not expanded by os.path.join or by the file handler, so expand
    # it explicitly and make sure the directory exists before opening the log.
    log_dir = os.path.join(os.path.expanduser('~'), 'backlog-populator')
    os.makedirs(log_dir, exist_ok=True)
    logging.basicConfig(filename=os.path.join(log_dir, 'backlog.log'),
                        level=log_level)

    if args.cutoffdate:
        cutoff = args.cutoffdate
    else:
        cutoff = load_cutoff_date() or '1970-01-01'

    # Setup ENA API module
    ena = ena_handler.EnaApiHandler()
    mgnify = mgnify_handler.MgnifyHandler(args.db)
    studies_created, studies_updated, study_errors = sync.sync_studies(
        ena, mgnify, cutoff)
    runs_created, runs_updated, run_errors = sync.sync_runs(
        ena, mgnify, cutoff)
    assem_created, assem_updated, assem_errors = sync.sync_assemblies(
        ena, mgnify, cutoff)

    logging.info('Created {} studies'.format(studies_created))
    logging.info('Updated {} studies'.format(studies_updated))
    logging.info('Created {} runs'.format(runs_created))
    logging.info('Updated {} runs'.format(runs_updated))
    logging.info('Created {} assemblies'.format(assem_created))
    logging.info('Updated {} assemblies'.format(assem_updated))

    # Each error is an (accession, error) pair.
    errors = study_errors + run_errors + assem_errors
    if len(errors) > 10:
        logging.warning(
            'More than 10 update errors occured, see error.log for details')
        with open('error.log', 'w') as f:
            # One error per line; format() also copes with non-string errors.
            f.writelines(
                '{}: {}\n'.format(accession, error)
                for accession, error in errors)
    elif errors:
        for accession, error in errors:
            # The first argument is a format string, so pass the values as
            # arguments instead of using the accession as the message.
            logging.error('%s: %s', accession, error)
    else:
        # Only advance the cutoff when nothing failed.
        save_cutoff_date(datetime.today().strftime('%Y-%m-%d'))
    def test_download_runs(self, tmpdir):
        """Download both FASTQ files of run ERR866589 into a temporary directory.

        :param tmpdir: temporary directory object exposing ``strpath``
            (presumably the pytest ``tmpdir`` fixture)
        """
        tmpdir = tmpdir.strpath
        current_dir = os.getcwd()
        os.chdir(tmpdir)
        try:
            run = {
                'fastq_ftp':
                'ftp.sra.ebi.ac.uk/vol1/fastq/ERR866/ERR866589/ERR866589_1.fastq.gz;'
                'ftp.sra.ebi.ac.uk/vol1/fastq/ERR866/ERR866589/ERR866589_2.fastq.gz'
            }

            ena = ena_handler.EnaApiHandler()
            try:
                ena.download_runs([run])
                fs = os.listdir(tmpdir)
                assert 2 == len(fs)
                assert 'ERR866589_1.fastq.gz' in fs
                assert 'ERR866589_2.fastq.gz' in fs
            except requests.exceptions.ConnectionError:
                # If Max retries exceeded with url then there is no way of
                # running this test successfully, so it is not failed.
                pass
        finally:
            # Restore the working directory on every exit path (failed
            # assertion, connection error or success).
            os.chdir(current_dir)
示例#12
0
import logging

from django.db.models import Q
from django.utils import timezone
from django.core.exceptions import ObjectDoesNotExist
from ena_portal_api import ena_handler

from emgapi import models as emg_models
from emgapianns.management.commands.import_publication import lookup_publication_by_pubmed_id, \
    update_or_create_publication
from emgapianns.management.lib import utils
from emgena import models as ena_models
from emgena.models import RunStudy, AssemblyStudy

# Module-level ENA API handler, created once when this module is imported.
ena = ena_handler.EnaApiHandler()


class StudyImporter:
    """
        Creates a new study object in EMG or updates an existing one.
    """
    def __init__(self, study_accession, study_dir, lineage, ena_db, emg_db):
        """Keep the constructor arguments as attributes of the same names.

        :param study_accession: stored as ``self.study_accession``
        :param study_dir: stored as ``self.study_dir``
        :param lineage: stored as ``self.lineage``
        :param ena_db: stored as ``self.ena_db``
        :param emg_db: stored as ``self.emg_db``
        """
        # Database handles
        self.ena_db, self.emg_db = ena_db, emg_db
        # Study description
        self.study_accession, self.study_dir = study_accession, study_dir
        self.lineage = lineage

    def run(self):
        logging.info("Creating or updating study {}".format(
 def test_get_study_runs_should_not_return_amplicons(self):
     """With default arguments study ERP001506 lists 18 run accessions."""
     handler = ena_handler.EnaApiHandler()
     study_runs = handler.get_study_runs('ERP001506')
     accessions = [entry['run_accession'] for entry in study_runs]
     assert len(accessions) == 18
 def test_get_study_runs_should_return_all_accessions(self):
     """Study ERP000339 consists of exactly runs ERR019477 and ERR019478."""
     handler = ena_handler.EnaApiHandler()
     found_accessions = {
         entry['run_accession']
         for entry in handler.get_study_runs('ERP000339')
     }
     assert found_accessions == {'ERR019477', 'ERR019478'}
 def test_get_assembly_invalid_accession(self):
     """``get_assembly`` raises ``ValueError`` for the accession 'Invalid_accession'."""
     handler = ena_handler.EnaApiHandler()
     bad_accession = 'Invalid_accession'
     with pytest.raises(ValueError):
         handler.get_assembly(bad_accession)
 def test_get_run_should_retrieve_run_filtered_fields(self):
     """Run ERR1701760 restricted to ``run_accession`` is a one-entry dict."""
     handler = ena_handler.EnaApiHandler()
     result = handler.get_run('ERR1701760', fields='run_accession')
     assert isinstance(result, dict)
     assert len(result) == 1
     assert 'run_accession' in result
 def test_get_study_runs_should_return_empty_list_if_study_contains_no_runs(
         self):
     """``get_study_runs`` returns a zero-length result for study ERP105889."""
     handler = ena_handler.EnaApiHandler()
     study_runs = handler.get_study_runs('ERP105889')
     assert len(study_runs) == 0
 def test_get_assembly_should_have_all_fields(self):
     """Assembly ERZ1645181 fetched without a field filter is a 28-entry dict."""
     handler = ena_handler.EnaApiHandler()
     result = handler.get_assembly('ERZ1645181')
     assert len(result) == 28
     assert isinstance(result, dict)
 def test_get_study_api_no_results(self):
     """``get_study`` raises ``ValueError`` when the request yields a 204 response."""
     def empty_post(r):
         # Stand-in for post_request: always answer 204 with no text.
         return MockResponse(204, text=None)

     handler = ena_handler.EnaApiHandler()
     handler.post_request = empty_post
     with pytest.raises(ValueError):
         handler.get_study('ERP001736')
 def test_get_run_should_retrieve_run_all_fields(self):
     """Run ERR1701760 fetched without a field filter is a 21-entry dict."""
     handler = ena_handler.EnaApiHandler()
     result = handler.get_run('ERR1701760')
     assert isinstance(result, dict)
     assert len(result) == 21
 def test_get_study_assemblies_should_have_all_fields(self):
     """Every assembly of study ERP112609 is a dict with 28 entries."""
     handler = ena_handler.EnaApiHandler()
     results = handler.get_study_assemblies('ERP112609')
     for entry in results:
         assert len(entry) == 28
         assert isinstance(entry, dict)
 def test_get_study_runs_api_unavailable(self):
     """``get_study_runs`` raises ``ValueError`` when the request yields a 500 response."""
     def failing_post(r):
         # Stand-in for post_request: always answer with status 500.
         return MockResponse(500)

     handler = ena_handler.EnaApiHandler()
     handler.post_request = failing_post
     with pytest.raises(ValueError):
         handler.get_study_runs('SRP125161')
 def test_get_study_assembly_accessions_should_return_all_accessions(self):
     """Study ERP124518 consists of exactly four known analysis accessions."""
     expected_accessions = {
         'ERZ1645181', 'ERZ1645182', 'ERZ1645183', 'ERZ1645184'
     }
     handler = ena_handler.EnaApiHandler()
     found_accessions = {
         entry['analysis_accession']
         for entry in handler.get_study_assemblies('ERP124518')
     }
     assert found_accessions == expected_accessions
 def test_get_assembly_should_filter_fields(self):
     """Assembly ERZ1645181 restricted to two fields is a two-entry dict."""
     requested_fields = 'analysis_accession,study_accession'
     handler = ena_handler.EnaApiHandler()
     result = handler.get_assembly('ERZ1645181', fields=requested_fields)
     assert len(result) == 2
     assert isinstance(result, dict)
 def test_get_study_assembly_accessions_should_return_empty_list_if_study_contains_no_assemblies(
         self):
     """``get_study_assemblies`` returns a zero-length result for PRJEB2280."""
     handler = ena_handler.EnaApiHandler()
     results = handler.get_study_assemblies('PRJEB2280')
     assert len(results) == 0
 def test_get_assembly_api_unavailable(self):
     """``get_assembly`` raises ``ValueError`` when the request yields a 500 response."""
     def failing_post(r):
         # Stand-in for post_request: always answer with status 500.
         return MockResponse(500)

     handler = ena_handler.EnaApiHandler()
     handler.post_request = failing_post
     with pytest.raises(ValueError):
         handler.get_assembly('ERZ795049')
 def test_authentication_set(self):
     """The handler's ``auth`` equals ('username', 'password').

     The credentials are presumably configured outside this block.
     """
     expected_auth = ('username', 'password')
     handler = ena_handler.EnaApiHandler()
     assert handler.auth == expected_auth
 def test_get_run_api_unavailable(self):
     """``get_run`` raises ``ValueError`` when the request yields a 500 response."""
     def failing_post(r):
         # Stand-in for post_request: always answer with status 500.
         return MockResponse(500)

     handler = ena_handler.EnaApiHandler()
     handler.post_request = failing_post
     with pytest.raises(ValueError):
         handler.get_run('ERR1701760')
 def test_get_study_assemblies_invalid_accession(self):
     """``get_study_assemblies`` returns an empty list for 'Invalid accession'."""
     handler = ena_handler.EnaApiHandler()
     found = handler.get_study_assemblies('Invalid accession')
     assert [] == found
示例#30
0
    def __init__(self, db):
        """Store ``db``, create an ENA API handler and initialise the study list.

        :param db: kept as ``self.db``; its type is not visible in this block
        """
        self.db = db
        self.ena = ena_handler.EnaApiHandler()

        # Start from an empty list, then call update_taggable_studies(),
        # which is defined outside this view (presumably it fills
        # self.studies; verify against its definition).
        self.studies = []
        self.update_taggable_studies()