Пример #1
0
def run_ihec_container(libraries_file,
                       source,
                       config_path,
                       merge_antibodies=False,
                       outdir=None,
                       tracking=True,
                       mode='TEST',
                       log_level='INFO'):
    """Create a ContainerJob per treatment and write the wrapper script.

    Args:
        libraries_file: Passed to ``ResolveInputs.parse_libraries`` when
            truthy; otherwise ``libraries=None`` is used.
        source: Forwarded to ``ResolveInputs`` as ``source``.
        config_path: Forwarded to ``ResolveInputs`` as ``config_path``.
        merge_antibodies: Forwarded to ``ResolveInputs``.
        outdir: Forwarded to ``ResolveInputs``.
        tracking: Forwarded to ``ResolveInputs`` and to
            ``generate_wrapper_script``. When truthy, each job is also
            registered through the tracker and its outdir is changed to the
            record's ``analysis_data_path``.
        mode: Forwarded to ``ResolveInputs``.
        log_level: Name of a ``logging`` level attribute (e.g. ``'INFO'``)
            applied to the module logger.

    Returns:
        The list of created jobs, in creation order.
    """
    logger.setLevel(getattr(logging, log_level))

    libraries = (ResolveInputs.parse_libraries(libraries_file)
                 if libraries_file else None)
    resolved_inputs = ResolveInputs(config_path=config_path,
                                    libraries=libraries,
                                    source=source,
                                    merge_antibodies=merge_antibodies,
                                    outdir=outdir,
                                    tracking=tracking,
                                    mode=mode)
    # The tracker is built even when tracking is disabled, as before.
    tracker = Tracker(resolved_inputs.metadata_path)

    jobs = []
    for source_name in resolved_inputs.inputs:
        source_inputs = resolved_inputs.inputs[source_name]
        if not source_inputs:
            logger.warning('Skipping analysis of {}'.format(source_name))
            continue

        # Only sources with exactly one control are analysed.
        controls = source_inputs['controls']
        if len(controls) != 1:
            logger.warning(
                'Source {} has either multiple or no controls! Skipping analysis!'
                .format(source_name))
            logger.warning(controls)
            continue

        control = controls[0]
        for treatment in source_inputs['treatments']:
            job = ContainerJob(resolved_inputs, treatment, control)
            if tracking:
                # Register the job, then point it at the record's data path.
                record, already_exists = tracker.create(job.metadata)
                job.record = record
                job.change_outdir(record['analysis_data_path'],
                                  already_exists)
            jobs.append(job)

    script_path = get_wrapper_script_path(resolved_inputs.outdir)
    write_script(generate_wrapper_script(jobs, tracking=tracking),
                 script_path)
    return jobs
Пример #2
0
def test_merge_antibodies():
    """Print the resolved ``INPUTS`` with antibody merging on, then off.

    The two JSON dumps are separated by a line of 50 asterisks. Nothing is
    asserted; the output is meant for manual comparison.
    """
    libraries = [
        'K002490', 'K002150', 'K002179', 'K002530', 'K002395', 'K002055',
        'K002157', 'K002440', 'K002455', 'K002463', 'K002244', 'K002047',
        'K002213', 'K002336', 'K002300', 'K002471', 'K002414',
    ]
    separator = '*' * 50

    for position, merge_flag in enumerate((True, False)):
        if position:
            # Separator goes between the two dumps, not before the first.
            print(separator)
        resolved_inputs = ResolveInputs(config_path,
                                        libraries=libraries,
                                        source=None,
                                        merge_antibodies=merge_flag)
        print(json.dumps(resolved_inputs.data['INPUTS'], indent=4))
Пример #3
0
 def __init__(self, resolved_inputs_metadata):
     """Build a ResolveInputs from saved metadata and expose its API.

     Args:
         resolved_inputs_metadata: Passed to ``ResolveInputs`` as ``load``.
             No config path, libraries or source are supplied, and
             ``fetch_inputs`` is disabled.
     """
     loaded_inputs = ResolveInputs(load=resolved_inputs_metadata,
                                   fetch_inputs=False,
                                   config_path=None,
                                   libraries=None,
                                   source=None)
     self.resolved_inputs = loaded_inputs
     self.api = self.resolved_inputs.api
Пример #4
0
def test_validator():
    """Run the Validator on a valid and an invalid cromwell dir.

    For each directory the test checks, in order, ``validator.is_valid``,
    the ``successful`` flag in the re-read metadata file, and the
    ``successful`` flag in the record read back through the tracker. The
    tracker record and the output directory are reset at the end.
    """
    no_track = False
    source = 'MS012403'
    test_dir = '/projects/trans_scratch/dmacmillan/testing/pipelines/ihec_container_pipeline/test_validator'
    resolved_inputs = ResolveInputs(config_path,
                                    libraries=['K003436', 'K002831'],
                                    source=None,
                                    merge_antibodies=True,
                                    outdir=test_dir)

    # Pair the first treatment with the first control of the source.
    first_treatment = resolved_inputs.inputs[source]['treatments'][0]
    first_control = resolved_inputs.inputs[source]['controls'][0]
    container_job = ContainerJob(resolved_inputs, first_treatment,
                                 first_control)

    tracker = Tracker(resolved_inputs.metadata_path)
    metadata = Metadata(container_job.metadata_path)

    _created_record, _already_exists = tracker.create(metadata)
    # Save so the id set by create() is in the file the validator loads.
    metadata.save()

    scenarios = (
        (valid_cromwell_dir, True),
        (invalid_cromwell_dir, False),
    )
    for cromwell_dir, expected in scenarios:
        validator = Validator(cromwell_dir, container_job.metadata_path,
                              resolved_inputs.metadata_path, no_track)
        assert validator.is_valid is expected

        # The validator updated its own Metadata instance, so reload from disk.
        metadata = Metadata(container_job.metadata_path)
        assert metadata.data['successful'] is expected

        # Per the original author, this reads from the database via the metadata id.
        record = tracker.read(metadata)
        assert record['successful'] is expected

    # Clean up: drop the record and leave an empty output directory.
    tracker.delete(metadata)
    shutil.rmtree(resolved_inputs.outdir)
    os.makedirs(resolved_inputs.outdir)
Пример #5
0
def test_deep_2():
    """Compare the generated JSON config for the deep2 inputs to a reference.

    The first source's first treatment and first control form the job. The
    first-level fastq lists are sorted before comparing with
    ``valid/json_configs/deep.json``. A ``test_jid`` script is then written
    and the job's output directory removed.
    """
    # Libraries come from the ``inputs/deep2`` file beside this module.
    deep_libs = ResolveInputs.read_file(
        os.path.join(current_dir, 'inputs', 'deep2'))

    resolved_inputs = ResolveInputs(config_path,
                                    libraries=deep_libs,
                                    source=None,
                                    merge_antibodies=True)

    source = list(resolved_inputs.inputs.keys())[0]
    treatment = resolved_inputs.inputs[source]['treatments'][0]
    control = resolved_inputs.inputs[source]['controls'][0]
    container_job = ContainerJob(resolved_inputs, treatment, control)

    print(json.dumps(container_job.control_data, indent=4))

    reference_path = os.path.join(current_dir, 'valid', 'json_configs',
                                  'deep.json')
    valid_json_config = resolved_inputs.read_json_or_yaml(reference_path)

    # Sort the fastq lists so the comparison ignores their original order.
    for fastq_key in ('chip.ctl_fastqs', 'chip.fastqs'):
        container_job.json_config[fastq_key][0][0] = sorted(
            container_job.json_config[fastq_key][0][0])

    assert container_job.json_config == valid_json_config

    jid_path = container_job.get_most_recent_jid_path()
    container_job.write_script(jid_path, 'test_jid')
    print(container_job.outdir)

    # Remove the job's output directory.
    shutil.rmtree(container_job.outdir)
Пример #6
0
def test_resolve_inputs():
    """Check the ChIP library info the API returns for library ``K003134``."""
    library = 'K003134'
    expected = {
        'library': 'K003134',
        'source': 'JKU012_endometrium_normal',
        'project': 'EXTERNAL CREST',
        'antibody_name': 'H3K27me3',
    }

    # Built with an empty library list; only the API handle is used.
    resolved_inputs = ResolveInputs(config_path, [])
    actual = resolved_inputs.api.get_chip_library_info(library)

    assert actual == expected
def test_create():
    """Create, read back, update and delete a tracker record.

    A ContainerJob is built from the first treatment and first control of
    source ``51_Hf01_BlCM_Ct``. The test creates a record from the job's
    metadata, fetches it via the ``chipseq_analysis/<id>`` endpoint, changes
    its status to ``test`` and checks the update response. The record and
    the output directory are reset at the end.
    """
    # Deep data
    source = '51_Hf01_BlCM_Ct'
    resolved_inputs = ResolveInputs(
        config_path,
        libraries=[
            'K002434', 'K001955', 'K001967', 'K001969', 'K001972', 'K001988',
            'K002002', 'K002026', 'K002043', 'K002159', 'K002234', 'K002241',
            'K002247', 'K002284', 'K002309', 'K002314', 'K002322', 'K002326',
            'K002352', 'K002365', 'K002370', 'K002443', 'K002444', 'K002496',
            'K002533'
        ],
        merge_antibodies=True)

    # Create the container job
    container_job = ContainerJob(
        resolved_inputs, resolved_inputs.inputs[source]['treatments'][0],
        resolved_inputs.inputs[source]['controls'][0])

    tracker = Tracker(resolved_inputs.metadata_path)

    metadata = Metadata(container_job.metadata_path)

    logger.debug('Creating db record ...')
    created = tracker.create(metadata)
    # Other callers of Tracker.create in this file unpack a
    # ``(record, exists)`` pair. Accept that shape as well as a bare record,
    # so the lookups below do not run against a tuple.
    response = created[0] if isinstance(created, tuple) else created

    assert 'id' in response

    actual = resolved_inputs.api.get('chipseq_analysis/{}'.format(
        response['id']))

    assert actual['id'] == response['id']
    assert actual['status'] == 'production'

    metadata.update({'status': 'test'})

    assert metadata.data['status'] == 'test'

    response = tracker.update(metadata)

    assert response['status'] == 'test'

    # Clean: drop the record and leave an empty output directory.
    tracker.delete(metadata)
    shutil.rmtree(resolved_inputs.outdir)
    os.makedirs(resolved_inputs.outdir)