Exemplo n.º 1
0
def load_samples(manifest_path, database, **kwargs):
    """Create sample records for the samples listed in a manifest's VCF file.

    The manifest at ``manifest_path`` is read, validated, and the VCF file
    named in its ``[vcf]`` section (resolved relative to the manifest's
    directory) is scanned for sample columns. One sample record is created
    per VCF sample via ``create_sample``; a ``SampleManifest`` is stored for
    each sample that was newly created or has no manifest yet.

    Args:
        manifest_path: Path to the manifest file.
        database: Passed through unchanged to ``SAMPLE_CHANNEL.publish``.
        **kwargs: Only ``force`` is read here; when truthy the manifest is
            published downstream even if no new sample was created.

    Returns:
        ``None`` if the manifest is not marked for load, its sample section
        is not valid, or the number of names in the manifest's ``sample``
        field differs from the number of samples in the VCF. Otherwise a
        dict ``{'created': <int>, 'skipped': <int>}`` counting the samples
        that were created and those that already existed.
    """
    manifest = ManifestReader(manifest_path)

    # Ensure the sample is marked to be loaded..
    if not manifest.marked_for_load():
        log.info('Sample not marked for load', extra={
            'manifest_path': manifest_path,
        })
        return None

    # Ensure the sample section is valid..
    if not check_sample_section(manifest):
        log.info('Manifest sample section is not valid', extra={
            'manifest_path': manifest_path,
        })
        return None

    # Example of the manifest section read below:
    #
    # [sample]
    # project = PCGC
    # batch = OTHER
    # sample = 1-03131
    # version = 1

    sample_info = manifest.section('sample')
    vcf_info = manifest.section('vcf')

    # The VCF is the source of truth for which samples exist; the manifest's
    # `sample` field (if present) only supplies display names for them.
    vcf_path = os.path.join(os.path.dirname(manifest_path), vcf_info['file'])

    with open(vcf_path) as file_obj:
        log.debug("opening {0} in load_samples".format(vcf_path))
        reader = vcf.Reader(file_obj)
        samples = reader.samples

    if 'sample' in sample_info:
        pretty_names = sample_info['sample'].split(',')
    else:
        pretty_names = samples

    # Names are paired with VCF columns positionally, so the counts must
    # agree or zip() below would silently drop entries.
    if len(samples) != len(pretty_names):
        log.info('Length of comma-delimited samples field in manifest '
                 'does not match the length of samples in {0}'
                 .format(vcf_info['file']))
        return None

    # Create the sample (and batch and project if needed)..
    num_created = 0
    num_skipped = 0

    for pretty_name, vcf_sample in zip(pretty_names, samples):
        log.debug('Trying to create {0} sample record'.format(vcf_sample))
        sample, created = create_sample(sample_name=pretty_name,
                                        vcf_colname=vcf_sample,
                                        batch_name=sample_info['batch'],
                                        project_name=sample_info['project'],
                                        version=sample_info['version'])
        log.debug('{0} created'.format(sample))

        if created:
            num_created += 1
            sts.transition(sample, 'Sample Record Created')
        else:
            num_skipped += 1

        # Kept under its own name so the ManifestReader bound to `manifest`
        # above is not overwritten inside the loop.
        existing_manifests = SampleManifest.objects.filter(sample=sample)
        # Create a manifest object for the sample if one does not exist
        if created or not existing_manifests.exists():
            sample_manifest = SampleManifest(sample=sample)
            sample_manifest.load_content(manifest_path)
            sample_manifest.save()
            sts.transition(sample, 'Sample Manifest Created')

    # Publish to channel that this manifest is eligible for processing
    # downstream..
    if num_created > 0 or kwargs.get('force', False):
        SAMPLE_CHANNEL.publish(manifest_path=manifest_path, database=database)

    # Report how many samples were created versus already present
    load_dict = {'created': num_created, 'skipped': num_skipped}
    return load_dict