def load_samples(manifest_path, database, **kwargs):
    """Create sample records for the VCF file referenced by a manifest.

    The manifest at ``manifest_path`` is read and, provided it is marked
    for load and its sample section passes ``check_sample_section``, the
    VCF file named in its ``vcf`` section (resolved relative to the
    manifest's directory) is opened to obtain its sample columns.
    ``create_sample`` is called once per VCF sample, and a
    ``SampleManifest`` is stored for every sample that was just created or
    that has none yet. The manifest is then published on
    ``SAMPLE_CHANNEL`` if at least one sample was created or the caller
    passed ``force=True``.

    Args:
        manifest_path: Path to the manifest file.
        database: Forwarded unchanged to ``SAMPLE_CHANNEL.publish``.
        **kwargs: Only ``force`` is read; when truthy the manifest is
            published even if no new sample was created.

    Returns:
        ``None`` if the manifest is not marked for load, its sample
        section is invalid, or the number of names in the manifest's
        ``sample`` field differs from the number of samples in the VCF.
        Otherwise a dict ``{'created': int, 'skipped': int}`` counting the
        samples for which ``create_sample`` reported created / not
        created.
    """
    manifest = ManifestReader(manifest_path)

    # Ensure the sample is marked to be loaded.
    if not manifest.marked_for_load():
        log.info('Sample not marked for load', extra={
            'manifest_path': manifest_path,
        })
        return

    # Ensure the sample section is valid.
    if not check_sample_section(manifest):
        log.info('Manifest sample section is not valid', extra={
            'manifest_path': manifest_path,
        })
        return

    # Example of the section being read:
    #
    #   [sample]
    #   project = PCGC
    #   batch = OTHER
    #   sample = 1-03131
    #   version = 1
    sample_info = manifest.section('sample')
    vcf_info = manifest.section('vcf')

    # The VCF itself determines which samples exist; the manifest's
    # optional `sample` field only supplies the names passed on as
    # `sample_name` (see below).
    vcf_path = os.path.join(os.path.dirname(manifest_path), vcf_info['file'])
    with open(vcf_path) as file_obj:
        log.debug("opening {0} in load_samples".format(vcf_path))
        reader = vcf.Reader(file_obj)
        samples = reader.samples

    # Without a `sample` field the VCF column names double as sample names.
    if 'sample' in sample_info:
        pretty_names = sample_info['sample'].split(',')
    else:
        pretty_names = samples

    # Names are paired with VCF columns by position, so the counts must
    # agree; otherwise zip() below would silently drop the extras.
    if len(samples) != len(pretty_names):
        log.info('Length of comma-delimited samples field in manifest '
                 'does not match the length of samples in {0}'
                 .format(vcf_info['file']))
        return

    # Create the samples (and batch and project if needed).
    num_created = 0
    num_skipped = 0
    for pretty_name, vcf_sample in zip(pretty_names, samples):
        log.debug('Trying to create {0} sample record'.format(vcf_sample))
        sample, created = create_sample(sample_name=pretty_name,
                                        vcf_colname=vcf_sample,
                                        batch_name=sample_info['batch'],
                                        project_name=sample_info['project'],
                                        version=sample_info['version'])
        # Report the actual outcome rather than always claiming "created".
        log.debug('{0} {1}'.format(sample,
                                   'created' if created else 'skipped'))

        if created:
            num_created += 1
            sts.transition(sample, 'Sample Record Created')
        else:
            num_skipped += 1

        # Kept under its own name so the ManifestReader bound to
        # `manifest` above is not shadowed by a queryset.
        existing_manifests = SampleManifest.objects.filter(sample=sample)

        # Create a manifest object for the sample if one does not exist.
        if created or not existing_manifests.exists():
            sample_manifest = SampleManifest(sample=sample)
            sample_manifest.load_content(manifest_path)
            sample_manifest.save()
            sts.transition(sample, 'Sample Manifest Created')

    # Publish to channel that this manifest is eligible for processing
    # downstream.
    if num_created > 0 or kwargs.get('force', False):
        SAMPLE_CHANNEL.publish(manifest_path=manifest_path,
                               database=database)

    # Report how many samples were newly created and how many were skipped.
    return {'created': num_created, 'skipped': num_skipped}