def samples_to_records(samples, default_keys=None):
    """Turn a collection of samples into uniform CWL output records.

    Every key reported by ``_get_all_cwlkeys`` is made present on each sample
    (set to None when it has no value) and recorded in that sample's
    ``cwl_keys`` list, which is appended to in place. Keys in the list-valued
    set are coerced to lists, and boolean values are replaced by their string
    form. Finally the ``metadata`` entry is passed through
    ``run_info.add_metadata_defaults``.

    samples -- iterable of sample dictionaries, each holding a ``cwl_keys`` list.
    default_keys -- handed unchanged to ``_get_all_cwlkeys``.

    Returns a list with one record per input sample, in input order.
    """
    from bcbio.pipeline import run_info

    list_valued = {
        "config__algorithm__tools_on",
        "config__algorithm__tools_off",
        "reference__genome_context",
    }
    all_keys = _get_all_cwlkeys(samples, default_keys)
    records = []
    for data in samples:
        for raw_key in sorted(all_keys):
            path = raw_key.split("__")
            # Track the current value locally; each update below keeps it in
            # sync with what is stored at ``path``.
            value = tz.get_in(path, data)
            if value is None:
                # Materialize the (possibly nested) key so all records share fields.
                data = tz.update_in(data, path, lambda _: None)
            known_keys = data["cwl_keys"]
            if raw_key not in known_keys:
                known_keys.append(raw_key)
            if raw_key in list_valued:
                if not value:
                    value = []
                elif not isinstance(value, (list, tuple)):
                    value = [value]
                data = tz.update_in(data, path, lambda _: value)
            # CWL serialization has trouble with booleans, so store them as strings.
            if isinstance(value, bool):
                data = tz.update_in(data, path, lambda _: str(value))
        data["metadata"] = run_info.add_metadata_defaults(data.get("metadata", {}))
        records.append(data)
    return records
def samples_to_records(samples, default_keys=None):
    """Build CWL output records from the given samples.

    For each sample, walks every key returned by ``_get_all_cwlkeys`` in
    sorted order and:

    - stores None at the key's nested location when nothing is set there,
    - adds the key to the sample's ``cwl_keys`` list if it is not yet listed
      (the list is modified in place),
    - wraps the value in a list for the keys that must be list-valued,
    - replaces a boolean value with its ``str()`` representation.

    The sample's ``metadata`` is then run through
    ``run_info.add_metadata_defaults``.

    samples -- iterable of sample dictionaries containing a ``cwl_keys`` list.
    default_keys -- forwarded as-is to ``_get_all_cwlkeys``.

    Returns the processed samples as a list, preserving input order.
    """
    from bcbio.pipeline import run_info

    as_list_keys = frozenset((
        "config__algorithm__tools_on",
        "config__algorithm__tools_off",
        "reference__genome_context",
    ))
    cwl_keys = _get_all_cwlkeys(samples, default_keys)
    records = []
    for sample in samples:
        for flat_key in sorted(cwl_keys):
            path = flat_key.split("__")
            if tz.get_in(path, sample) is None:
                sample = tz.update_in(sample, path, lambda _: None)
            if flat_key not in sample["cwl_keys"]:
                sample["cwl_keys"].append(flat_key)
            if flat_key in as_list_keys:
                current = tz.get_in(path, sample)
                if isinstance(current, (list, tuple)) and current:
                    as_list = current
                elif current:
                    as_list = [current]
                else:
                    # Any falsy value (None, "", empty sequence) becomes an empty list.
                    as_list = []
                sample = tz.update_in(sample, path, lambda _: as_list)
            # Booleans do not serialize well to CWL; keep their string form instead.
            if isinstance(tz.get_in(path, sample), bool):
                as_text = str(tz.get_in(path, sample))
                sample = tz.update_in(sample, path, lambda _: as_text)
        sample["metadata"] = run_info.add_metadata_defaults(sample.get("metadata", {}))
        records.append(sample)
    return records
def samples_to_records(samples):
    """Convert samples into output CWL records.

    Each key returned by ``_get_all_cwlkeys`` is made present on every sample
    (set to None when it has no value) and listed exactly once in the
    sample's ``cwl_keys``; that list is appended to in place. Keys in
    ``RECORD_CONVERT_TO_LIST`` are coerced to lists. The ``metadata`` entry is
    then passed through ``run_info.add_metadata_defaults``.

    samples -- iterable of sample dictionaries, each holding a ``cwl_keys`` list.

    Returns a list with one record per input sample, in input order.
    """
    from bcbio.pipeline import run_info

    RECORD_CONVERT_TO_LIST = {
        "config__algorithm__tools_on",
        "config__algorithm__tools_off",
        "config__algorithm__svcaller",
    }
    # The key set does not change while iterating, so sort it once.
    all_keys = sorted(_get_all_cwlkeys(samples))
    out = []
    for data in samples:
        for raw_key in all_keys:
            key = raw_key.split("__")
            if tz.get_in(key, data) is None:
                data = tz.update_in(data, key, lambda x: None)
            # Register each key once: an unguarded append duplicates keys that
            # are already listed but currently hold None.
            if raw_key not in data["cwl_keys"]:
                data["cwl_keys"].append(raw_key)
            if raw_key in RECORD_CONVERT_TO_LIST:
                val = tz.get_in(key, data)
                if not val:
                    val = []
                elif not isinstance(val, (list, tuple)):
                    val = [val]
                data = tz.update_in(data, key, lambda x: val)
        data["metadata"] = run_info.add_metadata_defaults(data.get("metadata", {}))
        out.append(data)
    return out
def _samples_to_records(samples):
    """Convert samples into output CWL records.

    Each key returned by ``_get_all_cwlkeys`` is made present on every sample
    (set to None when it has no value) and listed exactly once in the
    sample's ``cwl_keys``; that list is appended to in place. Keys in
    ``RECORD_CONVERT_TO_LIST`` are coerced to lists. The ``metadata`` entry is
    then passed through ``run_info.add_metadata_defaults``.

    samples -- iterable of sample dictionaries, each holding a ``cwl_keys`` list.

    Returns a list with one record per input sample, in input order.
    """
    from bcbio.pipeline import run_info

    RECORD_CONVERT_TO_LIST = {
        "config__algorithm__tools_on",
        "config__algorithm__tools_off",
    }
    # The key set does not change while iterating, so sort it once.
    all_keys = sorted(_get_all_cwlkeys(samples))
    out = []
    for data in samples:
        for raw_key in all_keys:
            key = raw_key.split("__")
            if tz.get_in(key, data) is None:
                data = tz.update_in(data, key, lambda x: None)
            # Register each key once: an unguarded append duplicates keys that
            # are already listed but currently hold None.
            if raw_key not in data["cwl_keys"]:
                data["cwl_keys"].append(raw_key)
            if raw_key in RECORD_CONVERT_TO_LIST:
                val = tz.get_in(key, data)
                if not val:
                    val = []
                elif not isinstance(val, (list, tuple)):
                    val = [val]
                data = tz.update_in(data, key, lambda x: val)
        data["metadata"] = run_info.add_metadata_defaults(data.get("metadata", {}))
        out.append(data)
    return out
def batch_for_variantcall(samples):
    """Prepare a set of samples for parallel variant calling.

    CWL input target that groups samples into batches and variant callers
    for parallel processing.

    The samples returned by ``_dup_samples_by_variantcaller`` are normalized
    so each carries the union of all ``cwl_keys`` plus a fixed set of default
    keys: missing values are set to None, every key is listed exactly once in
    the sample's ``cwl_keys``, and the ``tools_on``/``tools_off`` settings are
    coerced to lists. Each sample is then copied into one group per
    (batch, variant caller) pair; samples without batches are grouped under
    their sample name.

    samples -- input samples, handed to ``_dup_samples_by_variantcaller``.

    Returns the list of sample groups followed by the ``extras`` returned by
    ``_dup_samples_by_variantcaller``.
    """
    from bcbio.pipeline import run_info

    convert_to_list = {
        "config__algorithm__tools_on",
        "config__algorithm__tools_off",
    }
    default_keys = {
        "metadata__batch",
        "config__algorithm__validate",
        "config__algorithm__validate_regions",
    }
    to_process, extras = _dup_samples_by_variantcaller(samples, require_bam=False)
    batch_groups = collections.defaultdict(list)
    to_process = [utils.to_single_data(x) for x in to_process]
    all_keys = set(default_keys)
    for data in to_process:
        all_keys.update(data["cwl_keys"])
    # The key set is fixed from here on, so sort it once for all samples.
    ordered_keys = sorted(all_keys)
    for data in to_process:
        for raw_key in ordered_keys:
            key = raw_key.split("__")
            if tz.get_in(key, data) is None:
                data = tz.update_in(data, key, lambda x: None)
            # Register each key once: an unguarded append duplicates keys that
            # are already listed but currently hold None.
            if raw_key not in data["cwl_keys"]:
                data["cwl_keys"].append(raw_key)
            if raw_key in convert_to_list:
                val = tz.get_in(key, data)
                if not val:
                    val = []
                elif not isinstance(val, (list, tuple)):
                    val = [val]
                data = tz.update_in(data, key, lambda x: val)
        vc = get_variantcaller(data, require_bam=False)
        data["metadata"] = run_info.add_metadata_defaults(data.get("metadata", {}))
        batches = dd.get_batches(data) or dd.get_sample_name(data)
        if not isinstance(batches, (list, tuple)):
            batches = [batches]
        for b in batches:
            # Copy per group so a sample in several batches is not shared between them.
            batch_groups[(b, vc)].append(utils.deepish_copy(data))
    return list(batch_groups.values()) + extras