def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Args:
        lane_items: Non-empty sequence of per-sample mappings for one lane.
            The first entry supplies ``lane``; every entry is read for
            ``barcode_id``, ``description`` and the optional ``name``.
        fc_name: Flowcell name, used as the last component of the lane name.
        fc_date: Flowcell date, used as the middle component of the lane name.
        dirs: Mapping of directories; ``fastq`` and ``work`` are read here and
            the whole mapping is passed on to the helpers.
        config: Base configuration. It is never rebound: each item gets its
            own customised config derived from this base.

    Returns:
        A list of ``(fastq1, fastq2, item, lane_name, lane_desc, dirs,
        item_config)`` tuples, one per fastq pair produced for each item whose
        barcode is present in the barcode split.

    Raises:
        IndexError: If ``lane_items`` is empty.
    """
    first_item = lane_items[0]
    lane_name = "%s_%s_%s" % (first_item['lane'], fc_date, fc_name)
    logger.info("Preparing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(
        dirs["fastq"], dirs["work"], first_item, fc_name, dirs=dirs,
        config=shared.update_config_w_custom(config, first_item))
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    out = []
    for item in lane_items:
        # Always start from the base config. Rebinding `config` here would
        # feed one item's customised result into the next item's update.
        item_config = shared.update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] not in bc_files:
            continue
        for fastq1, fastq2, lane_ext in _prep_fastq_files(
                item, bc_files, dirs, item_config):
            cur_lane_name = lane_name
            cur_lane_desc = item["description"]
            if item.get("name", "") and item_config["algorithm"].get(
                    "include_short_name", True):
                cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
            if item["barcode_id"] is not None:
                cur_lane_name += "_%s" % (item["barcode_id"])
            if lane_ext is not None:
                cur_lane_name += "_s{0}".format(lane_ext)
            out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                        dirs, item_config))
    return out
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Args:
        lane_items: Non-empty sequence of per-sample mappings for one lane.
            The first entry supplies ``lane``; every entry is read for
            ``barcode_id``, ``description`` and the optional ``name``.
        fc_name: Flowcell name, used as the last component of the lane name.
        fc_date: Flowcell date, used as the middle component of the lane name.
        dirs: Mapping of directories; ``fastq`` and ``work`` are read here and
            the whole mapping is passed on to the helpers.
        config: Base configuration. It is never rebound: each item gets its
            own customised config derived from this base.

    Returns:
        A list of ``(fastq1, fastq2, item, lane_name, lane_desc, dirs,
        item_config)`` tuples. When the item's ``algorithm`` section enables
        ``trim_reads``, the fastq entries are the outputs of
        ``brun_trim_fastq``.

    Raises:
        IndexError: If ``lane_items`` is empty.
    """
    first_item = lane_items[0]
    lane_name = "%s_%s_%s" % (first_item['lane'], fc_date, fc_name)
    logger.info("Demultiplexing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], dirs["work"],
                                               first_item, fc_name,
                                               config=config)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    out = []
    for item in lane_items:
        # Always start from the base config. Rebinding `config` here would
        # feed one item's customised result into the next item's update.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        # `in` replaces dict.has_key, which no longer exists on Python 3.
        if item["barcode_id"] not in bc_files:
            continue
        for fastq1, fastq2, lane_ext in _prep_fastq_files(item, bc_files,
                                                          dirs, item_config):
            cur_lane_name = lane_name
            cur_lane_desc = item["description"]
            if item.get("name", "") and item_config["algorithm"].get(
                    "include_short_name", True):
                cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
            if item["barcode_id"] is not None:
                cur_lane_name += "_%s" % (item["barcode_id"])
            if lane_ext is not None:
                cur_lane_name += "_s{0}".format(lane_ext)
            if item_config["algorithm"].get("trim_reads", False):
                # Only the files that exist are handed to the trimmer; the
                # second result is used only when a second read file exists.
                trim_info = brun_trim_fastq(
                    [x for x in [fastq1, fastq2] if x is not None],
                    dirs, item_config)
                fastq1 = trim_info[0]
                if fastq2 is not None:
                    fastq2 = trim_info[1]
            out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                        dirs, item_config))
    return out
def process_lane(info, fc_name, fc_date, dirs, config):
    """Expand a single lane description into its per-barcode work items.

    The lane's configuration is customised once from ``info``, the sample is
    logged, the lane's fastq files are looked up and then handed to
    ``split_by_barcode``. Each split it yields becomes one tuple of
    ``(fastq1, fastq2, genome_build, lane_name, sample_name, dirs, config)``.
    """
    config = _update_config_w_custom(config, info)

    # The short name, when present and enabled, is prefixed to the description.
    description = info.get("description", "")
    short_name_enabled = config["algorithm"].get("include_short_name", True)
    if short_name_enabled and info.get("name", ""):
        sample_name = "%s---%s" % (info.get("name", ""), description)
    else:
        sample_name = description
    genome_build = info.get("genome_build", None)
    multiplex = info.get("multiplex", None)

    summary = ("Processing sample: %s; lane %s; reference genome %s; "
               "researcher %s; analysis method %s")
    log.info(summary % (sample_name, info["lane"], genome_build,
                        info.get("researcher", ""), info.get("analysis", "")))
    if multiplex:
        log.debug("Sample %s multiplexed as: %s" % (sample_name, multiplex))

    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], info, fc_name)
    lane_name = "%s_%s_%s" % (info['lane'], fc_date, fc_name)
    splits = split_by_barcode(full_fastq1, full_fastq2, multiplex, lane_name,
                              dirs, config)
    # A split with a name gets it appended to the lane name; a split without
    # its own sample name falls back to "<sample>---<split name>".
    return [
        (fastq1, fastq2, genome_build,
         "%s_%s" % (lane_name, mname) if mname else lane_name,
         "%s---%s" % (sample_name, mname) if msample is None else msample,
         dirs, config)
        for mname, msample, fastq1, fastq2 in splits
    ]
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Optionally screens the whole lane for spiked-in phiX before
    demultiplexing, then emits one work item per sample whose barcode is
    present in the barcode split.

    Args:
        lane_items: Non-empty sequence of per-sample mappings for one lane.
            The first entry supplies ``lane`` and the lane-level settings;
            every entry is read for ``barcode_id``, ``description`` and the
            optional ``name``.
        fc_name: Flowcell name, used as the last component of the lane name.
        fc_date: Flowcell date, used as the middle component of the lane name.
        dirs: Mapping of directories; ``fastq`` and ``work`` are read here and
            the whole mapping is passed on to the helpers.
        config: Base configuration. It is never rebound: each item gets its
            own customised config derived from this base.

    Returns:
        A list of ``(fastq1, fastq2, item, lane_name, lane_desc, dirs,
        item_config)`` tuples. When the item's ``algorithm`` section enables
        ``trim_reads``, the fastq entries are the outputs of
        ``brun_trim_fastq``.

    Raises:
        IndexError: If ``lane_items`` is empty.
    """
    first_item = lane_items[0]
    lane_name = "%s_%s_%s" % (first_item['lane'], fc_date, fc_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], dirs["work"],
                                               first_item, fc_name,
                                               config=config)

    # Filter phiX
    custom_config = _update_config_w_custom(config, first_item)
    if custom_config["algorithm"].get("filter_phix", False):
        # If we are starting from demultiplexed material, we will skip a
        # lane-wise screening. Screening will be performed on a sample basis.
        if custom_config["algorithm"].get("demultiplexed", False):
            # `warning` is used because `warn` is a deprecated alias.
            logger.warning("Will not filter phix lane-wise on already demultiplexed files. "
                           "You will have to specify genomes_filter_out option for each sample")
        else:
            logger.info("Filtering phiX from %s" % lane_name)
            info = {"genomes_filter_out": "spiked_phix",
                    "description": lane_name}
            processed = remove_contaminants(full_fastq1, full_fastq2, info,
                                            lane_name, info["description"],
                                            dirs, custom_config)
            # The filtered files and lane name replace the originals for
            # everything that follows.
            (full_fastq1, full_fastq2, _, lane_name) = processed[0][0:4]

    logger.info("Demultiplexing %s" % lane_name)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)

    out = []
    for item in lane_items:
        # Always start from the base config. Rebinding `config` here would
        # feed one item's customised result into the next item's update.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] not in bc_files:
            continue
        fastq1, fastq2 = bc_files[item["barcode_id"]]
        cur_lane_name = lane_name
        cur_lane_desc = item["description"]
        if item.get("name", "") and item_config["algorithm"].get(
                "include_short_name", True):
            cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
        if item["barcode_id"] is not None:
            cur_lane_name += "_%s" % (item["barcode_id"])
        if item_config["algorithm"].get("trim_reads", False):
            # Only the files that exist are handed to the trimmer; the second
            # result is used only when a second read file exists.
            trim_info = brun_trim_fastq(
                [x for x in [fastq1, fastq2] if x is not None],
                dirs, item_config)
            fastq1 = trim_info[0]
            if fastq2 is not None:
                fastq2 = trim_info[1]
        out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                    dirs, item_config))
    return out
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Args:
        lane_items: Non-empty sequence of per-sample mappings for one lane.
            The first entry supplies ``lane``; every entry is read for
            ``barcode_id``, ``description`` and the optional ``name``.
        fc_name: Flowcell name, used as the last component of the lane name.
        fc_date: Flowcell date, used as the middle component of the lane name.
        dirs: Mapping of directories; ``fastq`` is read here and the whole
            mapping is passed on to ``split_by_barcode``.
        config: Base configuration. It is never rebound: each item gets its
            own customised config derived from this base.

    Returns:
        A list of ``(fastq1, fastq2, item, lane_name, lane_desc, dirs,
        item_config)`` tuples, one per item whose barcode is present in the
        barcode split.

    Raises:
        IndexError: If ``lane_items`` is empty.
    """
    first_item = lane_items[0]
    lane_name = "%s_%s_%s" % (first_item["lane"], fc_date, fc_name)
    log.debug("Demultiplexing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], first_item,
                                               fc_name)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    out = []
    for item in lane_items:
        # Always start from the base config. Rebinding `config` here would
        # feed one item's customised result into the next item's update.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        # `in` replaces dict.has_key, which no longer exists on Python 3.
        if item["barcode_id"] not in bc_files:
            continue
        fastq1, fastq2 = bc_files[item["barcode_id"]]
        cur_lane_name = lane_name
        cur_lane_desc = item["description"]
        if item.get("name", ""):
            cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
        if item["barcode_id"] is not None:
            cur_lane_name += "_%s" % (item["barcode_id"])
        out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                    dirs, item_config))
    return out
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Args:
        lane_items: Non-empty sequence of per-sample mappings for one lane.
            Every entry is read for ``barcode_id`` and ``rgnames["lane"]``;
            the first entry's lane name is used for the barcode split.
        fc_name: Flowcell name, passed to ``get_fastq_files``.
        fc_date: Flowcell date; not used by this implementation.
        dirs: Mapping of directories; ``fastq`` and ``work`` are read here and
            the whole mapping is passed on to the helpers.
        config: Base configuration. It is never rebound: each item gets its
            own customised config derived from this base.

    Returns:
        A list of ``(fastq1, fastq2, item, dirs, item_config)`` tuples, one per
        fastq pair produced for each item whose barcode is present in the
        barcode split. Each returned ``item`` is a copy of the input item with
        its own ``rgnames`` mapping, whose ``lane`` carries the barcode and
        split suffixes. The input items are left unmodified.

    Raises:
        IndexError: If ``lane_items`` is empty.
    """
    first_item = lane_items[0]
    full_fastq1, full_fastq2 = get_fastq_files(
        dirs["fastq"], dirs["work"], first_item, fc_name, dirs=dirs,
        config=config_utils.update_w_custom(config, first_item))
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                first_item["rgnames"]["lane"], dirs, config)
    out = []
    for item in lane_items:
        logger.debug("Preparing %s" % item["rgnames"]["lane"])
        # Always start from the base config. Rebinding `config` here would
        # feed one item's customised result into the next item's update.
        item_config = config_utils.update_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] not in bc_files:
            continue
        base_lane = item["rgnames"]["lane"]
        for fastq1, fastq2, lane_ext in _prep_fastq_files(item, bc_files,
                                                          dirs, item_config):
            # Build every lane name from the unmodified base. Appending to
            # item["rgnames"]["lane"] in place would accumulate suffixes
            # across splits.
            split_lane = base_lane
            if item["barcode_id"] is not None:
                split_lane += "_%s" % (item["barcode_id"])
            if lane_ext is not None:
                split_lane += "_s{0}".format(lane_ext)
            # Each output gets its own item and rgnames mapping, so entries
            # for different splits do not alias one shared dict.
            split_item = dict(item)
            split_item["rgnames"] = dict(item["rgnames"])
            split_item["rgnames"]["lane"] = split_lane
            out.append((fastq1, fastq2, split_item, dirs, item_config))
    return out
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Optionally screens the whole lane for spiked-in phiX before
    demultiplexing, then emits one work item per sample whose barcode is
    present in the barcode split.

    Args:
        lane_items: Non-empty sequence of per-sample mappings for one lane.
            The first entry supplies ``lane`` and the lane-level settings;
            every entry is read for ``barcode_id``, ``description`` and the
            optional ``name``.
        fc_name: Flowcell name, used as the last component of the lane name.
        fc_date: Flowcell date, used as the middle component of the lane name.
        dirs: Mapping of directories; ``fastq`` and ``work`` are read here and
            the whole mapping is passed on to the helpers.
        config: Base configuration. It is never rebound: each item gets its
            own customised config derived from this base.

    Returns:
        A list of ``(fastq1, fastq2, item, lane_name, lane_desc, dirs,
        item_config)`` tuples. When the item's ``algorithm`` section enables
        ``trim_reads``, the fastq entries are the outputs of
        ``brun_trim_fastq``.

    Raises:
        IndexError: If ``lane_items`` is empty.
    """
    lane_head = lane_items[0]
    lane_name = "%s_%s_%s" % (lane_head['lane'], fc_date, fc_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], dirs["work"],
                                               lane_head, fc_name,
                                               config=config)

    # Filter phiX
    custom_config = _update_config_w_custom(config, lane_head)
    if custom_config["algorithm"].get("filter_phix", False):
        # If we are starting from demultiplexed material, we will skip a
        # lane-wise screening. Screening will be performed on a sample basis.
        if custom_config["algorithm"].get("demultiplexed", False):
            # `warning` is used because `warn` is a deprecated alias.
            logger.warning("Will not filter phix lane-wise on already demultiplexed files. "
                           "You will have to specify genomes_filter_out option for each sample")
        else:
            logger.info("Filtering phiX from %s" % lane_name)
            info = {
                "genomes_filter_out": "spiked_phix",
                "description": lane_name
            }
            processed = remove_contaminants(full_fastq1, full_fastq2, info,
                                            lane_name, info["description"],
                                            dirs, custom_config)
            # The filtered files and lane name replace the originals for
            # everything that follows.
            (full_fastq1, full_fastq2, _, lane_name) = processed[0][0:4]

    logger.info("Demultiplexing %s" % lane_name)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)

    out = []
    for item in lane_items:
        # Always start from the base config. Rebinding `config` here would
        # feed one item's customised result into the next item's update.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] not in bc_files:
            continue
        fastq1, fastq2 = bc_files[item["barcode_id"]]
        cur_lane_name = lane_name
        cur_lane_desc = item["description"]
        if item.get("name", "") and item_config["algorithm"].get(
                "include_short_name", True):
            cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
        if item["barcode_id"] is not None:
            cur_lane_name += "_%s" % (item["barcode_id"])
        if item_config["algorithm"].get("trim_reads", False):
            # Only the files that exist are handed to the trimmer; the second
            # result is used only when a second read file exists.
            trim_info = brun_trim_fastq(
                [x for x in [fastq1, fastq2] if x is not None],
                dirs, item_config)
            fastq1 = trim_info[0]
            if fastq2 is not None:
                fastq2 = trim_info[1]
        out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                    dirs, item_config))
    return out