def trim_lane(item):
    """Trim the reads of a lane item according to its ``trim_reads`` setting.

    The value of ``item["config"]["algorithm"]["trim_reads"]`` selects the
    trimmer:

    - missing or falsy: no trimming is performed.
    - ``"read_through"``: the files are passed to ``trim_read_through``.
    - any other truthy value (this includes ``"low_quality"`` and
      ``"true"``): the files are passed to ``brun_trim_fastq``.

    Args:
        item: dict providing ``"files"`` (entries that are ``None`` are
            ignored), ``"dirs"`` and ``"config"``.

    Returns:
        ``[[item]]``. When trimming ran, ``item["files"]`` has been replaced
        in place with the list returned by the trimmer.
    """
    to_trim = [x for x in item["files"] if x is not None]
    dirs = item["dirs"]
    config = item["config"]
    # this block is to maintain legacy configuration files
    trim_reads = config["algorithm"].get("trim_reads", False)
    if not trim_reads:
        logger.info("Skipping trimming of %s." % (", ".join(to_trim)))
        # Same [[item]] shape as the trimmed path, so callers always
        # receive one return type.
        return [[item]]

    # A single if/else guarantees exactly one trimmer runs per call;
    # every value other than "read_through" means low-quality trimming.
    if trim_reads == "read_through":
        logger.info("Trimming low quality ends and read through adapter "
                    "sequence from %s." % (", ".join(to_trim)))
        out_files = trim_read_through(to_trim, dirs, config)
    else:
        logger.info("Trimming low quality ends from %s." % (", ".join(to_trim)))
        out_files = brun_trim_fastq(to_trim, dirs, config)
    item["files"] = out_files
    return [[item]]
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Looks up the lane's fastq files, splits them by barcode and emits one
    entry per fastq set of every item whose barcode has files. Reads are
    trimmed with ``brun_trim_fastq`` when the item's configuration enables
    ``trim_reads``.

    Args:
        lane_items: non-empty list of item dicts belonging to one lane. Each
            is read for ``"barcode_id"`` and ``"description"`` (optionally
            ``"name"``); the first one also supplies ``"lane"``.
        fc_name: flowcell name, used in the lane name.
        fc_date: flowcell date, used in the lane name.
        dirs: dict providing at least ``"fastq"`` and ``"work"``.
        config: lane-level configuration.

    Returns:
        List of ``(fastq1, fastq2, item, lane_name, lane_desc, dirs,
        item_config)`` tuples.
    """
    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    logger.info("Demultiplexing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], dirs["work"],
                                               lane_items[0], fc_name,
                                               config=config)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    out = []
    for item in lane_items:
        # Derive every item's config from the lane-level config instead of
        # from the previous item's result, so per-item customizations do
        # not accumulate across items.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] not in bc_files:
            continue
        for fastq1, fastq2, lane_ext in _prep_fastq_files(item, bc_files,
                                                          dirs, item_config):
            cur_lane_name = lane_name
            cur_lane_desc = item["description"]
            if (item.get("name", "") and
                    item_config["algorithm"].get("include_short_name", True)):
                cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
            if item["barcode_id"] is not None:
                cur_lane_name += "_%s" % (item["barcode_id"])
            if lane_ext is not None:
                cur_lane_name += "_s{0}".format(lane_ext)
            if item_config["algorithm"].get("trim_reads", False):
                trim_info = brun_trim_fastq(
                    [x for x in [fastq1, fastq2] if x is not None],
                    dirs, item_config)
                fastq1 = trim_info[0]
                if fastq2 is not None:
                    fastq2 = trim_info[1]
            out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                        dirs, item_config))
    return out
def trim_lane(fastq1, fastq2, info, lane_name, lane_desc, dirs, config):
    """Trim a lane's fastq files with the configured trimmer.

    Nothing happens unless ``trim_reads`` is enabled in
    ``config["algorithm"]``. When it is, the ``trimmer`` setting picks the
    method: ``"low_quality"`` (the default) calls ``brun_trim_fastq``,
    ``"adapter"`` calls ``cutadapt_trim``, and any other value leaves the
    files untouched.

    Returns a one-element list holding the
    ``(fastq1, fastq2, info, lane_name, lane_desc, dirs, config)`` tuple,
    with the fastq entries replaced by the trimmer output when trimming ran.
    """
    algorithm = config["algorithm"]
    # this block is to maintain legacy configuration files
    if not algorithm.get("trim_reads", False):
        return [(fastq1, fastq2, info, lane_name, lane_desc, dirs, config)]

    # swap the default to None if trim_reads gets deprecated
    trimmer = algorithm.get("trimmer", "low_quality")
    reads = [fq for fq in (fastq1, fastq2) if fq is not None]
    read_names = ", ".join(reads)

    if trimmer == "low_quality":
        logger.info("Trimming low quality ends from %s." % read_names)
        trimmed = brun_trim_fastq(reads, dirs, config)
    elif trimmer == "adapter":
        logger.info("Trimming low quality ends and adapter sequence "
                    "from %s." % read_names)
        trimmed = cutadapt_trim(reads, dirs, config)
    else:
        logger.info("Skipping trimming of %s." % read_names)
        trimmed = [fastq1, fastq2]

    first = trimmed[0]
    # Only a paired run has a second output file to pick up.
    second = trimmed[1] if fastq2 is not None else fastq2
    return [(first, second, info, lane_name, lane_desc, dirs, config)]
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Looks up the lane's fastq files, optionally removes spiked-in phiX
    reads lane-wise, splits the reads by barcode and emits one entry for
    every item whose barcode has files. Reads are trimmed with
    ``brun_trim_fastq`` when the item's configuration enables
    ``trim_reads``.

    Args:
        lane_items: non-empty list of item dicts belonging to one lane. Each
            is read for ``"barcode_id"`` and ``"description"`` (optionally
            ``"name"``); the first one also supplies ``"lane"`` and the
            customization used for the phiX step.
        fc_name: flowcell name, used in the lane name.
        fc_date: flowcell date, used in the lane name.
        dirs: dict providing at least ``"fastq"`` and ``"work"``.
        config: lane-level configuration.

    Returns:
        List of ``(fastq1, fastq2, item, lane_name, lane_desc, dirs,
        item_config)`` tuples.
    """
    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], dirs["work"],
                                               lane_items[0], fc_name,
                                               config=config)

    # Filter phiX
    custom_config = _update_config_w_custom(config, lane_items[0])
    if custom_config["algorithm"].get("filter_phix", False):
        # If we are starting from demultiplexed material, we will skip a
        # lane-wise screening. Screening will be performed on a sample basis
        if custom_config["algorithm"].get("demultiplexed", False):
            logger.warning("Will not filter phix lane-wise on already demultiplexed files. "
                           "You will have to specify genomes_filter_out option for each sample")
        else:
            logger.info("Filtering phiX from %s" % lane_name)
            info = {"genomes_filter_out": "spiked_phix",
                    "description": lane_name}
            processed = remove_contaminants(full_fastq1, full_fastq2, info,
                                            lane_name, info["description"],
                                            dirs, custom_config)
            # The filtered files and lane name replace the originals for
            # the demultiplexing step below.
            (full_fastq1, full_fastq2, _, lane_name) = processed[0][0:4]

    logger.info("Demultiplexing %s" % lane_name)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)

    out = []
    for item in lane_items:
        # Derive every item's config from the lane-level config instead of
        # from the previous item's result, so per-item customizations do
        # not accumulate across items.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] not in bc_files:
            continue
        fastq1, fastq2 = bc_files[item["barcode_id"]]
        cur_lane_name = lane_name
        cur_lane_desc = item["description"]
        if (item.get("name", "") and
                item_config["algorithm"].get("include_short_name", True)):
            cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
        if item["barcode_id"] is not None:
            cur_lane_name += "_%s" % (item["barcode_id"])
        if item_config["algorithm"].get("trim_reads", False):
            trim_info = brun_trim_fastq(
                [x for x in [fastq1, fastq2] if x is not None],
                dirs, item_config)
            fastq1 = trim_info[0]
            if fastq2 is not None:
                fastq2 = trim_info[1]
        out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                    dirs, item_config))
    return out
def trim_lane(fastq1, fastq2, info, lane_name, lane_desc, dirs, config):
    """Trim a lane's fastq files according to the ``trim_reads`` setting.

    The value of ``config["algorithm"]["trim_reads"]`` selects the trimmer:

    - missing or falsy: no trimming is performed.
    - ``"read_through"``: the files are passed to ``trim_read_through``.
    - any other truthy value (this includes ``"low_quality"`` and
      ``"true"``): the files are passed to ``brun_trim_fastq``.

    Args:
        fastq1: first fastq file.
        fastq2: second fastq file, or ``None`` when there is none.
        info, lane_name, lane_desc, dirs: passed through to the result
            unchanged (``dirs`` is also handed to the trimmer).
        config: configuration dict with an ``"algorithm"`` section.

    Returns:
        One-element list holding the ``(fastq1, fastq2, info, lane_name,
        lane_desc, dirs, config)`` tuple, with the fastq entries replaced by
        the trimmer output when trimming ran.
    """
    to_trim = [x for x in [fastq1, fastq2] if x is not None]
    # this block is to maintain legacy configuration files
    trim_reads = config["algorithm"].get("trim_reads", False)
    if not trim_reads:
        logger.info("Skipping trimming of %s." % (", ".join(to_trim)))
        return [(fastq1, fastq2, info, lane_name, lane_desc, dirs, config)]

    # A single if/else guarantees exactly one trimmer runs per call;
    # every value other than "read_through" means low-quality trimming.
    if trim_reads == "read_through":
        logger.info("Trimming low quality ends and read through adapter "
                    "sequence from %s." % (", ".join(to_trim)))
        out_files = trim_read_through(to_trim, dirs, config)
    else:
        logger.info("Trimming low quality ends from %s." % (", ".join(to_trim)))
        out_files = brun_trim_fastq(to_trim, dirs, config)

    fastq1 = out_files[0]
    # Only a paired run has a second output file to pick up.
    if fastq2 is not None:
        fastq2 = out_files[1]
    return [(fastq1, fastq2, info, lane_name, lane_desc, dirs, config)]
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Looks up the lane's fastq files (using the configuration customized for
    the first item), splits them by barcode and emits one entry per fastq
    set of every item whose barcode has files. Reads are trimmed with
    ``brun_trim_fastq`` when the item's configuration enables
    ``trim_reads``.

    Args:
        lane_items: non-empty list of item dicts belonging to one lane. Each
            is read for ``"barcode_id"`` and ``"description"`` (optionally
            ``"name"``); the first one also supplies ``"lane"``.
        fc_name: flowcell name, used in the lane name.
        fc_date: flowcell date, used in the lane name.
        dirs: dict providing at least ``"fastq"`` and ``"work"``.
        config: lane-level configuration.

    Returns:
        List of ``(fastq1, fastq2, item, lane_name, lane_desc, dirs,
        item_config)`` tuples.
    """
    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    logger.info("Demultiplexing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(
        dirs["fastq"], dirs["work"], lane_items[0], fc_name,
        config=_update_config_w_custom(config, lane_items[0]))
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    out = []
    for item in lane_items:
        # Derive every item's config from the lane-level config instead of
        # from the previous item's result, so per-item customizations do
        # not accumulate across items.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] not in bc_files:
            continue
        for fastq1, fastq2, lane_ext in _prep_fastq_files(item, bc_files,
                                                          dirs, item_config):
            cur_lane_name = lane_name
            cur_lane_desc = item["description"]
            if (item.get("name", "") and
                    item_config["algorithm"].get("include_short_name", True)):
                cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
            if item["barcode_id"] is not None:
                cur_lane_name += "_%s" % (item["barcode_id"])
            if lane_ext is not None:
                cur_lane_name += "_s{0}".format(lane_ext)
            if item_config["algorithm"].get("trim_reads", False):
                trim_info = brun_trim_fastq(
                    [x for x in [fastq1, fastq2] if x is not None],
                    dirs, item_config)
                fastq1 = trim_info[0]
                if fastq2 is not None:
                    fastq2 = trim_info[1]
            out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                        dirs, item_config))
    return out
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Looks up the lane's fastq files, optionally removes spiked-in phiX
    reads lane-wise, splits the reads by barcode and emits one entry for
    every item whose barcode has files. Reads are trimmed with
    ``brun_trim_fastq`` when the item's configuration enables
    ``trim_reads``.

    Args:
        lane_items: non-empty list of item dicts belonging to one lane. Each
            is read for ``"barcode_id"`` and ``"description"`` (optionally
            ``"name"``); the first one also supplies ``"lane"`` and the
            customization used for the phiX step.
        fc_name: flowcell name, used in the lane name.
        fc_date: flowcell date, used in the lane name.
        dirs: dict providing at least ``"fastq"`` and ``"work"``.
        config: lane-level configuration.

    Returns:
        List of ``(fastq1, fastq2, item, lane_name, lane_desc, dirs,
        item_config)`` tuples.
    """
    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], dirs["work"],
                                               lane_items[0], fc_name,
                                               config=config)

    # Filter phiX
    custom_config = _update_config_w_custom(config, lane_items[0])
    filter_phix = custom_config["algorithm"].get("filter_phix", False)
    if filter_phix:
        # If we are starting from demultiplexed material, we will skip a
        # lane-wise screening. Screening will be performed on a sample basis
        if custom_config["algorithm"].get("demultiplexed", False):
            logger.warning("Will not filter phix lane-wise on already demultiplexed files. "
                           "You will have to specify genomes_filter_out option for each sample")
        else:
            logger.info("Filtering phiX from %s" % lane_name)
            info = {
                "genomes_filter_out": "spiked_phix",
                "description": lane_name,
            }
            processed = remove_contaminants(full_fastq1, full_fastq2, info,
                                            lane_name, info["description"],
                                            dirs, custom_config)
            # The filtered files and lane name replace the originals for
            # the demultiplexing step below.
            (full_fastq1, full_fastq2, _, lane_name) = processed[0][0:4]

    logger.info("Demultiplexing %s" % lane_name)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)

    out = []
    for item in lane_items:
        # Derive every item's config from the lane-level config instead of
        # from the previous item's result, so per-item customizations do
        # not accumulate across items.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] not in bc_files:
            continue
        fastq1, fastq2 = bc_files[item["barcode_id"]]
        cur_lane_name = lane_name
        cur_lane_desc = item["description"]
        if (item.get("name", "") and
                item_config["algorithm"].get("include_short_name", True)):
            cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
        if item["barcode_id"] is not None:
            cur_lane_name += "_%s" % (item["barcode_id"])
        if item_config["algorithm"].get("trim_reads", False):
            trim_info = brun_trim_fastq(
                [x for x in [fastq1, fastq2] if x is not None],
                dirs, item_config)
            fastq1 = trim_info[0]
            if fastq2 is not None:
                fastq2 = trim_info[1]
        out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                    dirs, item_config))
    return out