示例#1
0
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    The lane name is built from the first item's ``lane`` value, the
    flowcell date and the flowcell name. Fastq files are looked up for the
    first item and handed to ``split_by_barcode``. Every item whose
    ``barcode_id`` is present in the split result contributes one output
    entry per (fastq1, fastq2, lane_ext) triple yielded by
    ``_prep_fastq_files``; items without an entry are skipped.

    Returns a list of tuples
    ``(fastq1, fastq2, item, lane_name, lane_description, dirs, config)``.
    """
    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    logger.info("Preparing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(
        dirs["fastq"], dirs["work"], lane_items[0], fc_name, dirs=dirs,
        config=shared.update_config_w_custom(config, lane_items[0]))
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    out = []
    for item in lane_items:
        # NOTE(review): config is rebound on every pass, so the value handed
        # to update_config_w_custom for a later item is the result produced
        # for the previous item -- confirm this carry-over is intended.
        config = shared.update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        # Membership test rather than dict.has_key(), which Python 3 removed.
        if item["barcode_id"] in bc_files:
            for fastq1, fastq2, lane_ext in _prep_fastq_files(item, bc_files, dirs, config):
                cur_lane_name = lane_name
                cur_lane_desc = item["description"]
                # Prefix the description with the short name unless disabled.
                if item.get("name", "") and config["algorithm"].get("include_short_name", True):
                    cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
                # Barcode and split suffixes keep the lane names distinct.
                if item["barcode_id"] is not None:
                    cur_lane_name += "_%s" % (item["barcode_id"])
                if lane_ext is not None:
                    cur_lane_name += "_s{0}".format(lane_ext)
                out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                            dirs, config))
    return out
示例#2
0
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Build the processing inputs for the items of one flowcell lane.

    Splits the lane's fastq files by barcode and returns a list of
    (fastq1, fastq2, item, lane name, lane description, dirs, config)
    tuples for the items that have an entry in the split result.
    """
    first_item = lane_items[0]
    lane_name = "%s_%s_%s" % (first_item['lane'], fc_date, fc_name)
    logger.info("Preparing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(
        dirs["fastq"], dirs["work"], first_item, fc_name, dirs=dirs,
        config=shared.update_config_w_custom(config, first_item))
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    prepared = []
    for item in lane_items:
        # The config for each item is derived from the one used for the
        # previous item, exactly as the rebinding implies.
        config = shared.update_config_w_custom(config, item)
        barcode_id = item["barcode_id"]
        # All barcodes can be specified without each having actual
        # sequences; there is no good check for that here, so just skip.
        if barcode_id not in bc_files:
            continue
        for fastq1, fastq2, lane_ext in _prep_fastq_files(item, bc_files,
                                                          dirs, config):
            description = item["description"]
            if item.get("name", "") and config["algorithm"].get(
                    "include_short_name", True):
                description = "%s : %s" % (item["name"], description)
            # Assemble the lane name from its optional suffixes.
            name_parts = [lane_name]
            if barcode_id is not None:
                name_parts.append("_%s" % barcode_id)
            if lane_ext is not None:
                name_parts.append("_s{0}".format(lane_ext))
            prepared.append((fastq1, fastq2, item, "".join(name_parts),
                             description, dirs, config))
    return prepared
示例#3
0
def merge_sample(data):
    """Combine the fastq and BAM files that belong to one sample.

    Fastq files are only combined when the "upload_fastq" algorithm option
    is set; otherwise both fastq entries are None. Returns a nested list
    holding a single dictionary that describes the merged sample.
    """
    logger.info("Combining fastq and BAM files %s" % str(data["name"]))
    config = shared.update_config_w_custom(data["config"], data["info"])
    info = data["info"]
    dirs = data["dirs"]
    genome_build, sam_ref = shared.ref_genome_info(info, config, dirs)
    # Default to no fastq output; only combine when explicitly requested.
    fastq1 = fastq2 = None
    if config["algorithm"].get("upload_fastq", False):
        fastq1, fastq2 = combine_fastq_files(data["fastq_files"],
                                             dirs["work"], config)
    sort_bam = merge_bam_files(data["bam_files"], dirs["work"], config)
    merged = {
        "name": data["name"],
        "metadata": info.get("metadata", {}),
        "info": info,
        "genome_build": genome_build,
        "sam_ref": sam_ref,
        "work_bam": sort_bam,
        "fastq1": fastq1,
        "fastq2": fastq2,
        "dirs": dirs,
        "config": config,
        "config_file": data["config_file"],
    }
    return [[merged]]
示例#4
0
文件: lane.py 项目: joshuashen/steady
def _item_needs_compute(lanes):
    """Determine if any item needs computing resources to spin up a cluster.
    """
    for lane_items, _, _, _, config in lanes:
        # check if multiplexed
        if len(lane_items) > 1 or lane_items[0]["barcode_id"] is not None:
            return True
        # check if we need to process the input by splitting or conversion
        item = lane_items[0]
        config = shared.update_config_w_custom(config, item)
        split_size = config.get("distributed", {}).get("align_split_size",
                                                       config["algorithm"].get("align_split_size", None))
        if split_size is not None:
            return True
        if needs_fastq_conversion(item, config):
            return True
    return False
示例#5
0
def merge_sample(data):
    """Merge the fastq and BAM inputs of a sample into one record.

    Returns ``[[record]]`` where ``record`` is a dictionary carrying the
    sample name, metadata, reference details, merged BAM, optional
    combined fastq files, directories and configuration.
    """
    logger.info("Combining fastq and BAM files %s" % str(data["name"]))
    config = shared.update_config_w_custom(data["config"], data["info"])
    sample_info = data["info"]
    genome_build, sam_ref = shared.ref_genome_info(sample_info, config,
                                                   data["dirs"])
    # Combined fastq files are produced only when upload is requested.
    want_fastq = config["algorithm"].get("upload_fastq", False)
    fastq1, fastq2 = (
        combine_fastq_files(data["fastq_files"], data["dirs"]["work"], config)
        if want_fastq else (None, None))
    sort_bam = merge_bam_files(data["bam_files"], data["dirs"]["work"], config)
    record = {
        "name": data["name"],
        "metadata": sample_info.get("metadata", {}),
        "info": sample_info,
        "genome_build": genome_build,
        "sam_ref": sam_ref,
        "work_bam": sort_bam,
        "fastq1": fastq1,
        "fastq2": fastq2,
        "dirs": data["dirs"],
        "config": config,
        "config_file": data["config_file"],
    }
    return [[record]]
示例#6
0
def _item_needs_compute(lanes):
    """Determine if any item needs computing resources to spin up a cluster.
    """
    for lane_items, _, _, _, config in lanes:
        # check if multiplexed
        if len(lane_items) > 1 or lane_items[0]["barcode_id"] is not None:
            return True
        # check if we need to process the input by splitting or conversion
        item = lane_items[0]
        config = shared.update_config_w_custom(config, item)
        split_size = config.get("distributed", {}).get(
            "align_split_size",
            config["algorithm"].get("align_split_size", None))
        if split_size is not None:
            return True
        if needs_fastq_conversion(item, config):
            return True
    return False