def main(config_file):
    """Demultiplex and process every input described in a YAML config file.

    Args:
        config_file: Path to the YAML configuration. The keys read here are
            ``ref``, ``input`` (a list of entries providing ``fastq``,
            ``barcodes`` and optionally ``old_style_barcodes``),
            ``dir -> tmp`` and ``algorithm -> cores``.

    Returns:
        None. Results of ``process_fastq`` are discarded.
    """
    with open(config_file) as in_handle:
        # NOTE(review): this used to be ``yaml.load(in_handle)``. Without an
        # explicit Loader that call can construct arbitrary Python objects
        # and is rejected outright by PyYAML >= 6. ``safe_load`` handles
        # plain mappings/lists/scalars; if a config relies on Python-specific
        # YAML tags, switch to ``yaml.load(in_handle, Loader=...)`` instead.
        config = yaml.safe_load(in_handle)
    # A single reference index is built up front and shared by all inputs.
    ref_index = novoalign.refindex(config["ref"], kmer_size=13, step_size=1)
    create_dirs(config)
    for cur in config["input"]:
        in_fastq = cur["fastq"]
        if cur.get("old_style_barcodes", False):
            in_fastq = convert_illumina_oldstyle(in_fastq)
        bc_files = demultiplex(in_fastq, cur["barcodes"],
                               config["dir"]["tmp"], config)
        with cpmap(config["algorithm"]["cores"]) as cur_map:
            # The map is iterated only to drive the work to completion
            # (presumably cur_map is lazy -- confirm against cpmap).
            for _ in cur_map(process_fastq,
                             ((bc_file, ref_index, cur, config, config_file)
                              for bc_file in bc_files)):
                pass
def fastq_to_process(config):
    """Retrieve fastq files to process, handling demultiplexing.

    Iterates over the entries in ``config["experiments"]``; when that key is
    absent it falls back to ``config["input"]``, and to no entries at all when
    neither key is present.

    For an entry with a non-empty ``barcodes`` list, its fastq is demultiplexed
    and the items returned by ``_assign_bc_files`` are yielded. Otherwise the
    entry itself is yielded after being updated in place: ``program`` is always
    copied from the top-level config, and ``algorithm`` is copied only when the
    entry does not define its own.

    Args:
        config: Parsed configuration mapping.

    Yields:
        ``(item, ref_index)`` tuples, where ``ref_index`` is the value returned
        by ``_index_ref_genome`` for the entry the item came from.
    """
    for cur in config.get("experiments", config.get("input", [])):
        ref_index = _index_ref_genome(config, cur)
        in_fastq = cur.get("fastq", None)
        if cur.get("old_style_barcodes", False):
            in_fastq = convert_illumina_oldstyle(in_fastq)
        if cur.get("barcodes", None):
            barcodes = [(b["name"], b["seq"]) for b in cur["barcodes"]]
            bc_files = demultiplex(in_fastq, barcodes,
                                   config["dir"]["tmp"], config)
            fastqs = _assign_bc_files(cur, bc_files)
        else:
            cur["program"] = config["program"]
            # ``in`` instead of dict.has_key(): has_key() was removed in
            # Python 3, while ``in`` behaves the same on Python 2 and 3.
            if "algorithm" not in cur:
                cur["algorithm"] = config["algorithm"]
            fastqs = [cur]
        for fq in fastqs:
            yield fq, ref_index