def testSysCall(self):
    """Check libgwas.sys_call on a command that succeeds and one that fails."""
    # Successful command: count the lines of this test file.
    line_count_output = libgwas.sys_call(f"wc -l {__file__}")

    # The first token of the wc output is the line count; this file is
    # known to contain more than 25 lines.
    self.assertGreater(int(line_count_output.split()[0]), 25)

    # Failing command: the test expects sys_call to report None.
    failed_output = libgwas.sys_call("wc -l non-existent-filename")
    self.assertIsNone(failed_output)
def split_chrom_jobs(args, chrom_file):
    """Split up GWAS jobs based on portions of a chromosome.

    The chromosome and position columns of ``chrom_file`` are read through
    ``cut`` and walked in order.  A job is closed (and a new one opened at the
    current marker) whenever the chromosome changes or the running counter has
    reached ``args.snps_per_job``.  The job still open after the last marker
    is closed at that marker's position.

    :param args: arguments from parseargs (``snps_per_job`` and ``map3`` are
        read)
    :param chrom_file: marker info file
    :return: dictionary name=>job_details, where the name looks like
        ``Chr<chrom>_<from>-<to>`` and the details like
        ``--chr <chrom> --from-bp <from> --to-bp <to>``
    """
    max_snp_count = args.snps_per_job

    # NOTE(review): these are the field numbers handed to `cut -f`; confirm
    # they match the position column of the marker file layout in use.
    poscol = 2 if args.map3 else 3

    job_list = {}
    cur = None          # chromosome of the job currently being built
    last_pos = None     # position of the most recently processed marker
    job_string = ""
    job_name = ""
    snp_count = 0

    for line in sys_call("cut -f 1,%d %s" % (poscol, chrom_file)):
        values = line.split()
        if not values:
            # Blank lines carry no marker and must not disturb last_pos.
            continue
        chrom, pos = [int(x) for x in values]

        if cur is not None and chrom == cur and snp_count < max_snp_count:
            # Same chromosome and the job still has room: keep extending it.
            snp_count += 1
        else:
            if cur is not None:
                # Chromosome changed or the job is full: close it at the
                # previous marker before starting the next one.
                job_list[job_name + str(last_pos)] = (
                    job_string + " --to-bp %d" % last_pos)
            cur = chrom
            job_string = "--chr %d --from-bp %d" % (chrom, pos)
            job_name = "Chr%d_%d-" % (chrom, pos)
            snp_count = 0

        last_pos = pos

    if job_string:
        # Close whatever job was still open when the input ran out.
        job_string += " --to-bp %d" % last_pos
        job_name += str(last_pos)
        job_list[job_name] = job_string

    return job_list
def split_impute_jobs(args, filename):
    """Parse the IMPUTE file and generate the list of jobs.

    Every job covers ``args.impute_count`` consecutive entries of ``filename``
    (a single entry when ``impute_count`` is unset or zero).  Splitting
    chromosomes into pieces is not supported for IMPUTE input; ``ExitIf`` is
    invoked with ``args.snps_per_job > 1`` as its condition.

    :param args: parsearg object containing command line arguments
        (``snps_per_job`` and ``impute_count`` are read)
    :param filename: file containing the IMPUTE gen files and chromosome
        numbers; the gen file is taken from the last column of each line
    :return: dictionary name=>job_details, where the name looks like
        ``job<NNNN>-<gen file stem>`` and the details like
        ``--impute-offset <n> --impute-count <count>``
    """
    max_snp_count = args.snps_per_job
    impute_count = args.impute_count or 1

    ExitIf("mvmany doesn't support splitting IMPUTE jobs into pieces at this time", max_snp_count > 1)

    # Reduce each gen file path to its base name without the directory part
    # and without the final extension.
    gen_files = []
    with open(filename) as impute_file:
        for line in impute_file:
            fields = line.split()
            if not fields:
                # Blank lines are skipped, so offsets count non-blank entries.
                continue
            base_name = fields[-1].split("/")[-1]
            gen_files.append(".".join(base_name.split(".")[0:-1]))

    job_list = {}
    # One job per group of impute_count entries; offsets are 1-based.
    group_starts = range(0, len(gen_files), impute_count)
    for job_num, first_index in enumerate(group_starts, 1):
        job_string = "--impute-offset %d --impute-count %d" % (first_index + 1, impute_count)
        job_name = "job%04d-%s" % (job_num, gen_files[first_index])
        job_list[job_name] = job_string
        print(job_string)

    return job_list
def split_impute_jobs(args, filename):
    """Parse the IMPUTE file and generate the list of jobs.

    Every job covers ``args.impute_count`` consecutive entries of ``filename``
    (a single entry when ``impute_count`` is unset or zero).  Splitting
    chromosomes into pieces is not supported for IMPUTE input; ``ExitIf`` is
    invoked with ``args.snps_per_job > 1`` as its condition.

    :param args: parsearg object containing command line arguments
        (``snps_per_job`` and ``impute_count`` are read)
    :param filename: file containing the IMPUTE gen files and chromosome
        numbers; the gen file is taken from the last column of each line
    :return: dictionary name=>job_details, where the name looks like
        ``job<NNNN>-<gen file stem>`` and the details like
        ``--impute-offset <n> --impute-count <count>``
    """
    max_snp_count = args.snps_per_job
    impute_count = args.impute_count or 1

    ExitIf(
        "mvmany doesn't support splitting IMPUTE jobs into pieces at this time",
        max_snp_count > 1)

    # Reduce each gen file path to its base name without the directory part
    # and without the final extension.
    gen_files = []
    with open(filename) as impute_file:
        for line in impute_file:
            fields = line.split()
            if not fields:
                # Blank lines are skipped, so offsets count non-blank entries.
                continue
            base_name = fields[-1].split("/")[-1]
            gen_files.append(".".join(base_name.split(".")[0:-1]))

    job_list = {}
    # One job per group of impute_count entries; offsets are 1-based.
    group_starts = range(0, len(gen_files), impute_count)
    for job_num, first_index in enumerate(group_starts, 1):
        job_string = "--impute-offset %d --impute-count %d" % (
            first_index + 1, impute_count)
        job_name = "job%04d-%s" % (job_num, gen_files[first_index])
        job_list[job_name] = job_string
        print(job_string)

    return job_list