import argparse
import random
import sys

import matplotlib.pyplot as plt

import geneTools

# Look for the amount of noise by checking coverage values at OFFSET nt
# upstream from gene stops (OFFSET is not defined in this part of the script).

parser = argparse.ArgumentParser(description="parser for a gene and a coverage file")
parser.add_argument("GENES_FILE", help="path to gene file you want to use")
parser.add_argument("POSITIVE_COVERAGE_FILE", help="path to coverage file you want to use")
args = vars(parser.parse_args())

gene_lines, gene_filetype = geneTools.readORFLines(args['GENES_FILE'])

# Select a certain number of random genes.
NUM_GENES = 500
gene_lines_rand = []
# Visit the gene lines in random order *without replacement*.  Drawing random
# indices with replacement and then discarding duplicates (the previous
# approach) silently produced fewer than NUM_GENES genes, and
# random.randint(0, -1) raised ValueError when the gene file was empty.
for gene_line in random.sample(gene_lines, len(gene_lines)):
    if len(gene_lines_rand) >= NUM_GENES:
        break
    line_data = geneTools.getLineData(gene_line, gene_filetype)
    # Distinct lines may still parse to the same data, so keep the duplicate
    # check.  A list is used for membership because it is not visible here
    # whether getLineData returns a hashable value.
    if line_data not in gene_lines_rand:
        gene_lines_rand.append(line_data)
gene_lines_rand.sort()

pos_lines, neg_lines = geneTools.readCoverageLines(args['POSITIVE_COVERAGE_FILE'])

# Cursors into the two lists of coverage lines, starting at the first line.
ind_pos = 0
ind_neg = 0
# Integer value of the second tab-separated field of the current line
# (presumably the genomic position -- confirm against the coverage format).
x_pos = int(pos_lines[ind_pos].split('\t')[1])
x_neg = int(neg_lines[ind_neg].split('\t')[1])
import geneTools parser = argparse.ArgumentParser( description= "parser for removing one set of (verified) genes from another set of genes" ) parser.add_argument("VERIFIED_GENES_FILE", help="path to file containing verified genes") parser.add_argument("GENES_FILE", help="path to file containing another set of genes") parser.add_argument("OUT_FILE", help="path to output file you would like to create") args = vars(parser.parse_args()) v_lines, v_filetype = geneTools.readORFLines(args["VERIFIED_GENES_FILE"]) lines, filetype = geneTools.readORFLines(args["GENES_FILE"]) toRemove = [] for v_line in v_lines: v_start, v_end, v_strand = geneTools.getLineData(v_line, v_filetype) for line in lines: start, end, strand = geneTools.getLineData(line, filetype) if v_start == start and v_end == end and v_strand == strand: toRemove.append((start, end, strand)) #have list of (start, end, strand) tuples to remove #now go through genes file and add all lines to ouput file unless it is in toRemove with open(args["OUT_FILE"], 'w') as of: for line in lines:
import geneTools import argparse import matplotlib.pyplot as plt import random import sys #look for the amount of noise by checking coverage values at OFFSETnt upstream from gene stops parser = argparse.ArgumentParser(description="parser for a gene and a coverage file") parser.add_argument("GENES_FILE", help="path to gene file you want to use") parser.add_argument("POSITIVE_COVERAGE_FILE", help="path to coverage file you want to use") args = vars(parser.parse_args()) gene_lines, gene_filetype = geneTools.readORFLines(args['GENES_FILE']) #select a certain number of random genes NUM_GENES = 500 DIRECT_ONLY = True SAVE_GENES = False #enforce that no gene on either strand starts within ZONE_LENGTH nt of the stop codon gene_lines_rand = [] ZONE_LENGTH = 50 attempts = 0 index = 0 while len(gene_lines_rand) < NUM_GENES: attempts += 1 index = random.randint(0, len(gene_lines) - 1) line1 = geneTools.getLineData(gene_lines[index], gene_filetype) if line1[2] == "-": if DIRECT_ONLY:
import argparse

import geneTools

# Remove one set of (verified) genes from another set of genes and write the
# remaining gene lines to a new file.
parser = argparse.ArgumentParser(
    description="parser for removing one set of (verified) genes from another set of genes"
)
parser.add_argument("VERIFIED_GENES_FILE", help="path to file containing verified genes")
parser.add_argument("GENES_FILE", help="path to file containing another set of genes")
parser.add_argument("OUT_FILE", help="path to output file you would like to create")
args = vars(parser.parse_args())

v_lines, v_filetype = geneTools.readORFLines(args["VERIFIED_GENES_FILE"])
lines, filetype = geneTools.readORFLines(args["GENES_FILE"])

# Build the set of (start, end, strand) keys of all verified genes.  A gene
# line is dropped exactly when its key equals a verified key, so a set lookup
# gives the same result as the former pairwise comparison of every verified
# gene against every gene line, without the quadratic number of parses.
# NOTE(review): assumes start, end and strand are hashable (e.g. ints and
# strings) -- confirm against geneTools.getLineData.
toRemove = set()
for v_line in v_lines:
    v_start, v_end, v_strand = geneTools.getLineData(v_line, v_filetype)
    toRemove.add((v_start, v_end, v_strand))

# Parse every gene line once, before the output file is opened, so that a
# parsing error cannot leave a partially written output file behind.
kept_lines = []
for line in lines:
    start, end, strand = geneTools.getLineData(line, filetype)
    if (start, end, strand) not in toRemove:
        kept_lines.append(line)

# Write all gene lines to the output file unless their key is in toRemove.
with open(args["OUT_FILE"], 'w') as of:
    of.writelines(line + '\n' for line in kept_lines)