예제 #1
0
def readGenomesFromFile(multi_files_fname, genome_dir, genome_dicts, column_index=1, load_fxn=biofile.firstField, species=None, outstream=None):
	"""Load one FASTA genome per species, as listed in a mapping file.

	The mapping file is whitespace-delimited, one species per line. The first
	field is the species identifier and field `column_index` is the genome
	filename, relative to `genome_dir`. Blank lines and lines whose first
	character is '#' are ignored.

	Arguments:
		multi_files_fname -- path of the mapping file.
		genome_dir -- directory holding the genome files ('~' is expanded).
		genome_dicts -- dictionary updated in place: species -> loaded genome.
		column_index -- index of the mapping-file field holding the filename.
		load_fxn -- passed through to biofile.readFASTADict.
		species -- iterable of species to load; None loads every species in
			the mapping file.
		outstream -- object with a write() method receiving progress lines;
			defaults to a fresh util.OutStreams().

	Returns the full species -> filename mapping read from the mapping file.

	Raises AssertionError if a requested species is absent from the mapping.
	"""
	if outstream is None:
		outstream = util.OutStreams()

	species_map = {}
	# The context manager closes the mapping file even if a line is malformed.
	with open(multi_files_fname, 'r') as mapping_file:
		for line in mapping_file:
			flds = line.split()
			# Skip blank lines and comment lines (a '#' in the first column).
			if not flds or line[0] == '#':
				continue
			species_map[flds[0]] = flds[column_index]

	if species is None:
		species = list(species_map)
	elif not set(species).issubset(species_map):
		# Raised explicitly so the check still runs under `python -O`.
		raise AssertionError("Not all specified species found in mapping file")

	base_dir = os.path.expanduser(genome_dir)
	for spec in species:
		genome_file = os.path.join(base_dir, species_map[spec])
		if not os.path.isfile(genome_file):
			# NOTE(review): the load below is still attempted after this warning,
			# as before; whether it fails depends on biofile.readFASTADict -- confirm.
			outstream.write("# Cannot find file %s\n" % genome_file)
		genome = biofile.readFASTADict(genome_file, load_fxn)
		genome_dicts[spec] = genome
		# next(..., None) tolerates an empty genome instead of raising IndexError.
		example_id = next(iter(genome), None)
		outstream.write("# species=%s, genome file=%s has %d entries, example ID=%s\n" % (spec, genome_file, len(genome), example_id))
	return species_map
예제 #2
0
    # Required arguments
    parser.add_argument(dest="orthodb_id", type=str, help="OrthoDB ID")
    # Optional arguments
    parser.add_argument("-o",
                        "--out",
                        dest="out_fname",
                        default=None,
                        help="output filename")
    parser.add_argument("--fasta-out",
                        dest="fasta_out_fname",
                        default=None,
                        help="output FASTA filename")
    #parser.add_argument("-o", "--out-dir", dest="out_dir", default='', help="output file directory")
    options = parser.parse_args()

    info_outs = util.OutStreams()
    #data_outs = util.OutStreams()
    fasta_outs = util.OutStreams()

    # Start up output
    if not options.out_fname is None:
        outf = file(options.out_fname, 'w')
        info_outs.addStream(outf)
    else:
        # By default, write to stdout
        info_outs.addStream(sys.stdout)
    if not options.fasta_out_fname is None:
        outf = file(options.fasta_out_fname, 'w')
        fasta_outs.addStream(outf)
    else:
        # By default, write to stdout
예제 #3
0
		pos = n-1
	return pval_list[pos]

if __name__=='__main__':
	parser = argparse.ArgumentParser(description="Compute P-values for entropy")
	# Required arguments
	parser.add_argument(dest="in_fname", help="input filename")
	parser.add_argument(dest="in_distribution_fname_pattern", help="input distribution filename pattern")
	# Optional arguments
	parser.add_argument("-a", "--adjust", dest="adjust", action='store_true', default=False, help="adjust P-values")
	parser.add_argument("-l", "--lower", dest="min_window_size", default=5, help="minimum window size")
	parser.add_argument("-u", "--upper", dest="max_window_size", default=300, help="maximum window size")
	parser.add_argument("-o", "--out", dest="out_fname", default=None, help="output filename")
	options = parser.parse_args()

	info_outs = util.OutStreams(sys.stdout)
	data_outs = util.OutStreams()

	# Start up output
	if not options.out_fname is None:
		outf = file(options.out_fname,'w')
		data_outs.addStream(outf)
	else:
		# By default, write to stdout
		data_outs.addStream(sys.stdout)

	# Write out parameters
	data_outs.write("# Run started {}\n".format(util.timestamp(timeformat="%a %b %d %H:%M:%S %Y")))
	data_outs.write("# Parameters:\n")
	optdict = vars(options)
	for (k,v) in optdict.items():
예제 #4
0
                        help="random-number generator seed")
    parser.add_argument("-p",
                        "--optimize",
                        dest="optimize",
                        action="store_true",
                        default=False,
                        help="optimize the codons?")
    options = parser.parse_args()

    # Fetch sequence
    seq = options.sequence

    random.seed(options.seed)

    # Start up output
    info_outs = util.OutStreams([sys.stdout])
    data_outs = util.OutStreams()
    multi_outs = util.OutStreams([info_outs, data_outs])
    if not options.out_fname is None:
        outf = file(options.out_fname, 'w')
        data_outs.addStream(outf)
    else:
        data_outs.addStream(sys.stdout)

    # Read sequences from a FASTA file?
    fname = os.path.expanduser(seq)
    if os.path.isfile(fname):
        (headers, seq_list) = biofile.readFASTA(fname)
        seqs = zip(headers, seq_list)
        info_outs.write("# Read {0:d} sequences from {1}\n".format(
            len(seqs), fname))
예제 #5
0
        action="store_true",
        default=False,
        help="generate short species IDs (e.g. S.cerevisiae) for each sequence?"
    )
    parser.add_argument("-o",
                        "--out",
                        dest="out_fname",
                        default=None,
                        help="output filename")
    parser.add_argument("--fasta-out",
                        dest="fasta_out_fname",
                        default=None,
                        help="FASTA output filename")
    options = parser.parse_args()

    info_outs = util.OutStreams(sys.stdout)
    data_outs = util.OutStreams()

    # Start up output
    if not options.out_fname is None:
        outf = open(options.out_fname, 'w')
        data_outs.addStream(outf)
    else:
        # By default, write to stdout
        data_outs.addStream(sys.stdout)

    # Write out parameters
    data_outs.write("# Run started {}\n".format(util.timestamp()))
    data_outs.write("# Command: {}\n".format(' '.join(sys.argv)))
    data_outs.write("# Parameters:\n")
    optdict = vars(options)
예제 #6
0
		if prot_align[i] == '-':
			out_align += "-"
		else:
			out_align += prot[j]
			j += 1
	return out_align

if __name__=='__main__':
	parser = argparse.ArgumentParser(description="Muscle alignment")
	parser.add_argument("in_fname", help="input filename")
	parser.add_argument("-p", "--path", dest="muscle_path", default=const_default_muscle_exepath, help="path to Muscle binary")
	parser.add_argument("-t", "--translate", dest="translate", action="store_true", default=False, help="translate the input sequences?")
	parser.add_argument("-o", "--out", dest="out_fname", default=None, help="output filename")
	options = parser.parse_args()
	
	outs = util.OutStreams()
	if not options.out_fname is None:
		fname = os.path.expanduser(options.out_fname)
		#print fname
		outf = open(fname,'w')
		outs.addStream(outf)
	else:
		outs.addStream(sys.stdout)
	
	(headers, seqs) = biofile.readFASTA(open(options.in_fname,'r'))
	seqs_to_align = seqs
	if options.translate:
		seqs_to_align = [translate.translate(s) for s in seqs]
	alseqs = alignSequences(seqs_to_align, exepath=options.muscle_path)
	#print alseqs
	if options.translate:
예제 #7
0
                        dest="normalize_ratio_by_orf",
                        default=None,
                        help="ORF to use for normalization across runs")
    parser.add_argument(
        "--ratio-sig",
        dest="ratio_significance_field",
        default="ratio_hl_normalized",
        help="field to use for ratio significance calculations")
    parser.add_argument("--abundance",
                        dest="abundance_field",
                        default="intensity",
                        help="field to use for abundance calculations")
    options = parser.parse_args()

    # Set up some output
    info_outs = util.OutStreams(sys.stdout)

    orf_dict = None
    if not options.database_fname is None:
        orf_dict = biofile.readFASTADict(options.database_fname)

    evidences = []
    if not options.in_fname is None:
        #print "# Loading..."
        # Read more experiments from master file
        inf = file(os.path.expanduser(options.in_fname), 'r')
        dlr = util.DelimitedLineReader(inf, header=True)
        while not dlr.atEnd():
            flds = dlr.nextDict()
            if os.path.isfile(os.path.expanduser(flds['filename'])):
                ed = mq.EvidenceDescriptor()
예제 #8
0
                        action='store_true',
                        default=False,
                        help="execute debugging code?")
    parser.add_argument("-o",
                        "--out",
                        dest="out_fname",
                        default=None,
                        help="output filename")
    parser.add_argument("--isolate-out",
                        dest="isolate_out_fname",
                        default=None,
                        help="output filename")

    options = parser.parse_args()

    info_outs = util.OutStreams(sys.stdout)
    data_outs = util.OutStreams()
    isolate_outs = util.OutStreams()
    params_outs = util.OutStreams([data_outs])
    motif_outs = None

    # Start up output
    if not options.out_fname is None:
        outf = file(options.out_fname, 'w')
        data_outs.addStream(outf)
    else:
        # By default, write to stdout
        if options.isolate_out_fname is None:
            data_outs.addStream(sys.stdout)
    if not options.isolate_out_fname is None:
        isolate_outf = file(options.isolate_out_fname, 'w')
예제 #9
0
    # Optional arguments
    parser.add_argument(
        "--resolution-ml",
        dest="resolution_ml",
        default=0.005,
        help=
        "output resolution, in mL: average output over elution-volume ranges no larger than this number"
    )
    parser.add_argument("-o",
                        "--out",
                        dest="out_fname",
                        default=None,
                        help="output filename")
    options = parser.parse_args()

    info_outs = util.OutStreams(sys.stdout)
    data_outs = util.OutStreams()

    # Start up output
    if not options.out_fname is None:
        outf = open(options.out_fname, 'w')
        data_outs.addStream(outf)
    else:
        # By default, write to stdout
        data_outs.addStream(sys.stdout)

    # Create the instance using default options
    if os.path.isdir(options.in_fname):
        basedir = options.in_fname
        for in_fname in os.listdir(options.in_fname):
            full_fname = os.path.join(basedir, in_fname)