Exemplo n.º 1
0
def separatePairsAndSingles(cf):
	"""Split an array of FASTQ runs into left, right and single-end output files.

	Every (accession, fastqfile) entry of the 'fastqfiles' array is checked
	with isPaired() against the 'srafetchxml' input. Records of a paired run
	are consumed two at a time: the first of each pair is written to the
	'left' output and the second to the 'right' output. Records of any other
	run are written to the 'single' output.

	Returns constants.OK.
	"""
	fastqfiles = get_array(cf, 'fastqfiles')
	srafetchxml = cf.get_input('srafetchxml')
	# Context managers guarantee that every handle is closed, even when
	# parsing or writing fails part-way through.
	with open(cf.get_output('left'), 'w') as leftfh, \
			open(cf.get_output('right'), 'w') as rightfh, \
			open(cf.get_output('single'), 'w') as singlefh:
		for accession, fastqfile in fastqfiles:
			fqp = FastqParser()
			with open(fastqfile, 'U') as fastqfh:
				if isPaired(srafetchxml, accession):
					# Paired-end run: mates are read as consecutive records.
					records = fqp.parse(fastqfh)
					while True:
						try:
							pe1 = next(records)
							pe2 = next(records)
						except StopIteration:
							# End of file. A trailing record without a
							# mate is dropped rather than written out.
							break
						leftfh.write(str(pe1) + '\n')
						rightfh.write(str(pe2) + '\n')
				else:
					# Single-end run: copy every record unchanged.
					for rec in fqp.parse(fastqfh):
						singlefh.write(str(rec) + '\n')
	return constants.OK
Exemplo n.º 2
0
def getOverRepClusters(cf):
	"""Write the seeds of over-represented clusters to the 'resultsfa' output.

	The total number of records in the 'fastqfile' input is counted first.
	Then, in the tab-separated 'resultsuc' input, every row whose first column
	is 'H' is tallied under the label found in its last column. Finally each
	record of the 'resultsfa' input whose header has a tally is written to the
	output when tally / total * 100 is at least the 'percRep' parameter.

	Returns constants.OK.
	"""
	fastqfile = cf.get_input('fastqfile')
	resultsuc = cf.get_input('resultsuc')
	resultsfa = cf.get_input('resultsfa')
	percRep = cf.get_parameter('percRep', 'float')
	output = cf.get_output('resultsfa')

	# Total number of reads; the denominator of the representation percentage.
	fqp = FastqParser()
	with open(fastqfile, 'rb') as fastqfh:
		totalSeqs = sum(1 for _ in fqp.parse(fastqfh))

	# Number of 'H' rows per cluster label (last column of the row).
	clusterCounts = {}
	with open(resultsuc, 'rb') as ucfh:
		reader = csv.reader(ucfh, quoting=csv.QUOTE_NONE, delimiter='\t')
		for row in reader:
			# csv yields an empty list for a blank line; skip it instead of
			# failing on row[0].
			if row and row[0] == 'H':
				clusterCounts[row[-1]] = clusterCounts.get(row[-1], 0) + 1

	with open(output, 'wb') as outfh:
		for rec in fasta_itr(resultsfa):
			hits = clusterCounts.get(rec.header)
			if hits is None:
				# Seed without any tallied row: nothing to evaluate.
				continue
			clusterRep = (float(hits) / float(totalSeqs)) * 100
			if clusterRep >= percRep:
				outfh.write(str(rec) + '\n')
	return constants.OK
Exemplo n.º 3
0
def getNamesFromFastQ(cf):
    """Write the header of every record in the 'fastqfile' input to 'namelist'.

    One header is written per line. Both files are closed when the function
    exits, including when parsing or writing raises.

    Returns constants.OK.
    """
    with open(cf.get_output("namelist"), "w") as outfh:
        fqp = FastqParser()
        with open(cf.get_input("fastqfile"), "U") as fastqfh:
            for rec in fqp.parse(fastqfh):
                outfh.write("%s\n" % rec.header)
    return constants.OK
Exemplo n.º 4
0
def fastq_merge(cf):
	"""Concatenate the records of every file in the 'in_array' array.

	The array contents are written to the component log, then each file is
	parsed in array order and its records are appended to the 'output' file.
	Every input handle is closed as soon as its file has been copied, so the
	number of open files does not grow with the size of the array.

	Returns constants.OK.
	"""
	with open(cf.get_output('output'), 'w') as outfh:
		fastqfiles = get_array(cf, 'in_array')
		cf.write_log(str(fastqfiles))
		fqp = FastqParser()
		# Only the file path of each (key, path) entry is needed here.
		for _key, fastqfile in fastqfiles:
			with open(fastqfile, 'U') as fastqfh:
				for rec in fqp.parse(fastqfh):
					outfh.write(str(rec) + '\n')
	return constants.OK
Exemplo n.º 5
0
	# NOTE(review): the `def` line of this function is not part of the visible
	# source; presumably this is the body of sra2fastq(cf), the name passed to
	# anduril.main() below -- confirm against the full file.
	#
	# Purpose: run the external `fastq-dump` program on the 'srafile' input,
	# capture its standard output in a temporary file, then rewrite the record
	# headers into the 'fastqfile' output.
	inputfile = cf.get_input('srafile')
	srafetchxml = cf.get_input('srafetchxml')
	accession = cf.get_parameter('accession', 'string')
	outputfile = cf.get_output('fastqfile')
	# Command-line arguments for fastq-dump, built up step by step.
	params = []
	if isPaired(srafetchxml, accession):
		cf.write_log('Run is paired end.')
		params.append('--split-spot')
	# Raw fastq-dump output is captured in '<outputfile>.tmp'.
	outfh = open(outputfile + '.tmp', 'wb')
	try:
		# presumably -Z makes fastq-dump write to stdout, which is redirected
		# into outfh below -- confirm against the SRA toolkit documentation.
		params.append('-Z')
		params.append(inputfile)
		subprocess.check_call(['fastq-dump'] + params, stdout=outfh)
	except subprocess.CalledProcessError, e:
		# Non-zero exit status: log the failure and report a generic error.
		cf.write_log("Error running fastq-dump.")
		cf.write_log("Error: %s" % str(e))
		return constants.GENERIC_ERROR
	finally:
		# The temporary file is closed whether or not fastq-dump succeeded.
		outfh.close()
	# Reformat the fastq headers: each header is replaced by its second
	# space-separated token, then the record is written to the final output.
	# NOTE(review): a header without a space raises IndexError here; the handle
	# used to read the .tmp file is never closed and the .tmp file is not
	# removed in the visible code.
	outfh = open(outputfile, 'wb')
	fqp = FastqParser()
	for rec in fqp.parse(open(outputfile + '.tmp', 'U')):
		rec.header = rec.header.split(' ')[1]
		outfh.write(str(rec) + '\n')
	outfh.close()
	return constants.OK
# Pass the sra2fastq function to anduril.main.
anduril.main(sra2fastq)