def _para(self, *paraArgs):
    """Run the ``para`` command on the remote host via ssh.

    The remote shell first changes into ``self.paraDir`` and then runs
    ``para`` with ``paraArgs`` joined by single spaces as its arguments.
    The ssh invocation is echoed to stderr before it is executed.

    Returns whatever ``procOps.callProcLines`` returns (stdout as a list of
    lines); stderr is reported in a ProcException if the remote program
    encounters an error.

    Nothing is shell-quoted here, so any shell metacharacters in
    ``self.paraDir`` or ``paraArgs`` are interpreted by the remote shell.
    """
    remCmd = "cd " + self.paraDir + "; para "
    remCmd += " ".join(paraArgs)
    # echo the command so the user can see what is run remotely
    fileOps.prLine(sys.stderr, "ssh ", self.paraHost, " ", remCmd)
    sshCmd = ["ssh", "-o", "ClearAllForwardings=yes", self.paraHost, remCmd]
    return procOps.callProcLines(sshCmd)
def build_attributes(database, name, out_dir):
    """Write a tab-separated attributes table to ``<out_dir>/<name>.tsv``.

    Three ``hgsql`` queries are run against ``database``:

    * ``ensemblSource``     -> value used for both type columns
    * ``ensemblToGeneName`` -> gene name ('NoName' when there is no entry)
    * ``ensGtp``            -> transcript -> gene mapping that drives the rows

    Each output line of a query is split on whitespace and must yield exactly
    two fields (``dict()`` raises ValueError otherwise).  Both the source and
    the gene-name lookups are keyed by the transcript id taken from ensGtp.

    One row is written per transcript with the columns GeneId, GeneName,
    GeneType, TranscriptId, TranscriptType; GeneType and TranscriptType hold
    the same value.  Rows are sorted on the whole row, i.e. by gene id first
    with ties broken by the remaining columns, so the output is reproducible.

    Raises KeyError if a transcript in ensGtp has no ensemblSource entry.
    """
    header = '\t'.join(['GeneId', 'GeneName', 'GeneType', 'TranscriptId', 'TranscriptType']) + '\n'

    def load_pairs(query):
        # Map the first field of every result line to the second one.
        cmd = ['hgsql', '-Ne', query, database]
        return dict(x.split() for x in callProcLines(cmd))

    source = load_pairs('select * from ensemblSource')
    genes = load_pairs('select * from ensemblToGeneName')
    transcripts = load_pairs('select transcript, gene from ensGtp')

    rows = []
    # items() instead of iteritems(): works on both Python 2 and Python 3
    for transcript_id, gene_id in transcripts.items():
        gene_name = genes.get(transcript_id, 'NoName')
        biotype = source[transcript_id]
        rows.append([gene_id, gene_name, biotype, transcript_id, biotype])

    with open(os.path.join(out_dir, name + '.tsv'), 'w') as outf:
        outf.write(header)
        # sort on the full row so rows sharing a gene id have a stable order
        for row in sorted(rows):
            outf.write('\t'.join(row) + '\n')
def extract_newick_genomes_cactus(hal):
    """
    Run ``halStats --tree`` on ``hal`` and return ``(newick, genomes)``.

    ``newick`` is the first line printed by the command; ``genomes`` is a
    tuple of the leaf names of that tree as parsed by ete3 with ``format=1``.
    """
    tree_lines = callProcLines(['halStats', '--tree', hal])
    newick = tree_lines[0]
    leaf_names = ete3.Tree(newick, format=1).get_leaf_names()
    return newick, tuple(leaf_names)
def _para(self, *paraArgs):
    """Execute ``para`` on ``self.paraHost`` through ssh.

    The remote command is ``cd <self.paraDir>; para <paraArgs...>``, with the
    arguments joined by single spaces.  The ssh command line is printed to
    stderr before running.

    Returns the result of ``procOps.callProcLines`` (stdout as a list of
    lines); stderr is reported in a ProcException if the remote program
    encounters an error.

    The command string is passed to the remote shell unquoted, so quoting of
    special characters is the caller's responsibility.
    """
    pieces = ["cd ", self.paraDir, "; para ", " ".join(paraArgs)]
    remCmd = "".join(pieces)
    # show the user what is about to be run on the remote host
    fileOps.prLine(sys.stderr, "ssh ", self.paraHost, " ", remCmd)
    return procOps.callProcLines(
        ["ssh", "-o", "ClearAllForwardings=yes", self.paraHost, remCmd]
    )
def testCallLines(self):
    """callProcLines should return the output of ``sort`` as a list of lines."""
    inputFile = self.getInputFile("simple1.txt")
    expected = ['five', 'four', 'one', 'six', 'three', 'two']
    sortedLines = procOps.callProcLines(["sort", inputFile])
    self.assertEqual(sortedLines, expected)
def testCallLines(self):
    """callProcLines should return the output of ``sort`` as a list of lines."""
    out = procOps.callProcLines(["sort", self.getInputFile("simple1.txt")])
    # assertEqual replaces the deprecated failUnlessEqual alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual(out, ['five', 'four', 'one', 'six', 'three', 'two'])
def halStats(args):
    """Run ``halStats`` with the given argument list.

    ``args`` is concatenated onto ``["halStats"]`` to form the command, so it
    must be a list.  Returns the command's output as a list of lines.
    """
    cmd = ["halStats"] + args
    lines = procOps.callProcLines(cmd)
    return list(lines)
frozenset(["Notch2NL-D", "Notch2NL-C"]): [[15867, 74916]], frozenset(): [[81069, 162368], [165397, 2000000]]} f = Fasta("stitched_alignment.fa") results = {} for exclude in [frozenset(), frozenset(["Notch2NL-D"]), frozenset(["Notch2NL-D", "Notch2NL-C"])]: t = open("tmp.fasta", "w") for para in sorted(set(f.keys()) - exclude): t.write(">{}\n{}\n".format(para, f[para])) t.close() n = '_'.join(sorted(exclude)) if len(exclude) > 0 else 'all' cmd = ['java', '-jar', '/cluster/home/ifiddes/jvarkit/dist-1.133/biostar94573.jar', '-R', n, 'tmp.fasta'] r = callProcLines(cmd) recs = [x.split() for x in r if not x.startswith("#")] results[exclude] = recs raw_recs = [] for exclude, region in regions.iteritems(): for start, stop in region: raw_recs.extend([x for x in results[exclude] if start < int(x[1]) <= stop]) # region with poor alignment exclude_regions = [[28574, 31093]] exclude_regions = [ChromosomeInterval('a', x[0], x[1], '.') for x in exclude_regions] recs = [] for r in raw_recs:
def map_to_ref(fwd_filtered, chain):
    """Map ``fwd_filtered`` through ``chain`` with pslMap and filter the result.

    ``pslMap -swapMap -chainMapFile`` writes its output to a temporary file,
    which is then run through ``pslCDnaFilter`` (``-localNearBest=0.05
    -filterWeirdOverlapped -decayMinCover``) writing to ``/dev/stdout``.

    Returns the filter's output as given by ``callProcLines``.

    The temporary file is removed before returning, including when one of
    the commands fails.
    NOTE(review): assumes tmpFileGet() returns a filesystem path and that
    callProcLines() returns only after the command has finished -- confirm.
    """
    import os  # local import: the file's import block is not visible here

    rev_unfiltered = tmpFileGet()
    try:
        cmd = ['pslMap', '-swapMap', '-chainMapFile', fwd_filtered, chain, rev_unfiltered]
        runProc(cmd)
        cmd = ['pslCDnaFilter', '-localNearBest=0.05', '-filterWeirdOverlapped', '-decayMinCover',
               rev_unfiltered, '/dev/stdout']
        return callProcLines(cmd)
    finally:
        # best-effort cleanup; never mask an error raised by the commands
        try:
            os.remove(rev_unfiltered)
        except OSError:
            pass