def _call_hmmer(hmm, inputproteins):
    """Run hmmsearch for one HMM over *inputproteins* and yield hits.

    Writes the proteins to a scratch fasta under /dev/shm (RAM-backed, so no
    disk traffic), runs ``hmmsearch`` against ``<hmm_location>/<hmm>.hmm``,
    then parses the text output and yields ``(hit_id, bitscore, cleaned_seq)``
    tuples for every sufficiently long HSP.

    NOTE(review): ``hitseq.translate(None, '-*')`` is Python 2 ``str.translate``
    semantics (delete chars '-' and '*'); under Python 3 this call would raise.
    Presumably this module targets Python 2 — confirm.
    """
    inputproteins = list(inputproteins)
    # Pre-seed every query protein with a zero score so lookups below never miss.
    scores = {}
    for ip in inputproteins:
        scores[ip.id] = 0
    # /dev/shm keeps both scratch files in RAM.
    with ntf(prefix="/dev/shm/") as inputfasta:
        with ntf(prefix="/dev/shm/") as hmmoutput:
            SeqIO.write(inputproteins, inputfasta.name, 'fasta')
            hmmfile = os.path.join(hmm_location, hmm + '.hmm')
            sp.call(['hmmsearch', '-o', hmmoutput.name, hmmfile, inputfasta.name])
            # Rewind before handing the file object to the parser.
            hmmoutput.flush()
            hmmoutput.seek(0)
            QRS = SearchIO.parse(hmmoutput, format="hmmer3-text")
            for qr in QRS:
                # there's *always* a QR, even though it's usually empty.
                # qr.sort() # I'm kind of hoping this sorts by hit strength.
                # worth checking. I guess it doesn't matter anyway.
                for hit in qr:
                    # Track the best bitscore seen per protein id.
                    scores[hit.id] = max(scores[hit.id], hit.bitscore)
                    for hsp in hit.hsps:
                        def appropriate_hyphens(m):
                            # Replace a low-complexity poly-P run with hyphens
                            # of the same length (stripped just below).
                            return '-' * len(m.group(0))
                        # Only yield reasonably long alignments.
                        if len(hsp.hit.seq) > 100:
                            hitseq = re.sub('PPPPP+', appropriate_hyphens, str(hsp.hit.seq))
                            # Strip gap/stop characters; py2 translate semantics (see docstring).
                            hitseq = hitseq.translate(None,'-*').upper()
                            yield hit.id, hsp.bitscore, hitseq
def call_prodigal(fastafile):
    """Invokes prodigal on a provided fasta file, returns the SeqRecord
    produced by -a. Everything is done in temporary files kept on virtual
    filesystem."""
    # check if file exists blah blah
    with ntf(prefix='/dev/shm/', delete=True, suffix='.prot') as prot_tmp, \
            ntf(prefix='/dev/shm/', delete=True, suffix='.out') as out_tmp:
        cmd = ['prodigal', '-i', fastafile,
               '-a', prot_tmp.name,
               '-o', out_tmp.name, '-q']
        sp.call(cmd)
        # The temp files die when this function returns, so the lazy
        # SeqIO.parse generator must be materialized here. Sucks to be
        # you, memory.
        records = list(SeqIO.parse(prot_tmp.name, 'fasta'))
    return records
def diff(a, b):
    """Return the output of ``diff`` run over the two payloads *a* and *b*.

    Each payload is written to its own temporary file, ``diff`` is invoked on
    the pair, and the captured stdout is returned (stderr is discarded).

    Fix over the original: the temporary files are now removed in a
    ``finally`` block, so they no longer leak if writing or ``execute``
    raises.
    """
    af = ntf(delete=False)
    bf = ntf(delete=False)
    try:
        af.write(a)
        af.close()
        bf.write(b)
        bf.close()
        o, e = execute(['diff', af.name, bf.name])
    finally:
        # Always clean up the scratch files, even on failure.
        os.unlink(af.name)
        os.unlink(bf.name)
    return o
def print_recfil(self,dir = '.'):
    """Write the record file (.rec) for this object.

    Builds a list of 4-hex-digit flag words from ``self.what`` and
    ``self.when`` ('0100' for enabled entries, '0000' otherwise), appends
    ``self.rec_base``, hex-decodes each word and writes it to a fresh
    temporary ``.rec`` file stored on ``self.file``.

    NOTE(review): the *dir* parameter is accepted but never used — the
    temp file is always created with dir='.'; confirm whether that is
    intentional.
    """
    # Close any previously open record file before replacing it.
    if self.file is not None:
        self.close()
    self.file = ntf(mode = 'w', suffix = '.rec', dir='.', delete = False)
    # self.file = open(tmpr.name,'w')
    # print(self.file.name)
    rec_tot = []
    for use in (list(self.what.values())+list(self.when.values())):
        # print(use)
        # '0100' marks an enabled flag word, '0000' a disabled one.
        if use == True:
            rec_tot.append('0100')
        else:
            rec_tot.append('0000')
    rec_tot+=self.rec_base
    # str_tot / i are leftovers from an older formatting approach (see the
    # commented-out block below); they are currently unused.
    str_tot = ''
    i = 1
    import codecs
    for thing in rec_tot:
        print(codecs.decode(thing,'hex'))
        # NOTE(review): str() of a bytes object yields "b'...'" text on
        # Python 3 — confirm this is the intended file content.
        self.file.write(str(codecs.decode(thing,'hex')))
        # str_tot += thing+('\n' if i%8 == 0 else' ')
        # i += 1
        # # print(str_tot)
        # from codecs import encode
        # return(str_tot.decode('hex'))
        # self.file.write(encode(str_tot,'hex'))
    self.file.close()
def test_add_metadata(self):
    """AVUs shorter than 2 elements are rejected; a valid AVU is stored
    once and a duplicate raises RuntimeError."""
    message = "Hi there"
    tmp = ntf(bufsize=0, delete=False)
    tmp.write(message)
    tmp.close()
    dest = os.path.join(self.home_path, os.path.basename(tmp.name))
    self.ios.put_object(tmp.name, dest_path=dest)
    os.unlink(tmp.name)
    # avu has to be >=2
    avu = ('key1',)
    with self.assertRaises(AssertionError):
        self.ios.add_object_metadata(dest, avu)
    # first avu in
    avu = ('key1', 'value1', 'units1')
    self.ios.add_object_metadata(dest, avu)
    obj = self.ios.sess.data_objects.get(dest)
    obj_metadata = [(item.name, item.value, item.units)
                    for item in obj.metadata.items()]
    self.assertIn(avu, obj_metadata)
    # second identical avu has to be rejected
    with self.assertRaises(RuntimeError):
        self.ios.add_object_metadata(dest, avu)
    self.ios.sess.data_objects.unlink(dest)
def atomic_writer(func, name, mode="wb", *args, **kwargs):
    """
    Yield a writable file object whose contents replace *name* atomically.

    A temporary file is created in the destination directory (so the final
    ``os.rename`` stays on one filesystem and is atomic on POSIX), opened
    via *func*, flushed and fsynced after the caller writes to it, and only
    then renamed over *name*.

    Raises ValueError if *mode* is not a write mode.

    Fix over the original: if the caller's write (or flush/fsync) raises,
    the partially-written temporary file is now removed instead of being
    left behind, and the destination is never touched.
    """
    if "w" not in mode:
        raise ValueError("Write mode not selected: mode = '%s'" % mode)

    # Get the filename parts
    fname = os.path.abspath(name)
    prefix = os.path.basename(fname) + "-"
    dirname = os.path.dirname(fname)

    # Create the empty temporary file in the target directory.
    tobj = ntf(prefix=prefix, suffix=TMP_SUFFIX, dir=dirname, delete=False)
    tname = tobj.name
    tobj.close()

    try:
        # Reopen the file with the proper opener.
        with func(tname, mode, *args, **kwargs) as fobj:
            yield fobj
            # Make sure the data is on disk before the switch.
            fobj.flush()
            os.fsync(fobj.fileno())
        # Now the atomic switch.
        os.rename(tname, fname)
    except BaseException:
        # Don't leak the temp file when the caller's write fails (or the
        # generator is closed early).
        try:
            os.remove(tname)
        except OSError:
            pass
        raise
def execute_shellscript(self, script, preamble=preamble, postamble=postamble, **kwargs):
    """Render *script*, write it as an executable file in the log
    directory and run it.

    Returns True when the script exits 0; otherwise logs the script body
    line by line and returns False.
    """
    src = self.source
    out = self.output
    cfg = self.config
    logdir = out.logdir
    rendered = script.format(output=out, source=src, config=cfg)
    # Reserve a unique script filename in the log directory.
    tmp = ntf(dir=logdir, prefix=src.doctype.__name__ + '-',
              suffix='.sh', delete=False)
    tmp.close()
    with codecs.open(tmp.name, 'w', encoding='utf-8') as handle:
        if preamble:
            handle.write(preamble)
        handle.write(rendered)
        if postamble:
            handle.write(postamble)
    os.chmod(tmp.name, stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR)
    result = execute([tmp.name], logdir=logdir)
    if result == 0:
        return True
    # Non-zero exit: dump the script itself into the log for debugging.
    with codecs.open(tmp.name, encoding='utf-8') as handle:
        for line in handle:
            logger.info("Script: %s", line.rstrip())
    return False
def test_add_metadata(self):
    """Too-short AVUs fail the assertion; a valid AVU round-trips; a
    duplicate AVU raises RuntimeError."""
    content = "Hi there"
    src = ntf(bufsize=0, delete=False)
    src.write(content)
    src.close()
    target = os.path.join(self.home_path, os.path.basename(src.name))
    self.ios.put_object(src.name, dest_path=target)
    os.unlink(src.name)
    # avu has to be >=2
    with self.assertRaises(AssertionError):
        self.ios.add_object_metadata(target, ('key1', ))
    # first avu in
    triple = ('key1', 'value1', 'units1')
    self.ios.add_object_metadata(target, triple)
    stored = self.ios.sess.data_objects.get(target)
    collected = []
    for entry in stored.metadata.items():
        collected.append((entry.name, entry.value, entry.units))
    self.assertIn(triple, collected)
    # second identical avu has to be rejected
    with self.assertRaises(RuntimeError):
        self.ios.add_object_metadata(target, triple)
    self.ios.sess.data_objects.unlink(target)
def genericGuessTest(content, ext):
    """Write *content* to a throwaway file carrying extension *ext*, run
    the doctype guesser on it, and return the guess."""
    tmp = ntf(prefix='tldp-guesser-test-', suffix=ext, delete=False)
    tmp.close()
    with codecs.open(tmp.name, 'w', encoding='utf-8') as handle:
        handle.write(content)
    doctype = guess(tmp.name)
    os.unlink(tmp.name)
    return doctype
def writeconfig(self, case):
    """Persist ``case.cfg`` to a temp .cfg file and record its path on
    ``case.configfile``."""
    tmp = ntf(prefix=case.tag, suffix='.cfg', dir=self.tempdir, delete=False)
    tmp.close()
    with codecs.open(tmp.name, 'w', encoding='utf-8') as handle:
        handle.write(case.cfg)
    case.configfile = tmp.name
def _process_semrep(self, resolved_text):
    """Run SemRep over *resolved_text* and cache its raw XML output on
    ``self._raw_processed``, appending the closing annotation tag if
    SemRep did not emit one."""
    template = ('/opt/public_semrep/bin/'
                'semrep.v1.8 -L 2018 -Z 2018AA -X {in_} {out}')
    with ntf(mode='w') as infile, ntf('r+') as outfile:
        infile.write(resolved_text)
        infile.seek(0)
        command = template.format(in_=infile.name, out=outfile.name)
        subprocess.run(command, stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE, shell=True)
        closing_tag = '</SemRepAnnotation>'
        raw = outfile.read()
        if closing_tag not in raw:
            raw += closing_tag
        self._raw_processed = raw
def test_arg_isreadablefile(self):
    """arg_isreadablefile() returns the path when readable, None when not
    (except for root, which ignores mode bits)."""
    tmp = ntf(prefix='readable-file')
    self.assertEqual(tmp.name, arg_isreadablefile(tmp.name))
    saved_mode = os.stat(tmp.name).st_mode
    os.chmod(tmp.name, 0)
    if os.getuid() == 0:
        # root can read regardless of permissions
        self.assertEqual(tmp.name, arg_isreadablefile(tmp.name))
    else:
        self.assertIsNone(arg_isreadablefile(tmp.name))
    os.chmod(tmp.name, saved_mode)
def test_isreadablefile(self):
    """isreadablefile() tracks the file's read permission (root always
    reads)."""
    tmp = ntf(prefix='readable-file')
    self.assertTrue(isreadablefile(tmp.name))
    saved_mode = os.stat(tmp.name).st_mode
    os.chmod(tmp.name, 0)
    if os.getuid() == 0:
        # root bypasses mode bits
        self.assertTrue(isreadablefile(tmp.name))
    else:
        self.assertFalse(isreadablefile(tmp.name))
    os.chmod(tmp.name, saved_mode)
def test_fq_executable(self):
    """which() on a fully-qualified path finds the file only once it is
    executable."""
    tmp = ntf(prefix='tldp-which-test', delete=False)
    tmp.close()
    # not executable yet: must not be found
    self.assertIsNone(which(tmp.name))
    os.chmod(tmp.name, stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH)
    # now executable: found under its own name
    self.assertEqual(tmp.name, which(tmp.name))
    os.unlink(tmp.name)
def get_auth():
    '''Return a temporary file containing the user's name and password.'''
    username = get_input(msg="VPN username?")
    echo = ask_echo()
    password = get_input(echo=echo, msg="VPN password?")
    credentials = ntf(mode="w")
    credentials.write(username + "\n" + password + "\n")
    # flush so the consumer sees the contents immediately
    credentials.flush()
    return credentials
def test_show_doctypes(self):
    """show_doctypes() exits EX_OK and mentions every known doctype."""
    tmp = ntf(dir=self.tempdir, prefix='doctypes-', delete=False)
    tmp.close()
    with codecs.open(tmp.name, 'w', encoding='utf-8') as outfile:
        result = tldp.driver.show_doctypes(Namespace(), file=outfile)
        self.assertEqual(result, os.EX_OK)
    with codecs.open(tmp.name, encoding='utf-8') as infile:
        stdout = infile.read()
    for doctype in knowndoctypes:
        self.assertTrue(doctype.formatname in stdout)
def create_ss_fasta(sequence, topic='seq'):
    """Write *sequence* as a single-record FASTA file (wrapped at
    FASTA_MAX_LENGTH characters per line) and return the file's path."""
    out = ntf(prefix='blast', delete=False, mode='w')
    out.write('> {}\n'.format(topic))
    for start in range(0, len(sequence), FASTA_MAX_LENGTH):
        out.write('{}\n'.format(sequence[start:start + FASTA_MAX_LENGTH]))
    out.close()
    return out.name
def test_statfile_exception(self):
    """statfile() raises EPERM/EACCES inside an unreadable directory (for
    non-root), and returns a stat_result once access is restored."""
    tmp = ntf(dir=self.tempdir)
    saved_mode = os.stat(self.tempdir).st_mode
    os.chmod(self.tempdir, 0)
    if os.getuid() != 0:
        # non-root cannot traverse a mode-0 directory
        with self.assertRaises(Exception) as ecm:
            statfile(tmp.name)
        self.assertIn(ecm.exception.errno, (errno.EPERM, errno.EACCES))
    os.chmod(self.tempdir, saved_mode)
    stbuf = statfile(tmp.name)
    self.assertIsInstance(stbuf, posix.stat_result)
def annotate(record, preserve_anno=False):
    """Call genes on *record* with prodigal and merge them back in.

    Could be parallelized, but that would not help much.
    """
    # deepcopy: annotated lists were squashing each other's features
    record = deepcopy(list(record))
    # /dev/shm keeps the scratch fasta in RAM, avoiding excessive disk sadness
    with ntf(prefix='/dev/shm/', delete=True, suffix=".fna") as fastafile:
        # prodigal can read genbank too, but fasta lets the pseudofastas
        # carry accessions instead of species names
        SeqIO.write(record, fastafile.name, 'fasta')
        gene_calls = parse_prodigal(call_prodigal(fastafile.name))
        records = merge_features(record, gene_calls, preserve_anno)
    return records
def test_put_a_path_that_does_not_exists(self):
    """put_object() creates a brand-new data object whose content matches
    the source file."""
    expected = "Hi there"
    src = ntf(bufsize=0, delete=False)
    src.write(expected)
    src.close()
    dest = os.path.join(self.home_path, os.path.basename(src.name))
    self.ios.put_object(src.name, dest_path=dest)
    os.unlink(src.name)
    obj = self.ios.sess.data_objects.get(dest)
    with obj.open('r+') as handle:
        retrieved = handle.readlines()[0]
    self.ios.sess.data_objects.unlink(dest)
    self.assertEqual(expected, retrieved)
def test_get_fna(self):
    """Cluster.fna() output survives a fasta write/parse round trip."""
    cluster = Cluster(gene_list=list(self.contig.genes()),
                      classification="testclass")
    cluster.save()
    try:
        records = cluster.fna()
        from tempfile import NamedTemporaryFile as ntf
        with ntf(mode='w', delete=True) as handle:
            SeqIO.write(records, handle.name, 'fasta')
            # materialize while the temp file still exists
            roundtrip = list(SeqIO.parse(handle.name, 'fasta'))[0]
        self.assertEqual(len(roundtrip), len(records))
        self.assertEqual(roundtrip.id, records.id)
    finally:
        cluster.delete()
def demo(
    dot: 'A Graphviz dot document',
    request,
    response,
    algorithm: hug.types.one_of([
        'dot', 'neato', 'twopi', 'circo', 'fdp', 'sfdp', 'patchwork', 'osage'
    ]) = 'dot',
):
    """Render *dot* with the chosen Graphviz engine.

    The output format comes from the request path's extension. Engine
    failures are reported as a JSON payload with HTTP 500; success returns
    the rendered bytes wrapped in BytesIO.
    """
    out_format = request.path.split(".")[-1]
    # Enforce unicode strings
    try:
        dot = dot.decode("utf-8")
    except AttributeError:
        pass
    with ntf(suffix=".dot", mode="w") as source, ntf(mode="r+b") as rendered, \
            ntf(mode="r") as errors:
        source.write(dot)
        source.flush()
        cmd = [algorithm, '-T', out_format, source.name, '-o', rendered.name]
        proc = subprocess.Popen(cmd, stdout=errors, stderr=subprocess.STDOUT)
        ret = proc.wait()
        if ret != 0:
            response.status = hug.HTTP_500
            errors.seek(0)
            return {"status_code": ret, "message": errors.read()}
        rendered.seek(0)
        payload = rendered.read()
    return BytesIO(payload)
def create_ms_fasta(seq_topic_map):
    """Write every (topic, sequence) pair in *seq_topic_map* to a multi-
    record FASTA file (lines wrapped at FASTA_MAX_LENGTH) and return the
    file's path."""
    out = ntf(prefix='blast', delete=False, mode='w')
    for topic, sequence in seq_topic_map.items():
        logging.debug("Writing to fasta - topic: {} sequence: {}".format(
            topic, sequence))
        out.write('> {}\n'.format(topic))
        for start in range(0, len(sequence), FASTA_MAX_LENGTH):
            out.write('{}\n'.format(sequence[start:start + FASTA_MAX_LENGTH]))
    out.close()
    return out.name
def viewms(obj):
    """Return the HTML representation of the atoms object(s) as an
    IPython HTML widget.

    Accepts a single ``ase.Atoms`` instance or a tuple/list of file paths
    readable by ``ase.io``; anything else raises.
    """
    renders = []
    ms = []
    if isinstance(obj, ase.Atoms):
        ms.append(obj)
    elif isinstance(obj, (tuple, list)):
        for oi in obj:
            # Bug fix: this previously called aio.read(f) with an undefined
            # name `f`, raising NameError; read each list entry instead.
            ms.append(aio.read(oi))
    else:
        raise Exception('##')
    for atoms in ms:
        # Render each structure to HTML through a scratch file.
        with ntf('r+', suffix='.html') as fo:
            atoms.write(fo.name, format='html')
            fo.seek(0)
            renders.append(fo.read())
    columns = ('<div class="col-xs-6 col-sm-3">{}</div>'.format(r)
               for r in renders)
    return HTML('<div class="row">{}</div>'.format("".join(columns)))
def save_net(ts_id, params, net, run_id=None):
    """Store *net* (pickled after a save/load round trip) plus *params*
    in the ``ts_id`` collection; returns the inserted document id."""
    tbl = col = db[ts_id]  # 'collection'
    if run_id != None:
        params['run_id'] = run_id
    document = tomongotypes(params)
    scratch = ntf(suffix='.tmp', delete=False)
    try:
        scratch.close()  # b/c it looks like .save() tries to do that
        net.save(scratch.name)
        scratch.close()  # just in case
        document['net'] = pickle.dumps(net.load(scratch.name))
        scratch.close()
    finally:
        # cleanup
        os.remove(scratch.name)
    return tbl.insert_one(document).inserted_id
def get_net(ts_id, params, i=-1):  # i=-1 gets the last one inserted
    """get str rep from db and put it into a file"""
    tbl = col = db[ts_id]  # 'collection'
    query = tomongotypes(params)
    matches = list(tbl.find(query))
    # if len(matches)>1: raise Exception('more than one model matched params')
    if not matches:
        return None
    record = matches[i]  # gets the last one inserted by default
    scratch = ntf(dir=_mydir, suffix='.tmp', delete=False)
    try:
        with open(scratch.name, 'wb') as handle:
            handle.write(record['net'])
        scratch.close()
        net = theanets.Network.load(scratch.name)
    finally:
        # cleanup
        os.remove(scratch.name)
    return net
def save_net(ts_id, params, net, run_id=None):
    """Persist *net* together with *params* into collection ``ts_id``;
    returns the new document's id."""
    tbl = col = db[ts_id]  # 'collection'
    if run_id != None:
        params['run_id'] = run_id
    doc = tomongotypes(params)
    tmpf = ntf(suffix='.tmp', delete=False)
    try:
        tmpf.close()  # b/c it looks like .save() tries to do that
        net.save(tmpf.name)
        tmpf.close()  # just in case
        doc['net'] = pickle.dumps(net.load(tmpf.name))
        tmpf.close()
    finally:
        # cleanup
        os.remove(tmpf.name)
    return tbl.insert_one(doc).inserted_id
def get_net(ts_id, params, i=-1):  # i=-1 gets the last one inserted
    """get str rep from db and put it into a file"""
    tbl = col = db[ts_id]  # 'collection'
    selector = tomongotypes(params)
    hits = list(tbl.find(selector))
    # if len(hits)>1: raise Exception('more than one model matched params')
    if len(hits) == 0:
        return None
    chosen = hits[i]  # the last one inserted by default
    tmpf = ntf(dir=_mydir, suffix='.tmp', delete=False)
    try:
        with open(tmpf.name, 'wb') as handle:
            handle.write(chosen['net'])
        tmpf.close()
        net = theanets.Network.load(tmpf.name)
    finally:
        # cleanup
        os.remove(tmpf.name)
    return net
def test_put_a_path_that_exists(self):
    """Simulate an update"""
    first_msg = "Hi there"
    src = ntf(bufsize=0, delete=False)
    src.write(first_msg)
    src.close()
    dest = os.path.join(self.home_path, os.path.basename(src.name))
    self.ios.put_object(src.name, dest_path=dest)
    second_msg = "Hello there"
    with open(src.name, 'w') as handle:
        handle.write(second_msg)
    # overwriting without force must be refused
    with self.assertRaises(ValueError):
        self.ios.put_object(src.name, dest_path=dest)
    # with force=True the update goes through
    self.ios.put_object(src.name, dest_path=dest, force=True)
    obj = self.ios.sess.data_objects.get(dest)
    with obj.open('r+') as handle:
        retrieved = handle.readlines()[0]
    self.assertEqual(second_msg, retrieved)
    self.ios.sess.data_objects.unlink(dest)
    os.unlink(src.name)
def test_statfile_enoent(self):
    """statfile() returns None for a path that does not exist."""
    tmp = ntf(dir=self.tempdir)
    missing = tmp.name + '-ENOENT_TEST'
    self.assertIsNone(statfile(missing))
def test_isexecutable(self):
    """isexecutable() flips from False to True once the execute bit is
    set."""
    tmp = ntf(prefix='executable-file')
    self.assertFalse(isexecutable(tmp.name))
    os.chmod(tmp.name, stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR)
    self.assertTrue(isexecutable(tmp.name))
def test_contracts():
    """Render a contract template, build a Contract from it, and verify
    the instance exposes the full expected attribute surface plus the
    expected gas-estimate keys."""
    new_contract = str(uuid.uuid1())
    with ntf() as t:
        c_name, rendered = contracts.render_contract(payload.copy())
        t.write(rendered.encode('utf8'))
        t.seek(0)
        c = Contract(new_contract, t.name)
        # The Contract object must expose this full public surface.
        assert hasattr(c, 'abi')
        assert hasattr(c, 'address')
        assert hasattr(c, 'bytecode')
        assert hasattr(c, 'compiled_name')
        assert hasattr(c, 'contracts')
        assert hasattr(c, 'deploy')
        assert hasattr(c, 'from_chain')
        assert hasattr(c, 'gas_estimates')
        assert hasattr(c, 'instance')
        assert hasattr(c, 'is_deployed')
        assert hasattr(c, 'metadata')
        assert hasattr(c, 'method_identifiers')
        assert hasattr(c, 'name')
        assert hasattr(c, 'output_json')
        assert hasattr(c, 'sol')
        assert hasattr(c, 'template_json')
        assert c.__dict__['name'] == new_contract
        # XXX : determine why these values are different between local dev, and travisci
        # 'codeDepositCost': '591000', 'executionCost': '20627', 'totalCost': '611627'
        # ~~u'creation': {
        # ~~u'executionCost': u'20627',
        # ~~u'totalCost': u'592400',
        # ~~u'codeDepositCost': u'591000'},
        # assumption dependent on key intersectino
        test_map = '''{
        u'creation': {
        u'executionCost': u'20627',
        u'totalCost': u'591000',
        u'codeDepositCost': u'611627'},
        u'external': {
        u'approve(address,uint256)': u'20468',
        u'symbol()': u'694',
        u'balanceOf(address)': u'637',
        u'totalSupply()': u'414',
        u'FixedSupplyToken()': u'41108',
        u'owner()': u'563',
        u'allowance(address,address)': u'869',
        u'asset_name()': u'628',
        u'transfer(address,uint256)': u'42094',
        u'transferFrom(address,address,uint256)': u'62799',
        u'decimals()': u'261'}
        }'''
        # NOTE(review): test_map is a *string*, so `k in test_map` below is a
        # substring check, not a dict lookup — presumably intentional
        # ("dependent on key intersection"); confirm.
        keys = set([
            u'creation', u'executionCost', u'totalCost', u'codeDepositCost',
            u'external', u'approve(address,uint256)', u'symbol()',
            u'balanceOf(address)', u'totalSupply()', u'FixedSupplyToken()',
            u'owner()', u'allowance(address,address)', u'asset_name()',
            u'transfer(address,uint256)',
            u'transferFrom(address,address,uint256)', u'decimals()'
        ])
        for k in keys:
            if k in test_map:
                pass
            else:
                assert (False)
def test_simple_load_hosts(self):
    """End-to-end test of caasp_hosts.managed():

    1. first run over a pre-existing /etc/hosts (old Salt-managed entries
       must be dropped, custom entries preserved),
    2. a second run must be idempotent (no changes),
    3. user-added entries in /etc/caasp/hosts must be merged in.
    """
    # The /etc/hosts contents we start from (reconstructed fixture — the
    # exact line layout inside this literal should be verified against VCS).
    current_etc_hosts_contents = '''
#
# IP-Address Full-Qualified-Hostname Short-Hostname
#
127.0.0.1 localhost

# special IPv6 addresses
::1 localhost ipv6-localhost ipv6-loopback
fe00::0 ipv6-localnet
ff00::0 ipv6-mcastprefix
ff02::1 ipv6-allnodes
ff02::2 ipv6-allrouters
ff02::3 ipv6-allhosts

# some other name someone/something introduced for Admin
# it should be merged with the IP we will set for Admin
10.10.10.1 some-other-name-for-admin

# some custom and unrelated name
10.10.10.9 custom-name

#-- start Salt-CaaSP managed hosts - DO NOT MODIFY --
# these entries were added by Salt before having "caasp_hosts"
# they should be ignored now
10.10.9.1 admin
10.10.9.2 master0
10.10.9.3 minion1
10.10.9.4 other0
#-- end Salt-CaaSP managed hosts --
'''
    from tempfile import NamedTemporaryFile as ntf
    import caasp_hosts
    caasp_hosts.__utils__ = Utils()
    with ntf(mode='w+', prefix=TEMP_PREFIX) as etc_hosts:
        try:
            # randomized path so parallel runs do not collide
            caasp_etc_hosts_filename = '/tmp/caasp-hosts-{}'.format(
                random.randrange(0, 1000))
            # write the "current" /etc/hosts file
            etc_hosts.write(current_etc_hosts_contents)
            etc_hosts.seek(0)
            log_block(
                '/etc/hosts', etc_hosts.read(),
                description='/etc/hosts contents BEFORE calling managed()')
            #
            # story: first run of caasp_hosts.managed()
            # this is what we will find after updating from
            # the previous mechanism to the new system
            # with caasp_hosts
            #
            changes = self._generate(etc_hosts.name, caasp_etc_hosts_filename)
            etc_hosts.seek(0)
            new_contents = etc_hosts.read()
            log_block(
                '/etc/hosts', new_contents,
                description='/etc/hosts contents AFTER calling managed()')
            log_block('changes', changes)
            with open(caasp_etc_hosts_filename, 'r') as chf:
                log_block('/etc/caasp/hosts', chf.read(),
                          description='Saved /etc/caasp/hosts file')
            # load the /etc/hosts we have generated and check
            # some entries are there
            new_etc_hosts_contents = OrderedDict()
            caasp_hosts._load_hosts_file(new_etc_hosts_contents,
                                         etc_hosts.name)

            def check_entry_strict(ip, names):
                # entry must exist and match *names* exactly (order included)
                self.assertIn(ip, new_etc_hosts_contents)
                self.assertEqual(names, new_etc_hosts_contents[ip])

            def check_entry(ip, names):
                # entry must exist and contain at least all *names*
                self.assertIn(ip, new_etc_hosts_contents)
                for name in names:
                    self.assertIn(name, new_etc_hosts_contents[ip])

            # check the Admin node has the right entries
            check_entry('10.10.10.1',
                        ['admin-minion-id', 'some-other-name-for-admin'])
            # check we are setting the right things in 127.0.0.1
            check_entry('127.0.0.1', [
                'api', 'api.infra.caasp.local', EXTERNAL_MASTER_NAME,
                'localhost', 'master0', 'master0.infra.caasp.local'
            ])
            # check other entries
            check_entry('10.10.10.9', ['custom-name'])
            # check the old entries atre not present
            for ip in ['10.10.9.1', '10.10.9.2', '10.10.9.3', '10.10.9.4']:
                self.assertNotIn(ip, new_etc_hosts_contents)
            #
            # story: nodenames are appended at the beginning of the line
            #
            check_entry_strict('10.10.10.2', [
                'nodename-master0-minion-id',
                'nodename-master0-minion-id.infra.caasp.local',
                'master0-minion-id',
                'master0-minion-id.infra.caasp.local'
            ])
            #
            # story: this host is highstated again
            # we must check the idempotency of 'caasp_hosts'
            #
            prev_etc_hosts_contents = new_etc_hosts_contents
            changes = self._generate(etc_hosts.name, caasp_etc_hosts_filename)
            etc_hosts.seek(0)
            new_contents = etc_hosts.read()
            log_block('/etc/hosts', new_contents,
                      description=
                      '/etc/hosts contents AFTER calling managed AGAIN()')
            log_block('changes', changes,
                      description='we do not expect any changes here')
            # check some entries are still there
            new_etc_hosts_contents = OrderedDict()
            caasp_hosts._load_hosts_file(new_etc_hosts_contents,
                                         etc_hosts.name)
            self.assertDictEqual(prev_etc_hosts_contents,
                                 new_etc_hosts_contents)
            self.assertTrue(len(changes) == 0, 'changes have been found')
            #
            # story: user adds some custom entries in /etc/caasp/hosts
            #
            with open(caasp_etc_hosts_filename, 'a') as chf:
                log.debug(
                    'Adding some custom entries to /etc/caasp/hosts...')
                chf.write('10.10.23.5 foo.server.com\n')
                chf.write('10.10.23.8 bar.server.com\n')
            changes = self._generate(etc_hosts.name, caasp_etc_hosts_filename)
            etc_hosts.seek(0)
            new_contents = etc_hosts.read()
            log_block(
                '/etc/hosts', new_contents,
                description=
                '/etc/hosts contents AFTER adding some custom entries in /etc/caasp/hosts'
            )
            log_block('changes', changes,
                      description='two new extries should have been added')
            # check some entries are still there
            new_etc_hosts_contents = OrderedDict()
            caasp_hosts._load_hosts_file(new_etc_hosts_contents,
                                         etc_hosts.name)
            check_entry('10.10.23.5', ['foo.server.com'])
            check_entry('10.10.23.8', ['bar.server.com'])
            # repeat previous checks
            check_entry('10.10.10.1',
                        ['admin-minion-id', 'some-other-name-for-admin'])
            check_entry('127.0.0.1', [
                'api', 'api.infra.caasp.local', EXTERNAL_MASTER_NAME,
                'localhost', 'master0', 'master0.infra.caasp.local'
            ])
            check_entry('10.10.10.9', ['custom-name'])
            for ip in ['10.10.9.1', '10.10.9.2', '10.10.9.3', '10.10.9.4']:
                self.assertNotIn(ip, new_etc_hosts_contents)
        finally:
            # some cleanups
            try:
                os.unlink(caasp_etc_hosts_filename)
            except Exception as e:
                log.error('could not remove %s: %s',
                          caasp_etc_hosts_filename, e)
def run_tests():
    """ Test output of pymcspearman against tabulated values from MCSpearman """
    from tempfile import NamedTemporaryFile as ntf
    from urllib.request import urlretrieve

    # get test data (network access required)
    tfile = ntf()
    urlretrieve(
        "https://raw.githubusercontent.com/PACurran/MCSpearman/master/test.data",
        tfile.name)
    # open temporary file
    data = _np.genfromtxt(tfile,
                          usecols=(0, 1, 2, 3),
                          dtype=[('x', float), ('dx', float),
                                 ('y', float), ('dy', float)])

    # tabulated results from a MCSpearman run with 10000 iterations
    # (value, tolerance) pairs
    MCSres = [
        (0.8308, 0.001),   # spearman only
        (0.8213, 0.0470),  # bootstrap only
        (0.7764, 0.0356),  # perturbation only
        (0.7654, 0.0584)   # bootstrapping and perturbation
    ]

    # NOTE(review): each check reports failure on stderr instead of raising,
    # so this harness never aborts — confirm that is intended.

    # spearman only
    res = pymcspearman(data['x'], data['y'], dx=data['dx'], dy=data['dy'],
                       bootstrap=False, perturb=False,
                       return_dist=True)
    try:
        assert _np.isclose(MCSres[0][0], res[0], atol=MCSres[0][1])
        _sys.stdout.write("Passed spearman check.\n")
    except AssertionError:
        _sys.stderr.write("Spearman comparison failed.\n")

    # bootstrap only: compare the mean of the returned distribution
    res = pymcspearman(data['x'], data['y'], dx=data['dx'], dy=data['dy'],
                       bootstrap=True, perturb=False,
                       return_dist=True)
    try:
        assert _np.isclose(MCSres[1][0], _np.mean(res[2]), atol=MCSres[1][1])
        _sys.stdout.write("Passed bootstrap only method check.\n")
    except AssertionError:
        _sys.stderr.write("Bootstrap only method comparison failed.\n")

    # perturbation only
    res = pymcspearman(data['x'], data['y'], dx=data['dx'], dy=data['dy'],
                       bootstrap=False, perturb=True,
                       return_dist=True)
    try:
        assert _np.isclose(MCSres[2][0], _np.mean(res[2]), atol=MCSres[2][1])
        _sys.stdout.write("Passed perturbation only method check.\n")
    except AssertionError:
        _sys.stderr.write("Perturbation only method comparison failed.\n")

    # composite method (bootstrap + perturbation)
    res = pymcspearman(data['x'], data['y'], dx=data['dx'], dy=data['dy'],
                       bootstrap=True, perturb=True,
                       return_dist=True)
    try:
        assert _np.isclose(MCSres[3][0], _np.mean(res[2]), atol=MCSres[3][1])
        _sys.stdout.write("Passed composite method check.\n")
    except AssertionError:
        _sys.stderr.write("Composite method comparison failed.\n")
def test_arg_isdirectory(self):
    """arg_isdirectory() accepts a directory and rejects a plain file."""
    self.assertTrue(arg_isdirectory(self.tempdir))
    tmp = ntf(dir=self.tempdir)
    self.assertFalse(arg_isdirectory(tmp.name))
def test_arg_isexecutable(self):
    """arg_isexecutable() returns None before, and the path after, the
    execute bit is set."""
    tmp = ntf(prefix='executable-file')
    self.assertIsNone(arg_isexecutable(tmp.name))
    exec_mode = stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR
    os.chmod(tmp.name, exec_mode)
    self.assertEqual(tmp.name, arg_isexecutable(tmp.name))
def diamond(query, outfile, dbfile, mode="blastx", cpus=1, evalue=0.001,
            top=10, blocksize=2.0, chunks=4, tmpdir=False, minlen=False,
            taxonmap=None):
    """Runs diamond blast with query file

    This is a wrapper function for running diamond aligner in either
    blastx or blastp mode (default is blastx). Some initial checks on
    input is performed as well as optional filtering of input sequences
    by length.

    Parameters
    ----------
    query: str
        Fasta file with sequences to query against database
    outfile: str
        Path to output file with diamond results
    dbfile: str
        Path to diamond database to use for searching
    mode: str
        Mode to use for diamond, either blastx or blastp
    cpus: int
        Number of cpus to use for diamond
    evalue: float
        Maximum allowed e-value to report hits for
    top: int
        Keep hits within top percent of best scoring hit
    blocksize: float
        Sequence block size in billions of letters (default=2.0).
        Set to 20 on clusters.
    chunks: int
        Number of chunks for index processing (default=4). Setting to one
        will shorten runtime but increase memory requirements.
    tmpdir: str
        Temporary directory for output
    minlen: int
        Minimum length for input sequences.
    taxonmap: str
        Path to a taxonmap file; required by legacy diamond versions.
    """
    from contigtax import diamond_legacy
    if diamond_legacy():
        if taxonmap is None:
            # Bug fix: the adjacent string literals previously concatenated
            # to "...requires you to supplya taxonmap..."; a space was missing.
            sys.exit("ERROR: This diamond version requires you to supply "
                     "a taxonmap file with "
                     "--taxonmap at this stage")
        else:
            tmap_string = "--taxonmap {}".format(taxonmap)
    else:
        tmap_string = ""
    # Make sure that diamond database and query file exist
    check_args(dbfile, query)
    # Make sure tmpdir exists if specified
    tmpdir = check_dirs(outfile, tmpdir)
    # Set cpus to minimum allowed
    cpus = max(1, cpus)
    # Filter input sequences if minlen is specified
    if minlen:
        q = ntf(mode="w", delete=False)
        filter_seqs_by_len(query, q.name, minlen)
        query = q.name
    # diamond compresses output itself (--compress 1), so strip any .gz here
    outfile = str(outfile).replace(".gz", "")
    # Use blast tabular output with taxonomy id in last column
    outfmt = "qseqid sseqid pident length mismatch gapopen qstart qend " \
             "sstart send evalue bitscore staxids"
    p = subprocess.run("diamond {m} -q {q} -p {p} -f 6 {f} --top {top} -e {e} "
                       "-b {b} -c {c} --tmpdir {tmpdir} --more-sensitive "
                       "--compress 1 -d {db} "
                       "-o {out} {taxonmap}".format(m=mode, q=query, p=cpus,
                                                    f=outfmt, top=top,
                                                    e=evalue, b=blocksize,
                                                    c=chunks, out=outfile,
                                                    db=dbfile, tmpdir=tmpdir,
                                                    taxonmap=tmap_string),
                       shell=True)
    # Raise CalledProcessError if diamond exited non-zero
    p.check_returncode()
    if minlen:
        # Clean up the length-filtered scratch query file
        sys.stderr.write("Removing temporary file {}\n".format(q.name))
        q.close()
        os.remove(q.name)
    return
def run_tests():
    """ Test output of pymcspearman against tabulated values from MCSpearman """
    from tempfile import NamedTemporaryFile as ntf
    from urllib.request import urlretrieve

    # get test data (network access required)
    tfile = ntf()
    urlretrieve(
        "https://raw.githubusercontent.com/PACurran/MCSpearman/master/test.data",
        tfile.name)
    # open temporary file
    data = _np.genfromtxt(tfile,
                          usecols=(0, 1, 2, 3),
                          dtype=[('x', float), ('dx', float),
                                 ('y', float), ('dy', float)])

    # tabulated results from a MCSpearman run with 10000 iterations
    # (value, tolerance) pairs
    MCSres = [
        (0.8308, 0.001),   # spearman only
        (0.8213, 0.0470),  # bootstrap only
        (0.7764, 0.0356),  # perturbation only
        (0.7654, 0.0584)   # bootstrapping and perturbation
    ]

    # NOTE(review): each check reports failure on stderr instead of raising,
    # so this harness never aborts — confirm that is intended.

    # spearman only
    res = pymccorrelation(data['x'], data['y'],
                          dx=data['dx'], dy=data['dy'],
                          coeff='spearmanr',
                          Nboot=None, Nperturb=None,
                          return_dist=True)
    try:
        assert _np.isclose(MCSres[0][0], res[0], atol=MCSres[0][1])
        _sys.stdout.write("Passed spearman check.\n")
    except AssertionError:
        _sys.stderr.write("Spearman comparison failed.\n")

    # bootstrap only: compare the mean of the returned distribution
    res = pymccorrelation(data['x'], data['y'],
                          dx=data['dx'], dy=data['dy'],
                          Nboot=10000, coeff='spearmanr', Nperturb=None,
                          return_dist=True)
    try:
        assert _np.isclose(MCSres[1][0], _np.mean(res[2]), atol=MCSres[1][1])
        _sys.stdout.write("Passed bootstrap only method check.\n")
    except AssertionError:
        _sys.stderr.write("Bootstrap only method comparison failed.\n")

    # perturbation only
    res = pymccorrelation(data['x'], data['y'],
                          dx=data['dx'], dy=data['dy'],
                          coeff='spearmanr', Nboot=None, Nperturb=10000,
                          return_dist=True)
    try:
        assert _np.isclose(MCSres[2][0], _np.mean(res[2]), atol=MCSres[2][1])
        _sys.stdout.write("Passed perturbation only method check.\n")
    except AssertionError:
        _sys.stderr.write("Perturbation only method comparison failed.\n")

    # composite method (bootstrap + perturbation)
    res = pymccorrelation(data['x'], data['y'],
                          dx=data['dx'], dy=data['dy'],
                          coeff='spearmanr', Nboot=10000, Nperturb=10000,
                          return_dist=True)
    try:
        assert _np.isclose(MCSres[3][0], _np.mean(res[2]),
                           atol=MCSres[3][1])
        _sys.stdout.write("Passed composite method check.\n")
    except AssertionError:
        _sys.stderr.write("Composite method comparison failed.\n")

    # test Kendall tau IFN86 for consistency with scipy
    sres = _kendalltau(data['x'], data['y'])
    IFN86res = kendall_IFN86(data['x'], data['y'],
                             xlim=_np.zeros(len(data)),
                             ylim=_np.zeros(len(data)))
    kt_wrap_res = pymccorrelation(data['x'], data['y'],
                                  xlim=_np.zeros(len(data)),
                                  ylim=_np.zeros(len(data)),
                                  coeff='kendallt')
    try:
        assert _np.isclose(sres[0], IFN86res[0])
        assert _np.isclose(sres[1], IFN86res[1])
        _sys.stdout.write("Passed Kendall tau comparison with scipy.\n")
    except AssertionError:
        _sys.stderr.write("Kendall tau comparison with scipy failed.\n")
    try:
        assert _np.isclose(kt_wrap_res[0], IFN86res[0])
        assert _np.isclose(kt_wrap_res[1], IFN86res[1])
        _sys.stdout.write("Passed internal Kendall tau comparison.\n")
    except AssertionError:
        _sys.stderr.write("Internal Kendall tau comparison failed.\n")

    # test pearson r wrapper against scipy's pearsonr
    wrap_res = pymccorrelation(data['x'], data['y'],
                               coeff='pearsonr', return_dist=False)
    res = _pearsonr(data['x'], data['y'])
    try:
        assert _np.isclose(wrap_res[0], res[0])
        assert _np.isclose(wrap_res[1], res[1])
        _sys.stdout.write("Passed Pearson r wrapper check.\n")
    except AssertionError:
        _sys.stderr.write("Pearson r wrapper check failed.\n")