def use(url, pattern, nodeid, nodeclass, fieldnames, absolutize, format, pagekey, pagerange, output):
    """Uses predefined pattern to extract page data"""
    pat = PATTERNS[pattern]
    fields = fieldnames.split(',') if fieldnames else pat['deffields']
    findata = use_pattern(url, pattern, nodeid, nodeclass, fieldnames, absolutize, pagekey, pagerange)
    if pat['json_only']:
        format = 'json'
    if output:
        io = open(output, 'w', encoding='utf8')
    else:
        io = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)
    if format in ('text', 'csv'):
        writer = csv.DictWriter(io, fieldnames=fields)
        writer.writeheader()
        for item in findata:
            writer.writerow(item)
    elif format == 'json':
        io.write(json.dumps(findata, indent=4))
def execute(operator, operand, acc):
    """Execute a single instruction, and return new desired accumulator result"""
    global program_counter, z_flag, p_flag, memory, halt_flag

    if operator == instruction.HLT:  # 0xx
        if operand == 0:  # HLT 00 is actually HLT
            halt_flag = True
    elif operator == instruction.ADD:  # 1xx
        acc += memory[operand]
        acc = truncate(acc)
    elif operator == instruction.SUB:  # 2xx
        acc -= memory[operand]
        acc = truncate(acc)
    elif operator == instruction.STA:  # 3xx
        memory[operand] = acc
        ##trace("m[" + str(operand) + "]=" + str(acc))
    elif operator == instruction.LDA:  # 5xx
        acc = memory[operand]
        ##trace("a=m[" + str(operand) + "]")
    elif operator == instruction.BRA:  # 6xx
        program_counter = operand
    elif operator == instruction.BRZ:  # 7xx
        if z_flag:
            program_counter = operand
    elif operator == instruction.BRP:  # 8xx
        if p_flag:
            program_counter = operand
    elif operator == instruction.IO:  # 9xx
        if operand == instruction.getOperand(instruction.INP):  # 901
            if not STDIN_REDIRECTED:
                sys.stdout.write("in? ")
            value = io.read()
            #TODO: should we cope with negative numbers here and complement appropriately?
            #TODO: Should honour buswidth here depending on decimal/binary/hexadecimal io mode
            if value < 0 or value > 999:
                raise ValueError("Out of range value:" + str(value))
            acc = truncate(value)
        elif operand == instruction.getOperand(instruction.OUT):  # 902
            if not STDOUT_REDIRECTED:
                sys.stdout.write("out=")
            io.write(acc)
    else:  # unhandled operator
        raise ValueError("Unknown operator:" + str(operator))

    update_flags(acc)
    return acc
def GetActiveDevices(_):
    io.write(b"GetActiveDevices()\n")

    class _L(list):
        @property
        def primary(self):
            return 1

    return _L([False, True, False, False, True] + [False] * 11)
def read(self, n=-1):
    data = self.stream.read(n)
    # FIXME: Support odd length reads
    assert len(data) % 2 == 0
    io = BytesIO()
    for i in range(0, len(data), 2):
        io.write(data[i + 1:i + 2])
        io.write(data[i:i + 1])
    io.seek(0)
    return io.getvalue()
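# A minimal, self-contained sketch of the pair-swap read() above, assuming the
# surrounding class simply wraps a readable binary stream. The _SwappedReader
# name and constructor are hypothetical, used only to exercise the swap logic.
from io import BytesIO

class _SwappedReader:
    def __init__(self, stream):
        self.stream = stream

    def read(self, n=-1):
        data = self.stream.read(n)
        assert len(data) % 2 == 0  # odd-length reads are still unsupported
        out = BytesIO()
        for i in range(0, len(data), 2):
            out.write(data[i + 1:i + 2])  # second byte of each pair first
            out.write(data[i:i + 1])      # then the first byte
        out.seek(0)
        return out.getvalue()

# Each 2-byte pair comes back byte-swapped, e.g. for 16-bit samples whose
# endianness differs from what the consumer expects.
assert _SwappedReader(BytesIO(b"abcdef")).read() == b"badcfe"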
def write(memory, filename):
    """write the contents of memory to the file"""
    f = open(filename, "wt")
    size = len(memory)
    startaddr = min(memory)
    for addr in range(startaddr, startaddr + size):
        #if PREFIX_ADDR:
        #    io.write(addr, file=f)
        io.write(memory[addr], file=f)
    f.close()
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(
                '{}-format can only store 1 Atoms object.'.format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError("Can't write to {}-format".format(format))

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is not None:
            return io.write(filename, images, append=append, **kwargs)
        raise ValueError("Can't write more than one image to file-descriptor "
                         'using json-format.')

    if io.acceptsfd:
        open_new = (fd is None)
        try:
            if open_new:
                mode = 'wb' if io.isbinary else 'w'
                if append:
                    mode = mode.replace('w', 'a')
                fd = open_with_compression(filename, mode)
                # XXX remember to re-enable compressed open
                # fd = io.open(filename, mode)
            return io.write(fd, images, **kwargs)
        finally:
            if open_new and fd is not None:
                fd.close()
    else:
        if fd is not None:
            raise ValueError(
                "Can't write {}-format to file-descriptor".format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        elif append:
            raise ValueError(
                "Cannot append to {}-format, write-function "
                "does not support the append keyword.".format(format))
        else:
            return io.write(filename, images, **kwargs)
def output_entry(entry, profile, limit_fields=None):
    # debug build assertion that limit_fields only contains fields we know about
    if __debug__ and limit_fields is not None:
        assert len([f for f in limit_fields if f not in _field_order]) == 0

    fmt = profile["_formatter"]
    # use a distinct name for the buffer so the io module isn't shadowed
    # (assigning to a local named `io` would make `io.StringIO()` raise
    # UnboundLocalError)
    buf = io.StringIO()
    buf.write(out_line("BEGIN", None, "VCARD", None))
    buf.write(out_line("VERSION", None, profile["_version"], None))

    if limit_fields is None:
        fields = _field_order
    else:
        fields = [f for f in _field_order if f in limit_fields]

    for f in fields:
        if f in entry and f in profile:
            func = profile[f]
            # does it have a limit? (nice scary introspection :-)
            if "limit" in func.__code__.co_varnames[:func.__code__.co_argcount]:
                lines = func(entry[f], fmt, limit=profile["_limit"])
            else:
                lines = func(entry[f], fmt)
            if len(lines):
                buf.write(lines)

    buf.write(out_line("END", None, "VCARD", fmt))
    return buf.getvalue()
def convolveDOS(dosFiles, resFile):
    D = []
    for i in dosFiles:
        D.append(read(filename=i))
    # NB: this appears to be a project-local `io` helper (custom load/write
    # signatures), not the stdlib io module.
    import io
    Res = io.load(resFile)
    R = []
    for d in range(len(D)):
        er, rr = convolve(D[d], Res)
        R.append((er, rr))
        io.write(er, rr, numpy.zeros(rr.shape), dosFiles[d] + ".conv")
    return
def send(self, data, flags=0):
    io = BytesIO()
    io.write(data)
    buffer = io.getvalue()
    self.__send_lock.acquire()
    try:
        return self.__iowait(self._connection.send, buffer, flags)
    except OpenSSL.SSL.SysCallError as e:
        if e.args[0] == -1 and not data:
            # errors when writing empty strings are expected and can be ignored
            return 0
        raise
    finally:
        self.__send_lock.release()
def send(self, data, flags=0):
    io = StringIO()
    io.write(data)
    buffer = io.getvalue()
    self.__send_lock.acquire()
    try:
        return self.__iowait(self._connection.send, buffer, flags)
    except OpenSSL.SSL.SysCallError as e:
        if e.args[0] == -1 and not data:
            # errors when writing empty strings are expected and can be ignored
            return 0
        raise
    finally:
        self.__send_lock.release()
def save_as(self):
    io = EditorIO(self.editor)
    new_filename = io.ask_for_filename()
    if new_filename:
        return io.write(save_as=new_filename), new_filename
    else:
        return False, None
def write_to_stream(node: Node, io: io.IOBase, indentation=0, indentation_level=INDENTATION_LEVEL):
    if type(node) == Element:
        io.write(f'<{node.name} {attrs(node)}>')
        for child in node.children:
            write_to_stream(child, io, indentation + indentation_level)
        io.write(f'</{node.name}>')
    elif type(node) == SelfClosingElement:
        io.write(f'<{node.name} {attrs(node)}/>')
    else:
        io.write(node.data)
def extract(url, xpath, fieldnames, absolutize, post, pagekey, pagerange, format, output):
    """Extract data with xpath"""
    fields = fieldnames.split(',') if fieldnames else DEFAULT_FIELDS
    data = extract_data_xpath(url, xpath, fieldnames, absolutize, post, pagekey, pagerange)
    if output:
        io = open(output, 'w', encoding='utf8')
    else:
        io = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)
    if format in ('text', 'csv'):
        writer = csv.DictWriter(io, fieldnames=fields)
        writer.writeheader()
        for item in data:
            writer.writerow(item)
    elif format == 'json':
        io.write(json.dumps(data, indent=4))
def gettable(url, nodeid, nodeclass, fieldnames, format, pagekey, pagerange, output):
    """Extracts table with data from html"""
    findata = get_table(url, nodeid, nodeclass, fieldnames, pagekey, pagerange)
    if output:
        io = open(output, 'w', encoding='utf8')
    else:
        io = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)
    if format in ('text', 'csv'):
        writer = csv.writer(io)
        if fieldnames:
            writer.writerow(fieldnames.split(','))
        for item in findata:
            writer.writerow(item)
    elif format == 'json':
        io.write(json.dumps(findata, sort_keys=True, indent=4))
def test_utf8_writer(self):
    writer = converters.getwriter('utf-8')
    io = writer(self.io)
    io.write(self.u_japanese + '\n')
    io.seek(0)
    result = io.read().strip()
    tools.eq_(result, self.utf8_japanese)

    io.seek(0)
    io.truncate(0)
    io.write(self.euc_jp_japanese + b'\n')
    io.seek(0)
    result = io.read().strip()
    tools.eq_(result, self.euc_jp_japanese)

    io.seek(0)
    io.truncate(0)
    io.write(self.utf8_japanese + b'\n')
    io.seek(0)
    result = io.read().strip()
    tools.eq_(result, self.utf8_japanese)
def cp(content, fname):
    # more write than copy, buuuut...
    log("writing %s" % (fname,), 2)
    write(content, fname)
def write(self, io):
    # only need 1 free list entry (until reserved space is supported)
    # free list always has extra blank 4-bytes at end.
    # free list available grows by 10?

    resources = self._build_res_names()
    resources.extend(self._resources)

    index_used = len(resources)
    index_size = 10 + index_used // 10 * 10

    # remove reserved space from the last entry
    ix = len(resources)
    if ix and resources[ix - 1][4]:
        (rid, rtype, attr, data, _) = resources[ix - 1]
        resources[ix - 1] = (rid, rtype, attr, data, 0)

    freelist_used = 1
    for x in resources:
        if x[4]:
            freelist_used += 1
    freelist_size = 10 + freelist_used // 10 * 10

    extra = freelist_size * 8 + 4 + index_size * 20
    map_size = 32 + extra
    map_offset = 0x8c

    # version, offset to map, sizeof map, 128 bytes (reserved)
    rheader = struct.pack("<III128x", 0, map_offset, map_size)

    # handle:4, flags:2, offset:4, size:4, toindex:2, filenum:2, id:2,
    # indexSize:4, indexUsed:4, flSize:2, flUsed:2
    rmap = struct.pack("<IHIIHHHIIHH", 0, 0, map_offset, map_size,
                       32 + freelist_size * 8 + 4, 0, 0,
                       index_size, index_used,
                       freelist_size, freelist_used)

    eof = 0x8c + map_size
    fl = []

    index = bytearray()
    for (rtype, rid, attr, data, reserved) in resources:
        # type:2, id:4, offset:4, attr:2, size:4, handle:4
        index += struct.pack("<HIIHII", rtype, rid, eof, attr, len(data), 0)
        eof += len(data)
        if reserved:
            fl.append((eof, reserved))
            eof += reserved
    index += bytes(20 * (index_size - index_used))

    fl.append((eof, 0xffffffff - eof))
    fl = self._merge_free_list(fl)

    freelist = bytearray()
    for (offset, size) in fl:
        freelist += struct.pack("<II", offset, size)
    freelist += bytes(8 * (freelist_size - freelist_used) + 4)

    io.write(rheader)
    io.write(rmap)
    io.write(freelist)
    io.write(index)
    for (_, _, attr, data, reserved) in resources:
        io.write(data)
        if reserved:
            io.write(bytes(reserved))

    return eof
def getDOS(Data="test_cases/ni_0300/sqe.pkl",
           MT="test_cases/ni_0300/mqe.pkl",
           C_ms=numpy.arange(0.0, 2.0, 0.1),
           backgroundFrac=0.90,
           constantFrac=0.00,
           cutoff=8.5,
           elasticCutAvg=3,
           longE=40.0,
           cutRange=(1e-20, 1e20),
           eStop=60.0,
           T=300.0,
           M=58.6934,
           N=10,
           Tol=1.0e-7,
           maxIter=50,
           interactive=True,
           viewDirectory='tmp',
           outputDir='tmp',
           ):
    """
    This is the `main` function for finding a DOS and a multiphonon/
    multiple scattering correction from experimental scattering.
    user.py contains the user modifiable parameters.
    """
    # *** LJ ***
    # check output dirs
    _checkOutdir(viewDirectory)
    _checkOutdir(outputDir)

    # copy the magic html file
    shutil.copy(os.path.join(paths.data, magic_html), viewDirectory)
    #shutil.copy(magic_html, viewDirectory)

    if interactive:
        # open a browser
        viewhtml = os.path.abspath(os.path.join(viewDirectory, magic_html))
        bthread = BrowserThread('file://' + viewhtml)
        bthread.start()

    # record time
    import time
    time1 = time.time()
    # **********

    # --- Prep S(Q,E) for calculation -----------------------------------------------
    sqe = expSqe(Data, T, M, cutRange=cutRange)

    # *** LJ ***
    if not MT:
        mqe = None
    else:
        mqe = expSqe(MT, T, M, cutRange=cutRange)
    # **********

    if mqe:
        sqe.removeBackground(mqe, backgroundFrac, constantFrac)
    sqe.cropForCalc(cutoff, longE, eStop, elasticCutAvg)
    sqe.norm2one()
    sqe.expand(2.0)
    sqe0 = expSqe(sqe)

    sqe.plotSE(viewDirectory)
    sqe.plotSQE(viewDirectory, lower=1e-30, upper=2.5e-4)
    sqe.plotMask(viewDirectory)

    # --- Fitting ------------------------------------------------------------------
    C_ms += 1.0  # This is a hack, until the internal rep of C_ms is changed.
    # ------------------------------------------------------------------------------
    res = getCorrectedScatter(sqe, C_ms, N, Tol, maxIter, interactive, vd=viewDirectory)
    sqeCalc, dosCalc, cmsCalc, res, C_ms, lsqSc, lsqMu, lsqSl, LSQ \
        = getBestSol(sqe0, res, C_ms)

    dosCalc.plotDOS(viewDirectory)

    # --- Output to file and pickle ------------------------------------------------
    cp.dump((sqe0, C_ms, res, lsqSc, lsqMu, lsqSl, LSQ),
            open(os.path.join(outputDir, "all.pkl"), 'wb'), -1)
    cp.dump((sqe0, sqeCalc, dosCalc, cmsCalc),
            open(os.path.join(outputDir, "sol.pkl"), 'wb'), -1)

    # *** LJ ***
    saveDOSasHistogram(dosCalc, os.path.join(outputDir, "doshist.pkl"))
    # **********

    f = open(os.path.join(outputDir, "C_ms"), 'w')
    f.write("C_ms = %lf\n" % (C_ms[numpy.argmin(numpy.array(LSQ)**2)] - 1.0))
    f.close()

    io.write(dosCalc.e, dosCalc.g, os.path.join(outputDir, "Dos"))
    io.write(dosCalc.e, dosCalc.gz, os.path.join(outputDir, "Dos.z"))
    io.write(sqe0.e, sqe0.se, os.path.join(outputDir, "Se.exp"))
    io.write(sqe0.e, nar(nar(sqeCalc)), os.path.join(outputDir, "Se.clc"))
    io.write(sqe0.e, nar(nar(sqeCalc[1:])), os.path.join(outputDir, "Multi.clc"))
    io.write(sqe0.e, nar(nar(sqeCalc[1:])) / (cmsCalc),
             os.path.join(outputDir, "Mph.clc"))
    io.write(sqe0.e, (cmsCalc - 1.0) * nar(nar(sqeCalc[1:])) / cmsCalc,
             os.path.join(outputDir, "Msc.clc"))

    # --- `Interactive` Output -----------------------------------------------------
    SQE = expSqe(sqe0.q, sqe0.e, nar(sqeCalc), sqe0.sqerr, sqe0.T, sqe0.M, cutRange=cutRange)

    plotComp(sqe0, sqeCalc, viewDirectory)
    plotLSQ(C_ms, lsqSc, lsqMu, lsqSl, LSQ, viewDirectory)
    plotSQE(SQE, viewDirectory, 'sqeCalc.png', title='S(Q,E) Calculated',
            lower=1e-30, upper=2.5e-4)
    return
serial = 1

key = crypto.PKey()
key.generate_key(crypto.TYPE_RSA, 4096)

crt = crypto.X509()
crt.get_subject().C = args.country or 'UK'
crt.get_subject().ST = args.state or 'South Wales'
crt.get_subject().L = args.city or 'Pontypridd'
crt.get_subject().O = args.company or 'Crossbar.IO'
crt.get_subject().OU = args.org or 'XBR'
crt.get_subject().CN = args.host or gethostname()

crt.set_serial_number(serial)
crt.gmtime_adj_notBefore(0)
crt.gmtime_adj_notAfter(7 * 24 * 60 * 60 * (args.weeks or 52))
crt.set_issuer(crt.get_subject())
crt.set_pubkey(key)
crt.sign(key, 'sha1')

c_str = crypto.dump_certificate(crypto.FILETYPE_PEM, crt)
k_str = crypto.dump_privatekey(crypto.FILETYPE_PEM, key)

with open('certs/server_crt.pem', 'w') as io:
    io.write(c_str.decode('utf-8'))

with open('certs/server_key.pem', 'w') as io:
    io.write(k_str.decode('utf-8'))

print("Server certificates installed, don't forget to regenerate your client certs!")
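# Optional sanity check, assuming the same pyOpenSSL `crypto` API used above:
# re-read the two PEM files and confirm they parse. The paths mirror the
# script; nothing in this block is part of the original.
from OpenSSL import crypto

with open('certs/server_crt.pem', 'rb') as f:
    cert = crypto.load_certificate(crypto.FILETYPE_PEM, f.read())
with open('certs/server_key.pem', 'rb') as f:
    pkey = crypto.load_privatekey(crypto.FILETYPE_PEM, f.read())

# If both calls succeed the files round-trip; print the subject CN and key
# size for a quick eyeball check.
print(cert.get_subject().CN, pkey.bits())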
def Open(_, device):
    io.write(b'Open(%r)\n' % device)
    return True
def GetDeviceOSDName(_, destination):
    io.write(b"GetDeviceOSDName(%r)\n" % destination)
    return "Test"
#for i in range(len(SNQ)):
#    print SNQ[i]

SN = []
for i in range(len(SNQ)):
    SN.append(numpy.outer(SNQ[i], ANE[i]))
SN = numpy.array(SN)
S = nar(SN)
#------------------------------------------------------------------------------
#---- Write to file -----------------------------------------------------------
cp.dump((Q, E, S, S), open("sqe.pkl", 'w'))

sum = 0
for i in range(len(ANE)):
    io.write(E, ANE[i], "se." + str(i + 1))
io.write(E, nar(S), "se.in")
#
#------------------------------------------------------------------------------
#---- Plot --------------------------------------------------------------------
for i in range(len(ANE)):
    # `with` is a reserved word; Gnuplot.py spells this keyword `with_`
    G.replot(Gd(E, ANE[i], with_='l lw 5'))

raw_input("Press <Enter> to continue...")
#------------------------------------------------------------------------------
#==============================================================================
# --- Notes ---
#------------------------------------------------------------------------------
def RescanActiveDevices(_):
    io.write(b"RescanActiveDevices()\n")
def prepare_data(argv=None):
    '''Aggregate sequence data GTDB using a file-of-files'''
    import argparse
    import io
    import sys
    import logging
    import h5py
    import pandas as pd
    from skbio import TreeNode
    from hdmf.common import get_hdf5io
    from hdmf.data_utils import DataChunkIterator
    from ..utils import get_faa_path, get_fna_path, get_genomic_path
    from exabiome.sequence.convert import AASeqIterator, DNASeqIterator, DNAVocabIterator, DNAVocabGeneIterator
    from exabiome.sequence.dna_table import AATable, DNATable, SequenceTable, TaxaTable, DeepIndexFile, NewickString, CondensedDistanceMatrix

    parser = argparse.ArgumentParser()
    parser.add_argument('accessions', type=str,
                        help='file of the NCBI accessions of the genomes to convert')
    parser.add_argument('fadir', type=str,
                        help='directory with NCBI sequence files')
    parser.add_argument('metadata', type=str, help='metadata file from GTDB')
    parser.add_argument('tree', type=str, help='the distances file')
    parser.add_argument('out', type=str, help='output HDF5')
    grp = parser.add_mutually_exclusive_group()
    parser.add_argument('-e', '--emb', type=str, help='embedding file', default=None)
    grp.add_argument('-p', '--protein', action='store_true', default=False,
                     help='get paths for protein files')
    grp.add_argument('-c', '--cds', action='store_true', default=False,
                     help='get paths for CDS files')
    grp.add_argument('-g', '--genomic', action='store_true', default=False,
                     help='get paths for genomic files (default)')
    parser.add_argument('-D', '--dist_h5', type=str,
                        help='the distances file', default=None)
    parser.add_argument('-d', '--max_deg', type=float, default=None,
                        help='max number of degenerate characters in protein sequences')
    parser.add_argument('-l', '--min_len', type=float, default=None,
                        help='min length of sequences')
    parser.add_argument('-V', '--vocab', action='store_true', default=False,
                        help='store sequences as vocabulary data')

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args(args=argv)

    if not any([args.protein, args.cds, args.genomic]):
        args.genomic = True

    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format='%(asctime)s - %(message)s')
    logger = logging.getLogger()

    # read accessions
    logger.info('reading accessions %s' % args.accessions)
    with open(args.accessions, 'r') as f:
        taxa_ids = [l[:-1] for l in f.readlines()]

    # get paths to Fasta Files
    fa_path_func = get_genomic_path
    if args.cds:
        fa_path_func = get_fna_path
    elif args.protein:
        fa_path_func = get_faa_path
    fapaths = [fa_path_func(acc, args.fadir) for acc in taxa_ids]

    di_kwargs = dict()
    # if a distance matrix file has been given, read and select relevant distances
    if args.dist_h5:
        #############################
        # read and filter distances
        #############################
        logger.info('reading distances from %s' % args.dist_h5)
        with h5py.File(args.dist_h5, 'r') as f:
            dist = f['distances'][:]
            dist_taxa = f['leaf_names'][:].astype('U')
        logger.info('selecting distances for taxa found in %s' % args.accessions)
        dist = select_distances(taxa_ids, dist_taxa, dist)
        dist = CondensedDistanceMatrix('distances', data=dist)
        di_kwargs['distances'] = dist

    #############################
    # read and filter taxonomies
    #############################
    logger.info('reading taxonomies from %s' % args.metadata)
    taxlevels = ['domain', 'phylum', 'class', 'order', 'family', 'genus', 'species']

    def func(row):
        dat = dict(zip(taxlevels, row['gtdb_taxonomy'].split(';')))
        dat['species'] = dat['species'].split(' ')[1]
        dat['gtdb_genome_representative'] = row['gtdb_genome_representative'][3:]
        dat['accession'] = row['accession'][3:]
        return pd.Series(data=dat)

    logger.info('selecting GTDB taxonomy for taxa found in %s' % args.accessions)
    taxdf = pd.read_csv(args.metadata, header=0, sep='\t')[['accession', 'gtdb_taxonomy', 'gtdb_genome_representative']]\
        .apply(func, axis=1)\
        .set_index('accession')\
        .filter(items=taxa_ids, axis=0)

    #############################
    # read and filter embeddings
    #############################
    emb = None
    if args.emb is not None:
        logger.info('reading embeddings from %s' % args.emb)
        with h5py.File(args.emb, 'r') as f:
            emb = f['embedding'][:]
            emb_taxa = f['leaf_names'][:]
        logger.info('selecting embeddings for taxa found in %s' % args.accessions)
        emb = select_embeddings(taxa_ids, emb_taxa, emb)

    #############################
    # read and trim tree
    #############################
    logger.info('reading tree from %s' % args.tree)
    root = TreeNode.read(args.tree, format='newick')

    logger.info('transforming leaf names for shearing')
    for tip in root.tips():
        tip.name = tip.name[3:].replace(' ', '_')

    logger.info('shearing taxa not found in %s' % args.accessions)
    rep_ids = taxdf['gtdb_genome_representative'].values
    root = root.shear(rep_ids)

    logger.info('converting tree to Newick string')
    bytes_io = io.BytesIO()
    root.write(bytes_io, format='newick')
    tree_str = bytes_io.getvalue()
    tree = NewickString('tree', data=tree_str)

    if di_kwargs.get('distances') is None:
        from scipy.spatial.distance import squareform
        tt_dmat = root.tip_tip_distances()
        if (rep_ids != taxa_ids).any():
            tt_dmat = get_nonrep_matrix(taxa_ids, rep_ids, tt_dmat)
        dmat = tt_dmat.data
        di_kwargs['distances'] = CondensedDistanceMatrix('distances', data=dmat)

    h5path = args.out

    logger.info("reading %d Fasta files" % len(fapaths))
    logger.info("Total size: %d", sum(os.path.getsize(f) for f in fapaths))

    if args.vocab:
        if args.protein:
            SeqTable = SequenceTable
            seqit = AAVocabIterator(fapaths, logger=logger, min_seq_len=args.min_len)
        else:
            SeqTable = DNATable
            if args.cds:
                logger.info("reading and writing CDS sequences")
                seqit = DNAVocabGeneIterator(fapaths, logger=logger, min_seq_len=args.min_len)
            else:
                seqit = DNAVocabIterator(fapaths, logger=logger, min_seq_len=args.min_len)
    else:
        if args.protein:
            logger.info("reading and writing protein sequences")
            seqit = AASeqIterator(fapaths, logger=logger, max_degenerate=args.max_deg, min_seq_len=args.min_len)
            SeqTable = AATable
        else:
            logger.info("reading and writing DNA sequences")
            seqit = DNASeqIterator(fapaths, logger=logger, min_seq_len=args.min_len)
            SeqTable = DNATable

    seqit_bsize = 2**25
    if args.protein:
        seqit_bsize = 2**15
    elif args.cds:
        seqit_bsize = 2**18

    # set up DataChunkIterators
    packed = DataChunkIterator.from_iterable(iter(seqit), maxshape=(None,),
                                             buffer_size=seqit_bsize,
                                             dtype=np.dtype('uint8'))
    seqindex = DataChunkIterator.from_iterable(seqit.index_iter, maxshape=(None,),
                                               buffer_size=2**0,
                                               dtype=np.dtype('int'))
    names = DataChunkIterator.from_iterable(seqit.names_iter, maxshape=(None,),
                                            buffer_size=2**0,
                                            dtype=np.dtype('U'))
    ids = DataChunkIterator.from_iterable(seqit.id_iter, maxshape=(None,),
                                          buffer_size=2**0,
                                          dtype=np.dtype('int'))
    taxa = DataChunkIterator.from_iterable(seqit.taxon_iter, maxshape=(None,),
                                           buffer_size=2**0,
                                           dtype=np.dtype('uint16'))
    seqlens = DataChunkIterator.from_iterable(seqit.seqlens_iter, maxshape=(None,),
                                              buffer_size=2**0,
                                              dtype=np.dtype('uint32'))

    io = get_hdf5io(h5path, 'w')

    tt_args = ['taxa_table', 'a table for storing taxa data', taxa_ids]
    tt_kwargs = dict()
    for t in taxlevels[1:]:
        tt_args.append(taxdf[t].values)
    if emb is not None:
        tt_kwargs['embedding'] = emb
    tt_kwargs['rep_taxon_id'] = rep_ids

    taxa_table = TaxaTable(*tt_args, **tt_kwargs)

    seq_table = SeqTable(
        'seq_table',
        'a table storing sequences for computing sequence embedding',
        io.set_dataio(names, compression='gzip', chunks=(2**15,)),
        io.set_dataio(packed, compression='gzip', maxshape=(None,), chunks=(2**15,)),
        io.set_dataio(seqindex, compression='gzip', maxshape=(None,), chunks=(2**15,)),
        io.set_dataio(seqlens, compression='gzip', maxshape=(None,), chunks=(2**15,)),
        io.set_dataio(taxa, compression='gzip', maxshape=(None,), chunks=(2**15,)),
        taxon_table=taxa_table,
        id=io.set_dataio(ids, compression='gzip', maxshape=(None,), chunks=(2**15,)))

    difile = DeepIndexFile(seq_table, taxa_table, tree, **di_kwargs)

    io.write(difile, exhaust_dci=False)
    io.close()

    logger.info("reading %s" % (h5path))
    h5size = os.path.getsize(h5path)
    logger.info("HDF5 size: %d", h5size)
def Transmit(_, cmd):
    io.write(b"Transmit(dest: 0x%x, src: 0x%x, op: 0x%x, data: <%s>)\n"
             % (cmd.destination, cmd.initiator, cmd.opcode,
                cec_cmd_get_data(cmd).encode('hex')))
    return True
def convert_header_field(io, header):
    if isinstance(header, list):
        if len(header) == 0:
            io.write(u"[]")
        else:
            io.write(u"\n")
            for item in header:
                io.write(u" - ")
                convert_header_field(io, item)
    elif isinstance(header, str):
        io.write(header)
    elif isinstance(header, bytes):
        try:
            io.write(header.decode('utf-8'))
        except UnicodeDecodeError:
            io.write(binascii.hexlify(header).decode('us-ascii'))
    else:
        io.write(repr(header))
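# A quick usage sketch for convert_header_field above, assuming only the
# standard library (io, binascii). The sample header values are made up for
# illustration.
import binascii
import io as _io  # distinct alias so it doesn't clash with the `io` parameter

buf = _io.StringIO()
convert_header_field(buf, ["X-Custom", b"plain-bytes", b"\xff\xfe", 42])
# Lists render as " - " items; decodable bytes become text, undecodable bytes
# are hex-encoded, and anything else falls back to repr().
print(buf.getvalue())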