Example #1
def use(url, pattern, nodeid, nodeclass, fieldnames, absolutize, format,
        pagekey, pagerange, output):
    """Uses predefined pattern to extract page data"""
    pat = PATTERNS[pattern]
    fields = fieldnames.split(',') if fieldnames else pat['deffields']
    findata = use_pattern(url, pattern, nodeid, nodeclass, fieldnames,
                          absolutize, pagekey, pagerange)

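    # some predefined patterns can only be rendered as JSON; force the format for those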
    if pat['json_only']:
        format = 'json'

    if output:
        io = open(output, 'w', encoding='utf8')
    else:
        io = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)
    if format in ('text', 'csv'):
        writer = csv.DictWriter(io, fieldnames=fields)
        writer.writeheader()
        for item in findata:
            writer.writerow(item)
    elif format == 'json':
        io.write(json.dumps(findata, indent=4))
Example #2
def execute(operator, operand, acc):
	"""Execute a single instruction, and return new desired accumulator result"""

	global program_counter, z_flag, p_flag, memory, halt_flag

	if   operator == instruction.HLT: # 0xx
		if operand == 0: # HLT 00 is actually HLT
			halt_flag = True

	elif operator == instruction.ADD: # 1xx
		acc += memory[operand]
		acc = truncate(acc)

	elif operator == instruction.SUB: # 2xx
		acc -= memory[operand]
		acc = truncate(acc)

	elif operator == instruction.STA: # 3xx
		memory[operand] = acc
		##trace("m[" + str(operand) + "]=" + str(acc))

	elif operator == instruction.LDA: # 5xx
		acc = memory[operand]
		##trace("a=m[" + str(operand) + "]")

	elif operator == instruction.BRA: # 6xx
		program_counter = operand

	elif operator == instruction.BRZ: # 7xx
		if z_flag:
			program_counter = operand

	elif operator == instruction.BRP: # 8xx
		if p_flag:
			program_counter = operand

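	# I/O instructions: 901 (INP) reads a value into the accumulator, 902 (OUT) prints it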
	elif operator == instruction.IO: # 9xx
		if operand == instruction.getOperand(instruction.INP): # 901
			if not STDIN_REDIRECTED:
				sys.stdout.write("in? ")
			value = io.read()
			#TODO: should we cope with negative numbers here and complement appropriately?
			#TODO: Should honour buswidth here depending on decimal/binary/hexadecimal io mode
			if value < 0 or value > 999:
				raise ValueError("Out of range value:" + str(value))
			acc = truncate(value)

		elif operand == instruction.getOperand(instruction.OUT): # 902
			if not STDOUT_REDIRECTED:
				sys.stdout.write("out=")
			io.write(acc)

	else: # unhandled operator
		raise ValueError("Unknown operator:" + str(operator))

	update_flags(acc)
	return acc
Example #3
    def GetActiveDevices(_):
        io.write(b"GetActiveDevices()\n")

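        # list subclass so the returned flags also expose a .primary attribute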
        class _L(list):
            @property
            def primary(self):
                return 1

        return _L([False, True, False, False, True] + [False] * 11)
Example #4
def execute(operator, operand, acc):
    """Execute a single instruction, and return new desired accumulator result"""

    global program_counter, z_flag, p_flag, memory, halt_flag

    if operator == instruction.HLT:  # 0xx
        if operand == 0:  # HLT 00 is actually HLT
            halt_flag = True

    elif operator == instruction.ADD:  # 1xx
        acc += memory[operand]
        acc = truncate(acc)

    elif operator == instruction.SUB:  # 2xx
        acc -= memory[operand]
        acc = truncate(acc)

    elif operator == instruction.STA:  # 3xx
        memory[operand] = acc
        ##trace("m[" + str(operand) + "]=" + str(acc))

    elif operator == instruction.LDA:  # 5xx
        acc = memory[operand]
        ##trace("a=m[" + str(operand) + "]")

    elif operator == instruction.BRA:  # 6xx
        program_counter = operand

    elif operator == instruction.BRZ:  # 7xx
        if z_flag:
            program_counter = operand

    elif operator == instruction.BRP:  # 8xx
        if p_flag:
            program_counter = operand

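    # I/O instructions: 901 (INP) reads a value into the accumulator, 902 (OUT) prints it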
    elif operator == instruction.IO:  # 9xx
        if operand == instruction.getOperand(instruction.INP):  # 901
            if not STDIN_REDIRECTED:
                sys.stdout.write("in? ")
            value = io.read()
            #TODO: should we cope with negative numbers here and complement appropriately?
            #TODO: Should honour buswidth here depending on decimal/binary/hexadecimal io mode
            if value < 0 or value > 999:
                raise ValueError("Out of range value:" + str(value))
            acc = truncate(value)

        elif operand == instruction.getOperand(instruction.OUT):  # 902
            if not STDOUT_REDIRECTED:
                sys.stdout.write("out=")
            io.write(acc)

    else:  # unhandled operator
        raise ValueError("Unknown operator:" + str(operator))

    update_flags(acc)
    return acc
Example #5
 def read(self, n=-1):
     data = self.stream.read(n)
     # FIXME: Support odd length reads
     assert len(data) % 2 == 0
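     # emit each adjacent byte pair swapped, reversing 16-bit endianness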
     io = BytesIO()
     for i in range(0, len(data), 2):
         io.write(data[i + 1:i + 2])
         io.write(data[i:i + 1])
     io.seek(0)
     return io.getvalue()
Example #6
 def read(self, n=-1):
     data = self.stream.read(n)
     # FIXME: Support odd length reads
     assert len(data) % 2 == 0
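     # emit each adjacent byte pair swapped, reversing 16-bit endianness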
     io = BytesIO()
     for i in range(0, len(data), 2):
         io.write(data[i + 1 : i + 2])
         io.write(data[i : i + 1])
     io.seek(0)
     return io.getvalue()
Example #7
def write(memory, filename):
	"""write the contents of memory to the file"""
	
	with open(filename, "wt") as f:
		size = len(memory)
		startaddr = min(memory)

		for addr in range(startaddr, startaddr + size):
			#if PREFIX_ADDR:
			#	io.write(addr, file=f)
			io.write(memory[addr], file=f)
Example #8
def _write(filename,
           fd,
           format,
           io,
           images,
           parallel=None,
           append=False,
           **kwargs):
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(
                '{}-format can only store 1 Atoms object.'.format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError("Can't write to {}-format".format(format))

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is not None:
            return io.write(filename, images, append=append, **kwargs)
        raise ValueError("Can't write more than one image to file-descriptor "
                         'using json-format.')

    if io.acceptsfd:
        open_new = (fd is None)
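        # we own the file object only if we open it here, so close it in finally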
        try:
            if open_new:
                mode = 'wb' if io.isbinary else 'w'
                if append:
                    mode = mode.replace('w', 'a')
                fd = open_with_compression(filename, mode)
                # XXX remember to re-enable compressed open
                # fd = io.open(filename, mode)
            return io.write(fd, images, **kwargs)
        finally:
            if open_new and fd is not None:
                fd.close()
    else:
        if fd is not None:
            raise ValueError(
                "Can't write {}-format to file-descriptor".format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        elif append:
            raise ValueError(
                "Cannot append to {}-format, write-function "
                "does not support the append keyword.".format(format))
        else:
            return io.write(filename, images, **kwargs)
Example #9
def output_entry(entry, profile, limit_fields=None):

    # debug build assertion that limit_fields only contains fields we know about
    if __debug__ and limit_fields is not None:
        assert len([f for f in limit_fields if f not in _field_order]) == 0
    
    fmt = profile["_formatter"]
    # the assignment below makes `io` a local name, shadowing the stdlib io
    # module, so grab StringIO before rebinding it
    from io import StringIO
    io = StringIO()
    io.write(out_line("BEGIN", None, "VCARD", None))
    io.write(out_line("VERSION", None, profile["_version"], None))

    if limit_fields is None:
        fields = _field_order
    else:
        fields = [f for f in _field_order if f in limit_fields]

    for f in fields:
        if f in entry and f in profile:
            func = profile[f]
            # does it have a limit?  (nice scary introspection :-)
            if "limit" in func.__code__.co_varnames[:func.__code__.co_argcount]:
                lines = func(entry[f], fmt, limit = profile["_limit"])
            else:
                lines = func(entry[f], fmt)
            if len(lines):
                io.write(lines)

    io.write(out_line("END", None, "VCARD", fmt))
    return io.getvalue()
Example #10
def output_entry(entry, profile, limit_fields=None):

    # debug build assertion that limit_fields only contains fields we know about
    if __debug__ and limit_fields is not None:
        assert len([f for f in limit_fields if f not in _field_order]) == 0

    fmt = profile["_formatter"]
    # the assignment below makes `io` a local name, shadowing the stdlib io
    # module, so grab StringIO before rebinding it
    from io import StringIO
    io = StringIO()
    io.write(out_line("BEGIN", None, "VCARD", None))
    io.write(out_line("VERSION", None, profile["_version"], None))

    if limit_fields is None:
        fields = _field_order
    else:
        fields = [f for f in _field_order if f in limit_fields]

    for f in fields:
        if f in entry and f in profile:
            func = profile[f]
            # does it have a limit?  (nice scary introspection :-)
            if "limit" in func.__code__.co_varnames[:func.__code__.
                                                    co_argcount]:
                lines = func(entry[f], fmt, limit=profile["_limit"])
            else:
                lines = func(entry[f], fmt)
            if len(lines):
                io.write(lines)

    io.write(out_line("END", None, "VCARD", fmt))
    return io.getvalue()
Example #11
def convolveDOS(dosFiles,resFile):
  D = []
  for i in dosFiles:
    D.append( read(filename=i) )

  import io  # NOTE: must resolve to a project-local io module (the stdlib io has no load()/write(x, y, ...))
  Res = io.load(resFile)

  R = []
  for d in range(len(D)):
    er,rr = convolve(D[d],Res)
    R.append( (er,rr) )
    io.write( er, rr, numpy.zeros(rr.shape), dosFiles[d]+".conv" )
  return
Example #12
    def send(self, data, flags=0):
        io = BytesIO()
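        # copy the payload into a contiguous buffer before handing it to OpenSSL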
        io.write(data)
        buffer = io.getvalue()

        self.__send_lock.acquire()

        try:
            return self.__iowait(self._connection.send, buffer, flags)
        except OpenSSL.SSL.SysCallError as e:
            if e.args[0] == -1 and not data:
                # errors when writing empty strings are expected and can be ignored
                return 0
            raise
        finally:
            self.__send_lock.release()
Example #13
    def send(self, data, flags=0):
        io = StringIO()
        io.write(data)
        buffer = io.getvalue()

        self.__send_lock.acquire()

        try:
            return self.__iowait(self._connection.send, buffer, flags)
        except OpenSSL.SSL.SysCallError as e:
            if e.args[0] == -1 and not data:
                # errors when writing empty strings are expected and can be ignored
                return 0
            raise
        finally:
            self.__send_lock.release()
Example #14
 def save_as(self):
     io = EditorIO(self.editor)
     new_filename = io.ask_for_filename()
     if new_filename:
         return io.write(save_as=new_filename), new_filename
     else:
         return False, None
Example #15
def write_to_stream(node: Node,
                    io: io.IOBase,
                    indentation=0,
                    indentation_level=INDENTATION_LEVEL):
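    # NOTE: indentation is threaded through the recursion but not yet emitted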
    if type(node) == Element:
        io.write(f'<{node.name} {attrs(node)}>')
        for child in node.children:
            write_to_stream(child, io, indentation + indentation_level)
        io.write(f'</{node.name}>')
    elif type(node) == SelfClosingElement:
        io.write(f'<{node.name} {attrs(node)}/>')
    else:
        io.write(node.data)
Example #16
def extract(url, xpath, fieldnames, absolutize, post, pagekey, pagerange,
            format, output):
    """Extract data with xpath"""
    fields = fieldnames.split(',') if fieldnames else DEFAULT_FIELDS
    data = extract_data_xpath(url, xpath, fieldnames, absolutize, post,
                              pagekey, pagerange)
    if output:
        io = open(output, 'w', encoding='utf8')
    else:
        io = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)
    if format in ('text', 'csv'):
        writer = csv.DictWriter(io, fieldnames=fields)
        writer.writeheader()
        for item in data:
            writer.writerow(item)
    elif format == 'json':
        io.write(json.dumps(data, indent=4))
Example #17
def gettable(url, nodeid, nodeclass, fieldnames, format, pagekey, pagerange,
             output):
    """Extracts table with data from html"""
    findata = get_table(url, nodeid, nodeclass, fieldnames, pagekey, pagerange)

    if output:
        io = open(output, 'w', encoding='utf8')
    else:
        io = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)
    if format in ('text', 'csv'):
        writer = csv.writer(io)
        if fieldnames:
            writer.writerow(fieldnames.split(','))
        for item in findata:
            writer.writerow(item)
    elif format == 'json':
        io.write(json.dumps(findata, sort_keys=True, indent=4))
Example #18
    def test_utf8_writer(self):
        writer = converters.getwriter('utf-8')
        io = writer(self.io)
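        # kitchen-style writers encode unicode text but pass byte strings through unchanged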
        io.write(self.u_japanese + '\n')
        io.seek(0)
        result = io.read().strip()
        tools.eq_(result, self.utf8_japanese)

        io.seek(0)
        io.truncate(0)
        io.write(self.euc_jp_japanese + b'\n')
        io.seek(0)
        result = io.read().strip()
        tools.eq_(result, self.euc_jp_japanese)

        io.seek(0)
        io.truncate(0)
        io.write(self.utf8_japanese + b'\n')
        io.seek(0)
        result = io.read().strip()
        tools.eq_(result, self.utf8_japanese)
Example #19
    def test_utf8_writer(self):
        writer = converters.getwriter('utf-8')
        io = writer(self.io)
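        # kitchen-style writers encode unicode text but pass byte strings through unchanged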
        io.write(self.u_japanese + '\n')
        io.seek(0)
        result = io.read().strip()
        tools.eq_(result, self.utf8_japanese)

        io.seek(0)
        io.truncate(0)
        io.write(self.euc_jp_japanese + b'\n')
        io.seek(0)
        result = io.read().strip()
        tools.eq_(result, self.euc_jp_japanese)

        io.seek(0)
        io.truncate(0)
        io.write(self.utf8_japanese + b'\n')
        io.seek(0)
        result = io.read().strip()
        tools.eq_(result, self.utf8_japanese)
Example #20
def cp(content, fname): # more write than copy, buuuut...
	log("writing %s"%(fname,), 2)
	write(content, fname)
Example #21
    def write(self, io):
        # only need 1 free list entry (until reserved space is supported)

        # free list always has extra blank 4-bytes at end.
        # free list available grows by 10?

        resources = self._build_res_names()

        resources.extend(self._resources)

        index_used = len(resources)
        index_size = 10 + index_used // 10 * 10

        # remove reserved space from the last entry
        ix = len(resources)
        if ix and resources[ix - 1][4]:
            (rtype, rid, attr, data, _) = resources[ix - 1]
            resources[ix - 1] = (rtype, rid, attr, data, 0)

        freelist_used = 1
        for x in resources:
            if x[4]:
                freelist_used += 1
        freelist_size = 10 + freelist_used // 10 * 10

        extra = freelist_size * 8 + 4 + index_size * 20

        map_size = 32 + extra
        map_offset = 0x8c

        # version, offset to map, sizeof map, 128 bytes (reserved)
        rheader = struct.pack("<III128x", 0, map_offset, map_size)

        # handle:4, flags:2, offset:4, size:4, toindex:2, filenum:2, id:2,
        # indexSize:4, indexUsed:4, flSize:2,flUsed:2,
        rmap = struct.pack("<IHIIHHHIIHH", 0, 0, map_offset, map_size,
                           32 + freelist_size * 8 + 4, 0, 0, index_size,
                           index_used, freelist_size, freelist_used)

        eof = 0x8c + map_size
        fl = []

        index = bytearray()
        for (rtype, rid, attr, data, reserved) in resources:
            # type:2, id:4, offset:4, attr:2, size:4, handle:4
            index += struct.pack("<HIIHII", rtype, rid, eof, attr, len(data),
                                 0)
            eof += len(data)
            if reserved:
                fl.append((eof, reserved))
                eof += reserved

        index += bytes(20 * ((index_size - index_used)))

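        # sentinel free-list entry: everything from EOF to the top of the 32-bit range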
        fl.append((eof, 0xffffffff - eof))

        fl = self._merge_free_list(fl)

        freelist = bytearray()
        for (offset, size) in fl:
            freelist += struct.pack("<II", offset, size)
        freelist += bytes(8 * (freelist_size - freelist_used) + 4)

        io.write(rheader)
        io.write(rmap)
        io.write(freelist)
        io.write(index)

        for (_, _, attr, data, reserved) in resources:
            io.write(data)
            if reserved:
                io.write(bytes(reserved))

        return eof
Example #22
def getDOS(
    Data           = "test_cases/ni_0300/sqe.pkl",
    MT             = "test_cases/ni_0300/mqe.pkl",
    C_ms           = numpy.arange(0.0,2.0,0.1),
    backgroundFrac =    0.90,
    constantFrac   =    0.00,
    cutoff         =    8.5,
    elasticCutAvg  =    3,
    longE          =   40.0,
    cutRange       =  (1e-20,1e20),
    eStop          =   60.0 ,
    T              =  300.0,
    M              =   58.6934,
    N              =   10,
    Tol            =    1.0e-7,
    maxIter        =  50,
    interactive    = True,
    viewDirectory  = 'tmp',
    outputDir      = 'tmp',
    ):

    """
      This is the `main` function for finding a DOS and a multiphonon/
    multiple scattering correction from experimental scattering.

    user.py contains the user modifiable parameters.
    """

    # *** LJ ***
    # check output dirs
    _checkOutdir( viewDirectory )
    _checkOutdir( outputDir )

    # copy the magic html file
    shutil.copy( os.path.join(paths.data, magic_html ), viewDirectory )
    #shutil.copy(magic_html, viewDirectory)

    if interactive:
        # open a browser
        viewhtml = os.path.abspath( os.path.join( viewDirectory, magic_html ) )
        bthread = BrowserThread( 'file://' + viewhtml )
        bthread.start()

    # record time
    import time
    time1 = time.time()
    # **********


    # --- Prep S(Q,E) for calculation ------------------------------------------------
    sqe = expSqe(Data, T, M, cutRange=cutRange)

    # *** LJ ***
    mqe = expSqe(MT, T, M, cutRange=cutRange) if MT else None
    # **********

    if mqe:
        sqe.removeBackground(mqe, backgroundFrac, constantFrac)
    sqe.cropForCalc(cutoff, longE, eStop, elasticCutAvg)
    sqe.norm2one()
    sqe.expand(2.0)
    sqe0 = expSqe(sqe)

    sqe.plotSE(viewDirectory)
    sqe.plotSQE(viewDirectory, lower=1e-30, upper=2.5e-4)
    sqe.plotMask(viewDirectory)
    
    # --- Fitting ------------------------------------------------------------------
    C_ms += 1.0  # This is a hack, until the internal rep of C_ms is changed.
    # ------------------------------------------------------------------------------
    res = getCorrectedScatter(sqe, C_ms, N, Tol, maxIter, interactive,
                              vd=viewDirectory)
    sqeCalc, dosCalc, cmsCalc, res, C_ms, lsqSc, lsqMu, lsqSl, LSQ = \
        getBestSol(sqe0, res, C_ms)
    
    dosCalc.plotDOS(viewDirectory)
    
    # --- Output to file and pickle ------------------------------------------------
    cp.dump((sqe0, C_ms, res, lsqSc, lsqMu, lsqSl, LSQ),
            open(os.path.join(outputDir, "all.pkl"), 'wb'), -1)
    cp.dump((sqe0, sqeCalc, dosCalc, cmsCalc),
            open(os.path.join(outputDir, "sol.pkl"), 'wb'), -1)
    # *** LJ ***
    saveDOSasHistogram( dosCalc, os.path.join( outputDir, "doshist.pkl") )
    # **********
    
    f = open(os.path.join(outputDir, "C_ms"), 'w')
    f.write("C_ms = %lf\n" % (C_ms[numpy.argmin(numpy.array(LSQ)**2)] - 1.0))
    f.close()
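    # the project-local io.write(x, y, filename) dumps paired arrays as text columns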
    io.write(dosCalc.e, dosCalc.g, os.path.join(outputDir, "Dos"))
    io.write(dosCalc.e, dosCalc.gz, os.path.join(outputDir, "Dos.z"))
    io.write(sqe0.e, sqe0.se, os.path.join(outputDir, "Se.exp"))
    io.write(sqe0.e, nar(nar(sqeCalc)), os.path.join(outputDir, "Se.clc"))
    io.write(sqe0.e, nar(nar(sqeCalc[1:])), os.path.join(outputDir, "Multi.clc"))
    io.write(sqe0.e, nar(nar(sqeCalc[1:])) / cmsCalc,
             os.path.join(outputDir, "Mph.clc"))
    io.write(sqe0.e, (cmsCalc - 1.0) * nar(nar(sqeCalc[1:])) / cmsCalc,
             os.path.join(outputDir, "Msc.clc"))
    
    # --- `Interactive` Output -----------------------------------------------------
    SQE = expSqe(sqe0.q, sqe0.e, nar(sqeCalc), sqe0.sqerr, sqe0.T, sqe0.M,
                 cutRange=cutRange)

    plotComp(sqe0, sqeCalc, viewDirectory)
    plotLSQ(C_ms, lsqSc, lsqMu, lsqSl, LSQ, viewDirectory)
    plotSQE(SQE, viewDirectory, 'sqeCalc.png', title='S(Q,E) Calculated',
            lower=1e-30, upper=2.5e-4)
    return
Example #23
        serial = 1

    key = crypto.PKey()
    key.generate_key(crypto.TYPE_RSA, 4096)
    crt = crypto.X509()
    crt.get_subject().C = args.country or 'UK'
    crt.get_subject().ST = args.state or 'South Wales'
    crt.get_subject().L = args.city or 'Pontypridd'
    crt.get_subject().O = args.company or 'Crossbar.IO'
    crt.get_subject().OU = args.org or 'XBR'
    crt.get_subject().CN = args.host or gethostname()
    crt.set_serial_number(serial)
    crt.gmtime_adj_notBefore(0)
    crt.gmtime_adj_notAfter(7 * 24 * 60 * 60 * (args.weeks or 52))
    crt.set_issuer(crt.get_subject())
    crt.set_pubkey(key)
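    # NOTE: SHA-1 signatures are deprecated; modern code would use 'sha256'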
    crt.sign(key, 'sha1')

    c_str = crypto.dump_certificate(crypto.FILETYPE_PEM, crt)
    k_str = crypto.dump_privatekey(crypto.FILETYPE_PEM, key)

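    # dump_certificate/dump_privatekey return bytes, so decode before writing text files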
    with open('certs/server_crt.pem', 'w') as io:
        io.write(c_str.decode('utf-8'))

    with open('certs/server_key.pem', 'w') as io:
        io.write(k_str.decode('utf-8'))

    print(
        "Server certificates installed, don't forget to regenerate your client certs!"
    )
Example #24
 def Open(_, device):
     io.write(b'Open(%r)\n' % device)
     return True
Example #25
def cp(content, fname):  # more write than copy, buuuut...
    log("writing %s" % (fname, ), 2)
    write(content, fname)
Example #26
 def GetDeviceOSDName(_, destination):
     io.write(b"GetDeviceOSDName(%r)\n" % destination)
     return "Test"
Example #27
#for i in range(len(SNQ)):
#    print SNQ[i]
    
SN = []
for i in range(len(SNQ)):
  SN.append( numpy.outer(SNQ[i],ANE[i]) )

SN = numpy.array(SN)
S = nar(SN)
#------------------------------------------------------------------------------

#---- Write to file -----------------------------------------------------------
cp.dump((Q,E,S,S),open("sqe.pkl",'wb'))  # pickle needs a binary-mode file
for i in range(len(ANE)):
  io.write(E,ANE[i],"se."+str(i+1))
io.write(E, nar(S),"se.in")
#

#------------------------------------------------------------------------------

#---- Plot --------------------------------------------------------------------
for i in range(len(ANE)):
  G.replot(Gd(E,ANE[i],with_='l lw 5'))  # `with` is a reserved word; newer Gnuplot.py expects `with_`

raw_input("Press <Enter> to continue...")
#------------------------------------------------------------------------------

#==============================================================================
# --- Notes ---
#------------------------------------------------------------------------------
Example #28
 def RescanActiveDevices(_):
     io.write(b"RescanActiveDevices()\n")
Example #29
def prepare_data(argv=None):
    '''Aggregate sequence data GTDB using a file-of-files'''
    import argparse
    import io
    import logging
    import os  # used below for os.path.getsize
    import sys

    import h5py
    import numpy as np  # used below for np.dtype
    import pandas as pd

    from skbio import TreeNode

    from hdmf.common import get_hdf5io
    from hdmf.data_utils import DataChunkIterator

    from ..utils import get_faa_path, get_fna_path, get_genomic_path
    from exabiome.sequence.convert import (AASeqIterator, AAVocabIterator,
                                           DNASeqIterator, DNAVocabIterator,
                                           DNAVocabGeneIterator)
    from exabiome.sequence.dna_table import (AATable, DNATable, SequenceTable,
                                             TaxaTable, DeepIndexFile,
                                             NewickString,
                                             CondensedDistanceMatrix)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'accessions',
        type=str,
        help='file of the NCBI accessions of the genomes to convert')
    parser.add_argument('fadir',
                        type=str,
                        help='directory with NCBI sequence files')
    parser.add_argument('metadata', type=str, help='metadata file from GTDB')
    parser.add_argument('tree', type=str, help='the distances file')
    parser.add_argument('out', type=str, help='output HDF5')
    grp = parser.add_mutually_exclusive_group()
    parser.add_argument('-e',
                        '--emb',
                        type=str,
                        help='embedding file',
                        default=None)
    grp.add_argument('-p',
                     '--protein',
                     action='store_true',
                     default=False,
                     help='get paths for protein files')
    grp.add_argument('-c',
                     '--cds',
                     action='store_true',
                     default=False,
                     help='get paths for CDS files')
    grp.add_argument('-g',
                     '--genomic',
                     action='store_true',
                     default=False,
                     help='get paths for genomic files (default)')
    parser.add_argument('-D',
                        '--dist_h5',
                        type=str,
                        help='the distances file',
                        default=None)
    parser.add_argument(
        '-d',
        '--max_deg',
        type=float,
        default=None,
        help='max number of degenerate characters in protein sequences')
    parser.add_argument('-l',
                        '--min_len',
                        type=float,
                        default=None,
                        help='min length of sequences')
    parser.add_argument('-V',
                        '--vocab',
                        action='store_true',
                        default=False,
                        help='store sequences as vocabulary data')

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args(args=argv)

    if not any([args.protein, args.cds, args.genomic]):
        args.genomic = True

    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format='%(asctime)s - %(message)s')
    logger = logging.getLogger()

    # read accessions
    logger.info('reading accessions %s' % args.accessions)
    with open(args.accessions, 'r') as f:
        taxa_ids = [line.strip() for line in f]

    # get paths to Fasta Files
    fa_path_func = get_genomic_path
    if args.cds:
        fa_path_func = get_fna_path
    elif args.protein:
        fa_path_func = get_faa_path
    fapaths = [fa_path_func(acc, args.fadir) for acc in taxa_ids]

    di_kwargs = dict()
    # if a distance matrix file has been given, read and select relevant distances
    if args.dist_h5:
        #############################
        # read and filter distances
        #############################
        logger.info('reading distances from %s' % args.dist_h5)
        with h5py.File(args.dist_h5, 'r') as f:
            dist = f['distances'][:]
            dist_taxa = f['leaf_names'][:].astype('U')
        logger.info('selecting distances for taxa found in %s' %
                    args.accessions)
        dist = select_distances(taxa_ids, dist_taxa, dist)
        dist = CondensedDistanceMatrix('distances', data=dist)
        di_kwargs['distances'] = dist

    #############################
    # read and filter taxonomies
    #############################
    logger.info('reading taxonomies from %s' % args.metadata)
    taxlevels = [
        'domain', 'phylum', 'class', 'order', 'family', 'genus', 'species'
    ]

    def func(row):
        dat = dict(zip(taxlevels, row['gtdb_taxonomy'].split(';')))
        dat['species'] = dat['species'].split(' ')[1]
        dat['gtdb_genome_representative'] = row['gtdb_genome_representative'][
            3:]
        dat['accession'] = row['accession'][3:]
        return pd.Series(data=dat)

    logger.info('selecting GTDB taxonomy for taxa found in %s' %
                args.accessions)
    taxdf = (pd.read_csv(args.metadata, header=0, sep='\t')
             [['accession', 'gtdb_taxonomy', 'gtdb_genome_representative']]
             .apply(func, axis=1)
             .set_index('accession')
             .filter(items=taxa_ids, axis=0))

    #############################
    # read and filter embeddings
    #############################
    emb = None
    if args.emb is not None:
        logger.info('reading embeddings from %s' % args.emb)
        with h5py.File(args.emb, 'r') as f:
            emb = f['embedding'][:]
            emb_taxa = f['leaf_names'][:]
        logger.info('selecting embeddings for taxa found in %s' %
                    args.accessions)
        emb = select_embeddings(taxa_ids, emb_taxa, emb)

    #############################
    # read and trim tree
    #############################
    logger.info('reading tree from %s' % args.tree)
    root = TreeNode.read(args.tree, format='newick')

    logger.info('transforming leaf names for shearing')
    for tip in root.tips():
        tip.name = tip.name[3:].replace(' ', '_')

    logger.info('shearing taxa not found in %s' % args.accessions)
    rep_ids = taxdf['gtdb_genome_representative'].values
    root = root.shear(rep_ids)

    logger.info('converting tree to Newick string')
    bytes_io = io.BytesIO()
    root.write(bytes_io, format='newick')
    tree_str = bytes_io.getvalue()
    tree = NewickString('tree', data=tree_str)

    if di_kwargs.get('distances') is None:
        from scipy.spatial.distance import squareform
        tt_dmat = root.tip_tip_distances()
        if (rep_ids != taxa_ids).any():
            tt_dmat = get_nonrep_matrix(taxa_ids, rep_ids, tt_dmat)
        dmat = tt_dmat.data
        di_kwargs['distances'] = CondensedDistanceMatrix('distances',
                                                         data=dmat)

    h5path = args.out

    logger.info("reading %d Fasta files" % len(fapaths))
    logger.info("Total size: %d", sum(os.path.getsize(f) for f in fapaths))

    if args.vocab:
        if args.protein:
            SeqTable = SequenceTable
            seqit = AAVocabIterator(fapaths,
                                    logger=logger,
                                    min_seq_len=args.min_len)
        else:
            SeqTable = DNATable
            if args.cds:
                logger.info("reading and writing CDS sequences")
                seqit = DNAVocabGeneIterator(fapaths,
                                             logger=logger,
                                             min_seq_len=args.min_len)
            else:
                seqit = DNAVocabIterator(fapaths,
                                         logger=logger,
                                         min_seq_len=args.min_len)
    else:
        if args.protein:
            logger.info("reading and writing protein sequences")
            seqit = AASeqIterator(fapaths,
                                  logger=logger,
                                  max_degenerate=args.max_deg,
                                  min_seq_len=args.min_len)
            SeqTable = AATable
        else:
            logger.info("reading and writing DNA sequences")
            seqit = DNASeqIterator(fapaths,
                                   logger=logger,
                                   min_seq_len=args.min_len)
            SeqTable = DNATable

    seqit_bsize = 2**25
    if args.protein:
        seqit_bsize = 2**15
    elif args.cds:
        seqit_bsize = 2**18

    # set up DataChunkIterators
    packed = DataChunkIterator.from_iterable(iter(seqit),
                                             maxshape=(None, ),
                                             buffer_size=seqit_bsize,
                                             dtype=np.dtype('uint8'))
    seqindex = DataChunkIterator.from_iterable(seqit.index_iter,
                                               maxshape=(None, ),
                                               buffer_size=2**0,
                                               dtype=np.dtype('int'))
    names = DataChunkIterator.from_iterable(seqit.names_iter,
                                            maxshape=(None, ),
                                            buffer_size=2**0,
                                            dtype=np.dtype('U'))
    ids = DataChunkIterator.from_iterable(seqit.id_iter,
                                          maxshape=(None, ),
                                          buffer_size=2**0,
                                          dtype=np.dtype('int'))
    taxa = DataChunkIterator.from_iterable(seqit.taxon_iter,
                                           maxshape=(None, ),
                                           buffer_size=2**0,
                                           dtype=np.dtype('uint16'))
    seqlens = DataChunkIterator.from_iterable(seqit.seqlens_iter,
                                              maxshape=(None, ),
                                              buffer_size=2**0,
                                              dtype=np.dtype('uint32'))

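    # NOTE: rebinding `io` below shadows the stdlib io module imported above;
    # all stdlib io uses (io.BytesIO) happen before this point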
    io = get_hdf5io(h5path, 'w')

    tt_args = ['taxa_table', 'a table for storing taxa data', taxa_ids]
    tt_kwargs = dict()
    for t in taxlevels[1:]:
        tt_args.append(taxdf[t].values)
    if emb is not None:
        tt_kwargs['embedding'] = emb
    tt_kwargs['rep_taxon_id'] = rep_ids

    taxa_table = TaxaTable(*tt_args, **tt_kwargs)

    seq_table = SeqTable(
        'seq_table',
        'a table storing sequences for computing sequence embedding',
        io.set_dataio(names, compression='gzip', chunks=(2**15, )),
        io.set_dataio(packed,
                      compression='gzip',
                      maxshape=(None, ),
                      chunks=(2**15, )),
        io.set_dataio(seqindex,
                      compression='gzip',
                      maxshape=(None, ),
                      chunks=(2**15, )),
        io.set_dataio(seqlens,
                      compression='gzip',
                      maxshape=(None, ),
                      chunks=(2**15, )),
        io.set_dataio(taxa,
                      compression='gzip',
                      maxshape=(None, ),
                      chunks=(2**15, )),
        taxon_table=taxa_table,
        id=io.set_dataio(ids,
                         compression='gzip',
                         maxshape=(None, ),
                         chunks=(2**15, )))

    difile = DeepIndexFile(seq_table, taxa_table, tree, **di_kwargs)

    io.write(difile, exhaust_dci=False)
    io.close()

    logger.info("reading %s" % (h5path))
    h5size = os.path.getsize(h5path)
    logger.info("HDF5 size: %d", h5size)
Example #30
 def Transmit(_, cmd):
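     # NOTE: str.encode('hex') is Python 2 only; Python 3 needs binascii.hexlify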
     io.write(b"Transmit(dest: 0x%x, src: 0x%x, op: 0x%x, data: <%s>)\n" %
              (cmd.destination, cmd.initiator, cmd.opcode,
               cec_cmd_get_data(cmd).encode('hex')))
     return True
Example #31
def convert_header_field(io, header):
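    # Recursively render a parsed header value: lists become "-" items, bytes are
    # decoded as UTF-8 (falling back to a hex dump), and other values use repr().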
    if isinstance(header, list):
        if len(header) == 0:
            io.write(u"[]")
        else:
            io.write(u"\n")
            for item in header:
                io.write(u" - ")
                convert_header_field(io, item)
    elif isinstance(header, str):
        io.write(header)
    elif isinstance(header, bytes):
        try:
            io.write(header.decode('utf-8'))
        except UnicodeDecodeError:
            io.write(binascii.hexlify(header).decode('us-ascii'))
    else:
        io.write(repr(header))