def valid_image(filename, no_strict_mrc=False):
    ''' Test if the image is valid

    The file is reported as valid when its size on disk equals the size
    computed from the header: 1024 bytes, plus `nsymbt` bytes, plus
    `nx*ny*nz` elements of the data type selected by `mode`.

    :Parameters:

    filename : str
               Input filename to test
    no_strict_mrc : bool
                    Forwarded to `read_mrc_header`. Strict MRC header checking is
                    recommended - Only EPU MRC files and Yifan's frame alignment
                    require this to be off.

    :Returns:

    flag : bool
           True if image is valid
    '''

    f = util.uopen(filename, 'rb')
    try:
        # The second positional parameter of read_mrc_header is `index`, so the
        # flag must be passed by keyword or it never reaches the header check.
        h = read_mrc_header(f, no_strict_mrc=no_strict_mrc)
        total = file_size(f)
        dtype = numpy.dtype(mrc2numpy[h['mode'][0]])
        n_pixels = int(h['nx'][0]) * int(h['ny'][0]) * int(h['nz'][0])
        expected = 1024 + int(h['nsymbt']) + n_pixels * dtype.itemsize
        return total == expected
    finally:
        util.close(filename, f)
def valid_image(filename):
    ''' Test if the image is valid

    The size of the file is compared with the size computed from the
    header fields `labbyt`, `nx`, `ny`, `nz` and the image count.

    :Parameters:

    filename : str
               Input filename to test

    :Returns:

    flag : bool
           True if image is valid
    '''

    stream = util.uopen(filename, 'rb')
    try:
        hdr = read_spider_header(stream)
        header_bytes = int(hdr['labbyt'])
        pixel_count = int(hdr['nx']) * int(hdr['ny']) * int(hdr['nz'])
        image_bytes = pixel_count * 4
        n_images = count_images(hdr)
        if n_images > 1 or hdr['istack'] == 2:
            # Each image is counted together with its own header block
            expected = header_bytes + n_images * (header_bytes + image_bytes)
        else:
            expected = header_bytes + n_images * image_bytes
        return file_size(stream) == expected
    finally:
        util.close(filename, stream)
def write_image(filename, img, index=None, header=None, inplace=False):
    ''' Write an image array to a file in the MRC format

    :Parameters:

    filename : str
               Name of the output file
    img : array
          Image array
    index : int, optional
            Index to write image in the stack
    header : dict, optional
             Dictionary of header values; when omitted, `img.header` is
             used if the image has such an attribute
    inplace : bool
              Write new image to stack without removing the stack
    '''

    if header is None and hasattr(img, 'header'):
        header = img.header
    # An existing file is only reopened for update when a slot other than the
    # first one is targeted (or any slot with `inplace`); otherwise it is recreated.
    mode = 'rb+' if index is not None and (index > 0 or inplace and index > -1) else 'wb+'
    f = util.uopen(filename, mode)
    try:
        # Header creation happens inside the try block so that the stream is
        # closed even if it fails.
        if header is None or not is_format_header(header):
            header = create_header(img.shape, img.dtype, img.order, header)

        def data_offset():
            # Byte position of image `index`: header, extended header, then
            # `index` images of the same size as `img`.
            return int(header.itemsize + int(header['extended']) +
                       index * img.ravel().shape[0] * img.dtype.itemsize)

        if inplace:
            f.seek(data_offset())
        elif f != filename:
            f.seek(0)
            header.tofile(f)
            # Guard against index=None explicitly instead of relying on
            # None/int ordering.
            if index is not None and index > 0:
                f.seek(data_offset())
        img.tofile(f)
    finally:
        util.close(filename, f)
def main(args): import codecs from util import ureader, uwriter, uopen def handler(x): v = x.object[x.start:x.end] print >> stderr, repr(v), v return (u'', x.end) codecs.register_error('clear', handler) if '-t' not in args: usage(args) tag = map(string.lower, args[1 + args.index('-t')].split(',')) enc = args[1 + args.index('-e')] if '-e' in args else 'utf8' stdin = ureader(sys.stdin) if '-i' not in args else uopen( args[1 + args.index('-i')]) # stdout = codecs.getwriter(enc)(sys.stdout if '-o' not in args else open(args[1 + args.index('-o')], 'wb'), errors='clear') stdout = codecs.getwriter(enc)( sys.stdout if '-o' not in args else open(args[1 + args.index('-o')], 'wb')) stderr = uwriter(sys.stderr) for l in strip(stdin.read(), keep=tag): try: print >> stdout, l except UnicodeDecodeError: print 'problem with', l
def read_image(filename, index=None, header=None, cache=None):
    ''' Read an image from the specified file in the WEB format

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int, optional
            Index of image to get, if None, first image (Default: None)
    header : dict, optional
             Output dictionary to place header values (currently not filled in)
    cache : object, optional
            Not used by this function

    :Returns:

    out : array
          Array with image information from the file
    '''

    idx = index
    if idx is None:
        idx = 0
    stream = util.uopen(filename, 'rb')
    try:
        # NOTE(review): read_web_header as defined alongside this function
        # returns a (header, metadata) pair - confirm the helpers below accept it.
        web_header = read_web_header(stream)
        n_images = count_images(web_header)
        if idx >= n_images:
            raise IOError("Index exceeds number of images in stack: %d < %d" % (idx, n_images))
        offset, ar_args = array_from_header(web_header)
        image_bytes = ar_args[1] * ar_args[0].itemsize
        stream.seek(offset + idx * image_bytes)
        out = util.read_image(stream, *ar_args)
    finally:
        util.close(filename, stream)
    return out
def iter_images(filename, index=None, header=None):
    ''' Read a set of WEB images

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int or array, optional
            Index of image to start, if None, start with the first image (Default: None);
            an iterable is taken as the explicit list of image indices to read
    header : dict, optional
             Output dictionary to place header values (currently not filled in)

    :Returns:

    out : array
          Array with image information from the file
    '''

    f = util.uopen(filename, 'rb')
    if index is None:
        index = 0
    try:
        # NOTE(review): read_web_header as defined alongside this function
        # returns a (header, metadata) pair - confirm the helpers below accept it.
        h = read_web_header(f)
        count = count_images(h)
        offset, ar_args = array_from_header(h)
        # Same per-image size as used by read_image for this format
        image_bytes = ar_args[1] * ar_args[0].itemsize
        if not hasattr(index, '__iter__'):
            index = xrange(index, count)
        else:
            index = numpy.asarray(index).astype(int)
        for i in index:
            # Position on the requested image; previously the index was
            # ignored and images were always read from the first one.
            f.seek(int(offset + i * image_bytes))
            yield util.read_image(f, *ar_args)
    finally:
        util.close(filename, f)
def test_m():
    """Build a MetaphorBuilder for English and pretty-print what it finds for two sample relations."""
    from util import read_seed, N, V

    def tag_ext(tag):
        # Reader callback: tag the words, then extend them
        def apply(words):
            return extended(tagged(words, tag))
        return apply

    lang = 'en'
    rels = [
        (u'1', (u'debt', 'n'), (u'kill', 'v')),
        (u'1', (u'poverty', 'n'), (u'hurl', 'v')),
    ]
    seed_path = env('{SEED_DIR}/{LANG}/{SEEDS}', SEEDS='seeds.ei', LANG=lang)
    seeds = read_seed(uopen(seed_path))
    noun_file, verb_file = cluster[lang]
    nclusters = read_clusters(uopen(noun_file), tag_ext(N))
    vclusters = read_clusters(uopen(verb_file), tag_ext(V))
    builder = MetaphorBuilder(lang, nclusters, vclusters, seeds)
    pprint(builder.find(rels))
def read_web_header(filename, index=None):
    ''' Read the WEB header

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int, ignored
            Index of image to get the header, if None, the stack header (Default: None)

    :Returns:

    h : array
        Array with header information in the file
    m : array or None
        Metadata records that follow the header; None when the header does not
        announce 'WEBMETADATA' or the extended size is not a whole number of records
    '''

    stream = util.uopen(filename, 'rb')
    m = None
    try:
        h = numpy.fromfile(stream, dtype=header_dtype, count=1)
        if not is_readable(h):
            # Retry with the opposite byte order before giving up
            h = h.byteswap().newbyteorder()
            if not is_readable(h):
                raise IOError("Not an WEB file")
        if h['extended_ident'] == 'WEBMETADATA':
            record_size = metadata_dtype.itemsize
            count = h['extended'][0]/record_size
            if (count*record_size) == h['extended'][0]:
                m = numpy.fromfile(stream, dtype=metadata_dtype, count=count)
            else:
                _logger.warn("Unable to read metadata - size mismatch: %d *%d = %d != %d"%(count, metadata_dtype.itemsize, (count*metadata_dtype.itemsize), h['extended'][0]))
    finally:
        util.close(filename, stream)
    return h, m
def valid_image(filename):
    ''' Test if the image is valid

    Validity means that the file size matches the size derived from the
    header (`labbyt`, `nx`, `ny`, `nz`) and the number of images.

    :Parameters:

    filename : str
               Input filename to test

    :Returns:

    flag : bool
           True if image is valid
    '''

    fin = util.uopen(filename, 'rb')
    try:
        head = read_spider_header(fin)
        h_len = int(head['labbyt'])
        i_len = int(head['nx']) * int(head['ny']) * int(head['nz']) * 4
        count = count_images(head)
        # In the stack layout every image is preceded by its own header
        per_image = (h_len + i_len) if (count > 1 or head['istack'] == 2) else i_len
        return file_size(fin) == (h_len + count * per_image)
    finally:
        util.close(filename, fin)
def read_mrc_header(filename, index=None, no_strict_mrc=False):
    ''' Read the MRC header

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int, ignored
            Index of image to get the header, if None, the stack header (Default: None)
    no_strict_mrc : bool
                    Passed on to `is_readable`. Strict MRC header checking is
                    recommended - Only EPU MRC files and Yifan's frame alignment
                    require this to be off.

    :Returns:

    out : array
          Array with header information in the file
    '''

    stream = util.uopen(filename, 'rb')
    try:
        header = util.fromfile(stream, dtype=header_image_dtype, count=1)
        if not is_readable(header, no_strict_mrc):
            # Retry with the header interpreted in the opposite byte order
            header = header.newbyteorder()
            if not is_readable(header, no_strict_mrc):
                raise IOError("Not MRC header")
    finally:
        util.close(filename, stream)
    return header
def main(args): # 1. Read in the seed file seeds = rseed(uopen(args[1 + args.index('-i')])) # outs = uopen(args[1 + args.index('-o')], mode='w+') for s in seeds: print s pprint(['%s.%s' % (l.name, l.synset.pos) for l in derivations(s.noun)]) pprint(['%s.%s' % (l.name, l.synset.pos) for l in derivations(s.verb)])
def __init__(self, lang, seed_fname, extend_seeds):
    """Load seeds and noun/verb clusters for `lang` and attach a MetaphorBuilder as `mbuilder`.

    `extend_seeds` selects whether cluster words are tagged and extended,
    or only tagged.
    """

    def tokens(lines):
        # Whitespace-split fields of every line, trailing whitespace removed
        return (l.rstrip().split() for l in lines)

    def tag(pos):
        return lambda words: tagged(words, pos)

    def tag_ext(pos):
        return lambda words: extended(tagged(words, pos))

    noun_fn, verb_fn = cluster(lang)
    with uopen(seed_fname) as seed_lines:
        seeds = read_seed(tokens(seed_lines))
    if extend_seeds:
        op = tag_ext
    else:
        op = tag
    with uopen(noun_fn) as nlines, uopen(verb_fn) as vlines:
        nclusters = read_clusters(tokens(nlines), op(N))
        vclusters = read_clusters(tokens(vlines), op(V))
    builder = MetaphorBuilder(lang, nclusters, vclusters, seeds)
    update(self, mbuilder=builder)
def test_1():
    """Translate the dependency and sentence files named on the command line and dump the result as JSON to `uout`."""
    parser, base, depn, sentn = sys.argv[1:5]
    # The dependency file is read with the plain `open`, the sentence file with `uopen`
    with open(depn) as depf, uopen(sentn) as sentf:
        deps = [line.rstrip() for line in depf]
        sents = [line.rstrip() for line in sentf]
    translated = translate(deps, sents, int(base), parser, None, None)
    dump(translated, uout, ensure_ascii=False, encoding='utf-8', indent=2)
def scatter(lines, name, target_dir, files_per_dir, chunk_size, ext='ss'):
    """Distribute `lines` over files of `chunk_size` lines, `files_per_dir` files per directory.

    Directories are created under `target_dir` and named with the two-digit
    hex directory number; files are named with the four-digit hex chunk
    number plus `ext`. `name` is not used.
    """
    lines_per_dir = files_per_dir * chunk_size

    def dir_number(numbered):
        # `numbered` is a (line counter, line) pair from enumerate
        return int(numbered[0] / lines_per_dir)

    def file_number(numbered):
        return int(numbered[0] / chunk_size)

    for d, lines_in_dir in groupby(enumerate(lines), dir_number):
        dirname = join(target_dir, '%.2x' % d)
        if not exists(dirname):
            makedirs(dirname)
        for f, lines_in_file in groupby(lines_in_dir, file_number):
            target = join(dirname, '%.4x.%s' % (f, ext))
            with uopen(target, 'w') as outs:
                print('writing', target)
                outs.writelines(l.decode('utf8') for _, l in lines_in_file)
def main(args):
    """Either list the metaphors (``args.debug_meta``) or run `m4detect` on a JSON file.

    In detection mode the JSON document is read from `args.json_fn` and the
    result is written to `args.out_fn`, or to standard output when that is '-'.
    All attributes of `args` are forwarded as keyword arguments.
    """
    options = args.__dict__

    if not args.debug_meta:
        with open_file(args.json_fn) as jsonf:
            document = json.load(fp=jsonf, encoding='utf-8')
            json_out = m4detect(json_in=document, **args.__dict__)
        if args.out_fn != '-':
            with uopen(args.out_fn, mode='w+b') as out_f:
                json_dump(obj=json_out, fp=out_f)
        else:
            json_dump(obj=json_out, fp=uwriter(sys.stdout))
        return

    print('Metaphors for language {lang}, seed file {seed_fn}:'.format(**options))
    for n, v in all_metaphors(**options):
        line = u'{0[0]}.{0[1]} {1[0]}.{1[1]}'.format(n, v)
        print(line, file=uwriter(sys.stdout))
def read_spider_header(filename, index=None):
    ''' Read the SPIDER header

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int, optional
            Index of image to get the header, if None, the stack header (Default: None)

    :Returns:

    out : array
          Array with header information in the file
    '''

    stream = util.uopen(filename, 'rb')
    try:
        h = numpy.fromfile(stream, dtype=header_dtype, count=1)
        if not is_readable(h):
            # Retry with the header interpreted in the opposite byte order
            h = h.newbyteorder()
            if not is_readable(h):
                raise IOError("Not a SPIDER file")
        if index is None:
            return h

        h_len = int(h['labbyt'])
        i_len = int(h['nx']) * int(h['ny']) * int(h['nz']) * 4
        stack_flag = int(h['istack'])
        count = max(stack_flag, 1)
        if index >= count:
            raise IOError("Index exceeds number of images in stack: %d < %d" % (index, count))
        if stack_flag > 0:
            # Skip the stack header and the preceding (header + image) blocks
            offset = h_len + index * (h_len + i_len)
        else:
            offset = 0
        try:
            stream.seek(offset)
        except:
            _logger.error("Offset: %s" % str(offset))
            raise
        return numpy.fromfile(stream, dtype=h.dtype, count=1)
    finally:
        util.close(filename, stream)
def read_spider_header(filename, index=None):
    ''' Read the SPIDER header

    Without `index` the header at the current position of the stream is
    returned; with `index` the header of that image is read as well and
    returned instead.

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int, optional
            Index of image to get the header, if None, the stack header (Default: None)

    :Returns:

    out : array
          Array with header information in the file
    '''

    fin = util.uopen(filename, 'rb')
    try:
        head = numpy.fromfile(fin, dtype=header_dtype, count=1)
        if not is_readable(head):
            head = head.newbyteorder()
        if not is_readable(head):
            raise IOError("Not a SPIDER file")
        if index is not None:
            header_bytes = int(head['labbyt'])
            image_bytes = int(head['nx']) * int(head['ny']) * int(head['nz']) * 4
            count = max(int(head['istack']), 1)
            if index >= count:
                raise IOError("Index exceeds number of images in stack: %d < %d"%(index, count))
            offset = 0
            if int(head['istack']) > 0:
                # One stack header, then `index` blocks of image header + data
                offset = header_bytes + index * (header_bytes + image_bytes)
            try:
                fin.seek(offset)
            except:
                _logger.error("Offset: %s"%str(offset))
                raise
            head = numpy.fromfile(fin, dtype=head.dtype, count=1)
    finally:
        util.close(filename, fin)
    return head
def valid_image(filename):
    ''' Test if the image is valid

    The file size is compared with the data offset plus `count` images of
    the size described by the header.

    :Parameters:

    filename : str
               Input filename to test

    :Returns:

    flag : bool
           True if image is valid
    '''

    stream = util.uopen(filename, 'rb')
    try:
        # NOTE(review): read_web_header as defined alongside this function
        # returns a (header, metadata) pair - confirm `h['count']` is valid here.
        h = read_web_header(stream)
        offset, ar_args = array_from_header(h)
        image_bytes = ar_args[1] * ar_args[0].itemsize
        expected = offset + h['count'] * image_bytes
        return file_size(stream) == expected
    finally:
        util.close(filename, stream)
def write_image(filename, img, index=None, header=None, inplace=False):
    ''' Write an image array to a file in the SPIDER format

    The image is converted to float32 (complex64 for complex input). The
    size related header fields are always recomputed from the image.

    :Parameters:

    filename : str
               Name of the output file
    img : array
          Image array
    index : int, optional
            Index to write image in the stack
    header : dict, optional
             Dictionary of header values; when omitted, `img.header` is
             used if the image has such an attribute
    inplace : bool
              Write new image to stack without removing the stack

    :Raises:

    TypeError : if the image cannot be converted to a supported type
    '''

    if header is None and hasattr(img, 'header'):
        header = img.header

    dtype = numpy.complex64 if numpy.iscomplexobj(img) else numpy.float32
    try:
        img = img.astype(dtype)
    except Exception:
        raise TypeError("Unsupported type for SPIDER writing: %s" % str(img.dtype))

    mode = 'rb+' if index is not None and (index > 0 or inplace and index > -1) else 'wb+'
    try:
        f = util.uopen(filename, mode)
    except:
        _logger.error("Mode: %s - Index: %s" % (str(mode), str(index)))
        raise

    try:
        # Defined up front: it is read below for complex images even when a
        # ready-made format header is supplied and the conversion is skipped.
        even = None
        if header is None or not hasattr(header, 'dtype') or not is_format_header(header):
            h = numpy.zeros(1, header_dtype)
            if header is not None and 'fourier_even' in header:
                even = header['fourier_even']
            util.update_header(h, spi_defaults, ara2spi)
            header = util.update_header(h, header, ara2spi, 'spi')

        # Image size in header
        header['nx'] = img.T.shape[0]
        header['ny'] = img.T.shape[1] if img.ndim > 1 else 1
        header['nz'] = img.T.shape[2] if img.ndim > 2 else 1

        # NOTE(review): the record length is taken from img.shape[0] while nx
        # comes from img.T.shape[0]; they differ for non-square images - confirm
        # against the SPIDER format description before changing.
        header['lenbyt'] = img.shape[0] * 4
        # Number of records needed to hold at least 1024 header bytes
        header['labrec'] = 1024 // int(header['lenbyt'])
        if 1024 % int(header['lenbyt']) != 0:
            header['labrec'] = int(header['labrec']) + 1
        header['labbyt'] = int(header['labrec']) * int(header['lenbyt'])
        imgsize = img.ravel().shape[0] * 4
        headsize = int(header['labbyt'])
        header['irec'] = header['labrec'] + header['nx']

        if numpy.iscomplexobj(img):
            # determine even or odd Fourier - assumes other dim are padded appropriately
            if even is None:
                v = int(round(float(img.shape[1]) / img.shape[0]))
                v = img.shape[1] // v
                even = (v % 2) == 0
            if even:
                header['iform'] = -22 if img.ndim == 3 else -12
            else:
                header['iform'] = -21 if img.ndim == 3 else -11
        else:
            header['iform'] = 3 if img.ndim == 3 else 1

        # Flat float32 copy of the header; _header_map holds 1-based positions
        fheader = numpy.zeros(int(header['labbyt']) // 4, dtype=numpy.float32)
        for name, idx in _header_map.items():
            fheader[idx - 1] = float(header[name])

        if inplace:
            # Skip the stack header and the image header of slot `index`;
            # only the pixel data is overwritten.
            f.seek(index * (imgsize + headsize) + headsize + headsize)
        else:
            if index is not None:
                # Rewrite the stack header with the new image count, then
                # position on the header slot of image `index`.
                fheader[_header_map['maxim'] - 1] = index + 1
                fheader[_header_map['imgnum'] - 1] = index + 1
                fheader[_header_map['istack'] - 1] = 2
                f.seek(0)
                fheader.tofile(f)
                fheader[_header_map['istack'] - 1] = 0
                f.seek(index * (imgsize + headsize) + headsize)
            fheader[_header_map['maxim'] - 1] = 0
            fheader.tofile(f)
        img.tofile(f)
    finally:
        util.close(filename, f)
def main(args):
    """Read the noun and verb clusters of the language given after ``-l`` and pass them to `out`."""
    lang = args[args.index('-l') + 1]
    loaded = []
    for fname in cluster[lang]:
        loaded.append(read_clusters(uopen(fname)))
    # Exactly two cluster files are expected: nouns first, then verbs
    nc, vc = loaded
    out(nc, 'n')
    out(vc, 'v')
def write_image(filename, img, index=None, header=None, inplace=False):
    ''' Write an image array to a file in the SPIDER format

    The image is converted to float32 (complex64 for complex input). The
    size related header fields are always recomputed from the image.

    :Parameters:

    filename : str
               Name of the output file
    img : array
          Image array
    index : int, optional
            Index to write image in the stack
    header : dict, optional
             Dictionary of header values; when omitted, `img.header` is
             used if the image has such an attribute
    inplace : bool
              Write new image to stack without removing the stack

    :Raises:

    TypeError : if the image cannot be converted to a supported type
    '''

    if header is None and hasattr(img, 'header'):
        header = img.header

    dtype = numpy.complex64 if numpy.iscomplexobj(img) else numpy.float32
    try:
        img = img.astype(dtype)
    except Exception:
        raise TypeError("Unsupported type for SPIDER writing: %s"%str(img.dtype))

    mode = 'rb+' if index is not None and (index > 0 or inplace and index > -1) else 'wb+'
    try:
        f = util.uopen(filename, mode)
    except:
        _logger.error("Mode: %s - Index: %s"%(str(mode), str(index)))
        raise

    try:
        # Defined up front: it is read below for complex images even when a
        # ready-made format header is supplied and the conversion is skipped.
        even = None
        if header is None or not hasattr(header, 'dtype') or not is_format_header(header):
            h = numpy.zeros(1, header_dtype)
            if header is not None and 'fourier_even' in header:
                even = header['fourier_even']
            util.update_header(h, spi_defaults, ara2spi)
            header = util.update_header(h, header, ara2spi, 'spi')

        # Image size in header
        header['nx'] = img.T.shape[0]
        header['ny'] = img.T.shape[1] if img.ndim > 1 else 1
        header['nz'] = img.T.shape[2] if img.ndim > 2 else 1

        # NOTE(review): the record length is taken from img.shape[0] while nx
        # comes from img.T.shape[0]; they differ for non-square images - confirm
        # against the SPIDER format description before changing.
        header['lenbyt'] = img.shape[0]*4
        # Number of records needed to hold at least 1024 header bytes
        header['labrec'] = 1024 // int(header['lenbyt'])
        if 1024 % int(header['lenbyt']) != 0:
            header['labrec'] = int(header['labrec'])+1
        header['labbyt'] = int(header['labrec']) * int(header['lenbyt'])
        imgsize = img.ravel().shape[0]*4
        headsize = int(header['labbyt'])
        header['irec'] = header['labrec']+header['nx']

        if numpy.iscomplexobj(img):
            # determine even or odd Fourier - assumes other dim are padded appropriately
            if even is None:
                v = int(round(float(img.shape[1])/img.shape[0]))
                v = img.shape[1] // v
                even = (v % 2) == 0
            if even:
                header['iform'] = -22 if img.ndim == 3 else -12
            else:
                header['iform'] = -21 if img.ndim == 3 else -11
        else:
            header['iform'] = 3 if img.ndim == 3 else 1

        # Flat float32 copy of the header; _header_map holds 1-based positions
        fheader = numpy.zeros(int(header['labbyt']) // 4, dtype=numpy.float32)
        for name, idx in _header_map.items():
            fheader[idx-1] = float(header[name])

        if inplace:
            # Skip the stack header and the image header of slot `index`;
            # only the pixel data is overwritten.
            f.seek(index * (imgsize + headsize)+headsize+headsize)
        else:
            if index is not None:
                # Rewrite the stack header with the new image count, then
                # position on the header slot of image `index`.
                fheader[_header_map['maxim']-1] = index+1
                fheader[_header_map['imgnum']-1] = index+1
                fheader[_header_map['istack']-1] = 2
                f.seek(0)
                fheader.tofile(f)
                fheader[_header_map['istack']-1] = 0
                f.seek(index * (imgsize + headsize)+headsize)
            fheader[_header_map['maxim']-1] = 0
            fheader.tofile(f)
        img.tofile(f)
    finally:
        util.close(filename, f)
def iter_images(filename, index=None, header=None, no_strict_mrc=False):
    ''' Read a set of MRC images

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int or array, optional
            Index of image to start, if None, start with the first image (Default: None);
            an object with `__iter__` is taken as the list of image indices to read
    header : dict, optional
             Output dictionary to place header values
    no_strict_mrc : bool
                    Forwarded to `read_mrc_header`. Strict MRC header checking is
                    recommended - Only EPU MRC files and Yifan's frame alignment
                    require this to be off.

    :Returns:

    out : array
          Array with image information from the file

    :Raises:

    util.InvalidHeaderException : if the file size does not match the header
    '''

    f = util.uopen(filename, 'rb')
    if index is None:
        index = 0
    try:
        # The second positional parameter of read_mrc_header is `index`, so the
        # flag must be passed by keyword or it never reaches the header check.
        h = read_mrc_header(f, no_strict_mrc=no_strict_mrc)
        count = count_images(h)
        tmp = read_header(h)
        if header is not None:
            header.update(tmp)
        # Python ints avoid overflow of the fixed-width header scalars
        d_len = int(h['nx'][0]) * int(h['ny'][0])
        dtype = numpy.dtype(mrc2numpy[h['mode'][0]])
        data_start = 1024 + int(h['nsymbt'])
        try:
            f.seek(data_start)
        except:
            _logger.error("%s -- %s" % (str(data_start), str(data_start.__class__.__name__)))
            raise
        if not hasattr(index, '__iter__'):
            index = xrange(index, count)
        else:
            index = index.astype(int)

        total = file_size(f)
        expected = data_start + d_len * int(h['nz'][0]) * dtype.itemsize
        if total != expected:
            raise util.InvalidHeaderException("file size != header: %d != %d -- %d" % (
                total, expected, int(h['nsymbt'])))

        swap = header_image_dtype.newbyteorder()[0] == h.dtype[0]
        for i in index:
            # Always position explicitly on image `i`. The previous shortcut
            # compared against a counter that was never updated and returned
            # the first image when image 1 was requested.
            f.seek(int(data_start + i * d_len * dtype.itemsize))
            out = util.fromfile(f, dtype=dtype, count=d_len)
            out = reshape_data(out, h, index, count)
            if swap:
                out = out.byteswap()
            yield out
    finally:
        util.close(filename, f)
def read_image(filename, index=None, header=None):
    ''' Read an image from the specified file in the SPIDER format

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int, optional
            Index of image to get, if None, first image (Default: None)
    header : dict, optional
             Dictionary to hold header values

    :Returns:

    out : array
          Array with image information from the file

    :Raises:

    IOError : if `index` is not smaller than the number of images
    ValueError : if the header is inconsistent or does not match the file size
    '''

    f = util.uopen(filename, 'rb')
    h = None
    try:
        if index is None:
            index = 0
        h = read_spider_header(f)
        dtype = numpy.dtype(spi2numpy[float(h['iform'])])
        tmp = read_header(h)
        if header is not None:
            header.update(tmp)

        h_len = int(h['labbyt'])
        d_len = int(h['nx']) * int(h['ny']) * int(h['nz'])
        i_len = d_len * 4
        count = count_images(h)
        if index >= count:
            raise IOError("Index exceeds number of images in stack: %d < %d" % (index, count))
        if count > 1 and int(h['istack']) == 0:
            raise ValueError("Improperly formatted SPIDER header - not stack but contains mutliple images")

        # Stack: skip the stack header, the image header and all preceding
        # (header + image) blocks; single image: data follows the header.
        offset = h_len * 2 + index * (h_len + i_len) if int(h['istack']) > 0 else h_len

        if count > 1 or h['istack'] == 2:
            expected = h_len + count * (h_len + i_len)
            if file_size(f) != expected:
                raise ValueError("file size != header: %d != %d - count: %d -- nx:%d,ny:%d,nz:%d" % (
                    file_size(f), expected, count, int(h['nx']), int(h['ny']), int(h['nz'])))
        else:
            expected = h_len + count * i_len
            if file_size(f) != expected:
                # Read the header at the would-be stack position for diagnostics
                f.seek(h_len + index * (h_len + i_len))
                h2 = read_spider_header(f)
                # The message previously had one placeholder fewer than
                # arguments, which raised TypeError instead of ValueError.
                raise ValueError("file size != header: %d != %d - %d + %d * %d -- %d,%d == %d,%d -- count: %d" % (
                    file_size(f), expected, h_len, count, i_len,
                    int(h['istack']), int(h['imgnum']),
                    int(h2['istack']), int(h2['imgnum']), int(h['maxim'])))

        try:
            f.seek(offset)
        except:
            _logger.error("Offset: %s" % str(offset))
            raise
        out = numpy.fromfile(f, dtype=dtype, count=d_len)
        # Header was read in the non-native byte order, so the data is too
        if header_dtype.newbyteorder()[0] == h.dtype[0]:
            out = out.byteswap()

        if int(h['nz']) > 1:
            out = out.reshape(int(h['nz']), int(h['ny']), int(h['nx']))
        elif int(h['ny']) > 1:
            try:
                out = out.reshape(int(h['ny']), int(h['nx']))
            except:
                _logger.error("%d != %d*%d = %d" % (out.ravel().shape[0], int(h['nx']), int(h['ny']),
                                                     int(h['nx']) * int(h['ny'])))
                raise
    finally:
        util.close(filename, f)
    return out
def open_file(fn):
    """Open `fn` for reading; files with a '.gz' extension go through gzip and `ureader`, all others through `uopen`."""
    extension = splitext(fn)[1]
    if extension == '.gz':
        return ureader(gzip.open(fn))
    return uopen(fn)
def sentence(sent_id, files=512, chunk=4096):
    """Return the line for the 1-based `sent_id` from the chunked sentence files.

    The file number is ``(sent_id - 1) // chunk`` and the directory number
    is that value ``// files``; both are rendered in hex in the path.
    StopIteration is raised when the file has too few lines.
    """
    file_no, line_no = divmod(sent_id - 1, chunk)
    dir_no = file_no // files
    location = path.join('%.2x' % dir_no, '%.4x.ss' % file_no)
    with uopen(location) as f:
        # Skip `line_no` lines and hand back the next one
        return next(islice(f, line_no, None))
def main(indexname, verb, noun):
    """Print, with a running number starting at 1, every sentence `find` reports for the (verb, noun) pair in the index file."""
    query = (verb, noun)
    with uopen(indexname, 'r') as stream:
        for number, hit in enumerate(find(query, stream), 1):
            _, text = hit
            print(number, text)
return (l.decode('utf8').rstrip().split(sep) for l in f) if decode else (l.rstrip().split(sep) for l in f) from os import path from itertools import islice def sentence(sent_id, files=512, chunk=4096): fn, pos = divmod(sent_id - 1, chunk) dn = fn // files with uopen(path.join('%.2x' % dn, '%.4x.ss' % fn)) as f: return islice(f, pos, pos + 1).next() if __name__ == '__main__': args = sys.argv[1:] multiple = '-m' in args if not multiple: main(*args) else: index, vs, vo = args[1:5] r = dict(vo=u'1-компл', vs=u'предик') outs = uwriter(sys.stdout) with uopen(vs) as vss, uopen(vo) as vos, open(index) as indexs: relations = read(split(vss), r['vs']) | read(split(vos), r['vo']) # for k, n, v in relations: # print(k, n, v, sep=u'\t', file=uwriter(sys.stderr)) findm(split(indexs, decode=True, sep=u'\t'), relations, outs)
def read_image(filename, index=None, header=None):
    ''' Read an image from the specified file in the SPIDER format

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int, optional
            Index of image to get, if None, first image (Default: None)
    header : dict, optional
             Dictionary to hold header values

    :Returns:

    out : array
          Array with image information from the file

    :Raises:

    IOError : if `index` is not smaller than the number of images
    ValueError : if the header is inconsistent or does not match the file size
    '''

    f = util.uopen(filename, 'rb')
    h = None
    try:
        if index is None:
            index = 0
        h = read_spider_header(f)
        dtype = numpy.dtype(spi2numpy[float(h['iform'])])
        tmp = read_header(h)
        if header is not None:
            header.update(tmp)

        h_len = int(h['labbyt'])
        d_len = int(h['nx']) * int(h['ny']) * int(h['nz'])
        i_len = d_len * 4
        count = count_images(h)
        if index >= count:
            raise IOError("Index exceeds number of images in stack: %d < %d"%(index, count))
        if count > 1 and int(h['istack']) == 0:
            raise ValueError("Improperly formatted SPIDER header - not stack but contains mutliple images")

        # Stack: skip the stack header, the image header and all preceding
        # (header + image) blocks; single image: data follows the header.
        offset = h_len*2 + index * (h_len+i_len) if int(h['istack']) > 0 else h_len

        if count > 1 or h['istack'] == 2:
            expected = h_len + count * (h_len+i_len)
            if file_size(f) != expected:
                raise ValueError("file size != header: %d != %d - count: %d -- nx:%d,ny:%d,nz:%d"%(
                    file_size(f), expected, count, int(h['nx']), int(h['ny']), int(h['nz'])))
        else:
            expected = h_len + count * i_len
            if file_size(f) != expected:
                # Read the header at the would-be stack position for diagnostics
                f.seek(h_len + index * (h_len+i_len))
                h2 = read_spider_header(f)
                # The message previously had one placeholder fewer than
                # arguments, which raised TypeError instead of ValueError.
                raise ValueError("file size != header: %d != %d - %d + %d * %d -- %d,%d == %d,%d -- count: %d"%(
                    file_size(f), expected, h_len, count, i_len,
                    int(h['istack']), int(h['imgnum']),
                    int(h2['istack']), int(h2['imgnum']), int(h['maxim'])))

        try:
            f.seek(offset)
        except:
            _logger.error("Offset: %s"%str(offset))
            raise
        out = numpy.fromfile(f, dtype=dtype, count=d_len)
        # Header was read in the non-native byte order, so the data is too
        if header_dtype.newbyteorder()[0] == h.dtype[0]:
            out = out.byteswap()

        if int(h['nz']) > 1:
            out = out.reshape(int(h['nz']), int(h['ny']), int(h['nx']))
        elif int(h['ny']) > 1:
            try:
                out = out.reshape(int(h['ny']), int(h['nx']))
            except:
                _logger.error("%d != %d*%d = %d"%(out.ravel().shape[0], int(h['nx']), int(h['ny']),
                                                   int(h['nx'])*int(h['ny'])))
                raise
    finally:
        util.close(filename, f)
    return out
def iter_images(filename, index=None, header=None):
    ''' Read a set of SPIDER images

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int or array, optional
            Index of image to start, if None, start with the first image (Default: None);
            an object with `__iter__` is taken as the list of image indices to read
    header : dict, optional
             Dictionary to hold header values

    :Returns:

    out : array
          Array with image information from the file

    :Raises:

    IOError : if an index is not smaller than the number of images
    ValueError : if the header is inconsistent, does not match the file
                 size, or an index is negative
    '''

    f = util.uopen(filename, 'rb')
    if index is None:
        index = 0
    try:
        h = read_spider_header(f)
        dtype = numpy.dtype(spi2numpy[float(h['iform'])])
        tmp = read_header(h)
        if header is not None:
            header.update(tmp)

        h_len = int(h['labbyt'])
        nx, ny, nz = int(h['nx']), int(h['ny']), int(h['nz'])
        d_len = nx * ny * nz
        i_len = d_len * 4
        count = count_images(h)
        if numpy.any(index >= count):
            raise IOError("Index exceeds number of images in stack: %s < %d"%(str(index), count))
        # Checked before the single-image shortcut below; placed after it the
        # check could never be reached.
        if count > 1 and int(h['istack']) == 0:
            raise ValueError("Improperly formatted SPIDER header - not stack but contains mutliple images")

        size = (h_len + count * (h_len+i_len)) if int(h['istack']) > 0 else (h_len + i_len)
        if file_size(f) != size:
            # Report the size that was actually compared against
            raise ValueError("file size != header: %d != %d - %d -- %d,%d,%d"%(
                file_size(f), size, count, nx, ny, nz))

        try:
            f.seek(h_len)
        except:
            _logger.error("Offset: %s"%str(h_len))
            raise
        swap = header_dtype.newbyteorder()[0] == h.dtype[0]

        if int(h['istack']) == 0:
            # This file contains a single image!
            out = numpy.fromfile(f, dtype=dtype, count=d_len)
            if swap:
                out = out.byteswap()
            if nz > 1:
                out = out.reshape(nz, ny, nx)
            elif ny > 1:
                out = out.reshape(ny, nx)
            yield out
            return

        if not hasattr(index, '__iter__'):
            index = xrange(index, count)
        else:
            index = index.astype(int)

        last = -1
        for i in index:
            if i < 0:
                raise ValueError("Cannot have a negative index")
            if i != (last+1):
                # Jump: stack header + image header + preceding blocks
                offset = h_len*2 + i * (h_len+i_len)
                try:
                    f.seek(int(offset))
                except:
                    _logger.error("Offset: %s"%str(offset))
                    _logger.error("i: %s"%str(i))
                    raise
            else:
                # Consecutive image: only its own header has to be skipped
                f.seek(h_len, 1)
            last = i

            out = numpy.fromfile(f, dtype=dtype, count=d_len)
            if swap:
                out = out.byteswap()
            if nz > 1:
                try:
                    out = out.reshape(nz, ny, nx)
                except:
                    _logger.error("%d, %d, %d == %d == %d", nz, ny, nx, numpy.prod((nz, ny, nx)), out.shape[0])
                    raise
            elif ny > 1:
                try:
                    out = out.reshape(ny, nx)
                except:
                    _logger.error("(%d < %d) -- %d, %d == %d == %d", i, count, ny, nx, numpy.prod((ny, nx)), out.shape[0])
                    raise
            yield out
    finally:
        util.close(filename, f)
def read_image(filename, index=None, header=None, cache=None, no_strict_mrc=False, force_volume=False):
    ''' Read an image from the specified file in the MRC format

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int, optional
            Index of image to get, if None, first image (Default: None)
    header : dict, optional
             Output dictionary to place header values
    cache : object, optional
            Not used by this function
    no_strict_mrc : bool
                    Forwarded to `read_mrc_header`. Strict MRC header checking is
                    recommended - Only EPU MRC files and Yifan's frame alignment
                    require this to be off.
    force_volume : bool
                   For image to be read as a volume

    :Returns:

    out : array
          Array with image information from the file

    :Raises:

    IOError : if the index is not smaller than the number of images
    util.InvalidHeaderException : if the file size does not match the header
    '''

    idx = 0 if index is None else index
    f = util.uopen(filename, 'rb')
    try:
        # The second positional parameter of read_mrc_header is `index`, so the
        # flag must be passed by keyword or it never reaches the header check.
        h = read_mrc_header(f, no_strict_mrc=no_strict_mrc)
        tmp = read_header(h, force_volume=force_volume)
        if header is not None:
            header.update(tmp)
        count = count_images(h)
        if idx >= count:
            raise IOError("Index exceeds number of images in stack: %d < %d" % (idx, count))

        # Python ints avoid overflow of the fixed-width header scalars for
        # large volumes.
        nx, ny, nz = int(h['nx'][0]), int(h['ny'][0]), int(h['nz'][0])
        if index is None and (count == h['nx'][0] or force_volume):
            d_len = nx * ny * nz
        else:
            d_len = nx * ny
        dtype = numpy.dtype(mrc2numpy[h['mode'][0]])
        data_start = 1024 + int(h['nsymbt'])
        offset = data_start + idx * d_len * dtype.itemsize

        total = file_size(f)
        expected = data_start + nx * ny * nz * dtype.itemsize
        if total != expected:
            raise util.InvalidHeaderException("file size != header: %d != %d -- %s, %d" % (
                total, expected, str(idx), int(h['nsymbt'])))

        f.seek(int(offset))
        out = util.fromfile(f, dtype=dtype, count=d_len)
        out = reshape_data(out, h, index, count, force_volume)
        # Header was read in the non-native byte order, so the data is too
        if header_image_dtype.newbyteorder()[0] == h.dtype[0]:
            out = out.byteswap()
    finally:
        util.close(filename, f)
    return out
def write_image(filename, img, index=None, header=None, inplace=False):
    ''' Write an image array to a file in the MRC format

    :Parameters:

    filename : str
               Name of the output file
    img : array
          Image array
    index : int, optional
            Index to write image in the stack
    header : dict, optional
             Dictionary of header values; if it is not already an MRC
             header (as judged by `is_format_header`) a new MRC header is
             built from it and from the image
    inplace : bool
              Write new image to stack without removing the stack

    :Raises:

    TypeError
        If the image data type cannot be mapped to an MRC mode
    '''

    if header is None and hasattr(img, 'header'):
        header = img.header
    try:
        img = img.astype(mrc2numpy[numpy2mrc[img.dtype.type]])
    except Exception:
        raise TypeError(
            "Unsupported type for MRC writing: %s" % str(img.dtype))

    # Only an existing stack is opened for update; everything else starts
    # from an empty file.
    if index is not None and (index > 0 or inplace and index > -1):
        mode = 'rb+'
    else:
        mode = 'wb+'
    f = util.uopen(filename, mode)
    # Header construction happens inside the try block so the file handle is
    # released even if building the header fails.
    try:
        if (header is None or not hasattr(header, 'dtype')
                or not is_format_header(header)):
            h = numpy.zeros(1, header_image_dtype)
            util.update_header(h, mrc_defaults, ara2mrc)
            pix = header.get('apix', 1.0) if header is not None else 1.0
            header = util.update_header(h, header, ara2mrc, 'mrc')
            # img.T lists the axes fastest-varying first: (nx, ny, nz).
            header['nx'] = img.T.shape[0]
            header['ny'] = img.T.shape[1] if img.ndim > 1 else 1
            if header['nz'] == 0:
                # Use the transposed shape here too so that non-cubic
                # volumes record their slice count rather than nx again.
                header['nz'] = img.T.shape[2] if img.ndim > 2 else 1
            header['mode'] = numpy2mrc[img.dtype.type]
            header['mx'] = header['nx']
            header['my'] = header['ny']
            header['mz'] = header['nz']
            header['xlen'] = header['nx'] * pix
            header['ylen'] = header['ny'] * pix
            header['zlen'] = header['nz'] * pix
            header['alpha'] = 90
            header['beta'] = 90
            header['gamma'] = 90
            header['mapc'] = 1
            header['mapr'] = 2
            header['maps'] = 3
            header['amin'] = numpy.min(img)
            header['amax'] = numpy.max(img)
            header['amean'] = numpy.mean(img)
            header['map'] = 'MAP'
            header['byteorder'] = byteorderint2[sys.byteorder]
            header['nlabels'] = 1
            header['label0'] = 'Created by Arachnid'
            if img.ndim == 3:
                header['nxstart'] = header['nx'] / -2
                header['nystart'] = header['ny'] / -2
                header['nzstart'] = header['nz'] / -2

        if index is not None:
            # Writing slot `index` means the stack holds index + 1 images.
            stack_count = index + 1
            header['nz'] = stack_count
            header['mz'] = stack_count
            header['zlen'] = stack_count

        # Offsets are taken from `header`, which is always bound here; the
        # scratch array `h` only exists when a header had to be built.
        data_start = 1024 + int(header['nsymbt'])
        image_bytes = img.ravel().shape[0] * img.dtype.itemsize

        if inplace:
            f.seek(int(data_start + index * image_bytes))
        elif f != filename:
            f.seek(0)
            header.tofile(f)
            if index is not None and index > 0:
                f.seek(int(data_start + index * image_bytes))
        img.tofile(f)
    finally:
        util.close(filename, f)
def openf(fn):
    """Return ``ureader(sys.stdin)`` when *fn* is ``'-'``, else ``uopen(fn)``."""
    if fn == '-':
        return ureader(sys.stdin)
    return uopen(fn)
from __future__ import print_function
"""A replacement for ~katia/JuneSystem/June.sh

@author [email protected]
"""
from util import uopen, Environment

# Named path templates; entries may refer to one another with {NAME}.
expand = Environment(
    BASE='/n/shokuji/dc/katia',
    BNC='/u/metanet/corpolexica/EN/bnc-relations',
    ADJ='{BNC}/AdverbialModifierForAdjRels.txt-uniqed-sorted',
    DOBJ='{BNC}/DirectObjRels.txt-uniqed-sorted',
    SUBJ='{BNC}/SubjectRels.txt-uniqed-sorted',
    IOBJ='{BNC}/IndirectObjRels.txt-underscore-uniqed-sorted',
)

_SCOREF = expand('{BASE}/MRC_Conc_All')

# Built at import time: every line of the score file must split into exactly
# two whitespace-separated fields, stored as key -> value (both strings).
score = dict(line.rstrip().split() for line in uopen(_SCOREF))


def concreteness(target):
    """Look up *target* in the ``score`` table (KeyError if it is absent)."""
    return score[target]


def main(relation, target, source):
    """Entry point placeholder; currently does nothing."""
def iter_images(filename, index=None, header=None):
    ''' Read a set of SPIDER images

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int or sequence of int, optional
            Index of image to start, if None, start with the first image
            (Default: None); a list, tuple or array selects exactly those
            images, in the order given
    header : dict, optional
             Dictionary to hold header values

    :Returns:

    out : array
          Generator yielding one array per image read from the file

    :Raises:

    IOError
        If an index is not smaller than the number of images
    ValueError
        If the file size disagrees with the header, or an index is negative
    '''

    f = util.uopen(filename, 'rb')
    try:
        if index is None:
            index = 0
        elif hasattr(index, '__iter__'):
            # Accept plain lists/tuples as well as arrays.
            index = numpy.asarray(index, dtype=int)

        h = read_spider_header(f)
        dtype = numpy.dtype(spi2numpy[float(h['iform'])])
        tmp = read_header(h)
        if header is not None:
            header.update(tmp)

        nx = int(h['nx'])
        ny = int(h['ny'])
        nz = int(h['nz'])
        istack = int(h['istack'])
        h_len = int(h['labbyt'])
        d_len = nx * ny * nz
        i_len = d_len * 4  # the size arithmetic assumes 4 bytes per value
        count = count_images(h)
        if numpy.any(index >= count):
            raise IOError(
                "Index exceeds number of images in stack: %s < %d" % (
                    str(index), count))

        # A stack has one overall header followed by (header + data) per
        # image; a plain image file is a single header followed by its data.
        if istack > 0:
            size = h_len + count * (h_len + i_len)
        else:
            size = h_len + i_len
        actual = file_size(f)
        if actual != size:
            # Report the size that was actually compared against.
            raise ValueError(
                "file size != header: %d != %d - %d -- %d,%d,%d" % (
                    actual, size, count, nx, ny, nz))

        try:
            f.seek(h_len)
        except Exception:
            _logger.error("Offset: %s" % str(h_len))
            raise

        # Loop-invariant: the header was parsed with the byte-swapped layout.
        swap = header_dtype.newbyteorder()[0] == h.dtype[0]

        if istack == 0:
            # This file contains a single image!
            out = numpy.fromfile(f, dtype=dtype, count=d_len)
            if swap:
                out = out.byteswap()
            if nz > 1:
                out = out.reshape(nz, ny, nx)
            elif ny > 1:
                out = out.reshape(ny, nx)
            yield out
            return

        if not hasattr(index, '__iter__'):
            index = xrange(index, count)

        last = -1
        for i in index:
            if i < 0:
                raise ValueError("Cannot have a negative index")
            if i != (last + 1):
                # Random access: skip the overall header, the preceding
                # (header + data) records and this image's own header.
                offset = h_len * 2 + i * (h_len + i_len)
                try:
                    f.seek(int(offset))
                except Exception:
                    _logger.error("Offset: %s" % str(offset))
                    _logger.error("i: %s" % str(i))
                    raise
            else:
                # Sequential access: only the next image's header is skipped.
                f.seek(h_len, 1)
            last = i

            out = numpy.fromfile(f, dtype=dtype, count=d_len)
            if swap:
                out = out.byteswap()
            if nz > 1:
                try:
                    out = out.reshape(nz, ny, nx)
                except Exception:
                    _logger.error(
                        "%d, %d, %d == %d == %d", nz, ny, nx,
                        numpy.prod((nz, ny, nx)), out.shape[0])
                    raise
            elif ny > 1:
                try:
                    out = out.reshape(ny, nx)
                except Exception:
                    _logger.error(
                        "(%d < %d) -- %d, %d == %d == %d", i, count, ny, nx,
                        numpy.prod((ny, nx)), out.shape[0])
                    raise
            yield out
    finally:
        util.close(filename, f)