Example #1
def compute_pca(data_path=os.path.join(BASE_DIR, 'data/memmap/'),
                  out_path=os.path.join(BASE_DIR, 'data/'),
                  batch_size=500, image_size=3*300*300):

    ipca = IncrementalPCA(n_components=3, batch_size=batch_size)

    path = os.path.join(data_path, 'tn_x.dat')
    train = np.memmap(path, dtype=theano.config.floatX, mode='r+', shape=(4044,image_size))
    n_samples, _ = train.shape

    for batch_num, batch in enumerate(gen_batches(n_samples, batch_size)):
        X = train[batch,:]
        X = np.reshape(X, (X.shape[0], 3, int(image_size/3)))
        X = X.transpose(0, 2, 1)
        X = np.reshape(X, (reduce(np.multiply, X.shape[:2]), 3))
        ipca.partial_fit(X)

    path = os.path.join(data_path, 'v_x.dat')
    valid = np.memmap(path, dtype=theano.config.floatX, mode='r+', shape=(500,image_size))
    n_samples, _ = valid.shape


    for batch_num, batch in enumerate(gen_batches(n_samples, batch_size)):
        X = valid[batch,:]
        X = np.reshape(X, (X.shape[0], 3, int(image_size/3)))
        X = X.transpose(0, 2, 1)
        X = np.reshape(X, (reduce(np.multiply, X.shape[:2]), 3))
        ipca.partial_fit(X)

    eigenvalues, eigenvectors = np.linalg.eig(ipca.get_covariance())
    eigenvalues.astype('float32').dump(os.path.join(out_path, 'eigenvalues.dat'))
    eigenvectors.astype('float32').dump(os.path.join(out_path, 'eigenvectors.dat'))
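A note on the reshaping above: each memmapped row stores an image channel-major as 3*H*W values, and the three reshape/transpose steps turn a batch of such rows into (batch*H*W, 3) per-pixel RGB rows, so IncrementalPCA learns a 3-component colour-space transform. A minimal self-contained sketch of that reshape on synthetic data (BASE_DIR, gen_batches and the memmap files above are assumed to come from the surrounding project):

import numpy as np
from functools import reduce

# Synthetic stand-in for one batch read from the memmap: 4 images of 8x8 RGB,
# stored channel-major as rows of length 3*H*W.
batch, H, W = 4, 8, 8
X = np.random.rand(batch, 3 * H * W).astype('float32')

X = np.reshape(X, (X.shape[0], 3, H * W))                  # (batch, channels, pixels)
X = X.transpose(0, 2, 1)                                   # (batch, pixels, channels)
X = np.reshape(X, (reduce(np.multiply, X.shape[:2]), 3))   # (batch*pixels, 3)
assert X.shape == (batch * H * W, 3)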
Example #2
    def as_numpy_array(self, projection=['Smile', 'Trackerfail'], valid_only=True, train_proportion=0.8):
        tmp_train_images = os.path.join(self.cache_dir, self.TMP_TRAIN_IMAGES)
        tmp_train_labels = os.path.join(self.cache_dir, self.TMP_TRAIN_LABELS)
        tmp_test_images = os.path.join(self.cache_dir, self.TMP_TEST_IMAGES)
        tmp_test_labels = os.path.join(self.cache_dir, self.TMP_TEST_LABELS)
        if all([os.path.exists(f) for f in [tmp_train_images, tmp_train_labels, tmp_test_images, tmp_test_labels]]):
            X_train_memmap = np.memmap(tmp_train_images).reshape((-1, 64, 64, self.c_dim))
            y_train_memmap = np.memmap(tmp_train_labels).reshape((-1, len(projection) - 1))
            X_test_memmap = np.memmap(tmp_test_images).reshape((-1, 64, 64, self.c_dim))
            y_test_memmap = np.memmap(tmp_test_labels).reshape((-1, len(projection) - 1))
        else:
            entities = self.find_data_intersection()
            test_size = int(entities.shape[0] * (1 - train_proportion))
            test_idxs = random.sample(range(entities.shape[0]), test_size)
            count = 0
            train = []
            test = []
            for i, e in entities.iterrows():
                entity = Entity(e.video, e.au_label, e.landmarks, self.cache_dir)
                entity_records = entity.frames(projection=projection, valid_only=valid_only)
                if i in test_idxs:
                    test += entity_records
                else:
                    train += entity_records
                self.logger.info('Finished video %d/%d' % (count + 1, len(entities)))
                count += 1

            X_train_memmap, y_train_memmap = self.__list_to_array(tmp_train_images, tmp_train_labels, train)
            if test_idxs:
                X_test_memmap, y_test_memmap = self.__list_to_array(tmp_test_images, tmp_test_labels, test)

        p = np.random.permutation(X_train_memmap.shape[0])
        return X_train_memmap[p], y_train_memmap[p], X_test_memmap, y_test_memmap
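One subtlety in the cached-file branch above: np.memmap defaults to dtype=uint8 when no dtype is passed, so the reshape appears to assume the cached images and labels were written as raw bytes. A quick self-contained check of that default (temporary file only for illustration):

import tempfile
import numpy as np

path = tempfile.mktemp()
np.arange(12, dtype=np.uint8).tofile(path)
m = np.memmap(path)                          # no dtype given -> uint8
print(m.dtype, m.reshape((-1, 3, 4)).shape)  # uint8 (1, 3, 4)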
Example #3
 def __init__(self,fname):
     #dump to binary
     fndata=fname;
     if (not os.path.isfile(fndata)):
         fndatain=fndata.replace('/bin/','/');
         datain=Epix100a(fndatain);
         #write header
         binheader=np.zeros(16).astype(np.uint32);
         binheader[0:6]=[datain.nframes, datain.my*datain.mx, datain.my, datain.mx, datain.nblocks, datain.nbcols];
         binheader.tofile(fndata);    
         #write data
         dataout=np.memmap(fndata,dtype=np.int16,mode='r+', shape=(datain.nframes,datain.my,datain.mx),offset=64);
         t0=time.clock();
         for iframe in range(datain.nframes):
             dataout[iframe]=datain.frames(iframe);
             if (iframe%100==0):
                 #progress(iframe,nframes,iframe);
                 print str(iframe)+' - '+str(1000*(time.clock()-t0)/(iframe+1))+' ms. average frame: '+str(np.mean(datain.frames(iframe)));
         dataout.flush();
         
         del dataout;
         del datain;
     #get nr of frames
     data=np.memmap(fndata,dtype=np.uint32,mode='r',shape=((64)),offset=0); 
     self.nframes=data[0]; self.nframesize=data[1]; self.my=data[2]; self.mx=data[3]; self.nblocks=data[4]; self.nbcols=data[5];
     self.data=np.memmap(fndata,dtype=np.int16,mode='c',shape=(self.nframes,self.my,self.mx),offset=64);
Example #4
    def linear_regression_2(self):
        '''Run a linear regression and save the output of that regression as new X features'''
        logging.info('Beginning Creation of new files based on Linear Regression model.')
        x_2 = self.X[:,1:] ** 2
        y_2 = self.X_submit[:,1:] ** 2
        
        self.reset_data()
        self.clean_data()
        kg.split_data(.3, .0001, 100, 100)
        lr = linear_model.LinearRegression()        
        self.__fit(lr,lr.fit) 
        self.__score_cv(lr,lr.predict)        
        x_pred = lr.predict(self.X[:,1:])
        y_pred = lr.predict(self.X_submit[:,1:])
        self.reset_data()
        
        X_new = np.hstack((self.X,x_pred))
        Y_new = np.hstack((self.X_submit, y_pred))
        
        X_new = np.hstack((X_new,x_2))
        Y_new = np.hstack((Y_new, y_2))

        X_new = np.hstack((X_new,self.Y[:,1].reshape(-1,1)))
        
        logging.info('New X shape is %s' %(str(X_new.shape)))
        logging.info('New Y shape is %s' %(str(Y_new.shape)))
        mm = np.memmap('mm.x_with_linear.csv', dtype='float32', mode='w+',shape=X_new.shape)
        mm[:] = X_new[:]
        del mm
        mm = np.memmap('mm.y_with_linear.csv', dtype='float32', mode='w+',shape=Y_new.shape)
        mm[:] = Y_new[:]
        del mm
        logging.info('Completed creating new files based on Linear Regression model. Remember to update X_ROW values.')
Example #5
def convert(in_name, out_name):
    """convert the file identified by filename in_name to a complex numpy array and store it to a file named out_name"""
    wav = wave.open(in_name,'rb')
    verifyfileformat(wav)

    length = wav.getnframes()
    channels = wav.getnchannels()

    logging.info('length: {} frames, channels: {}'.format(length, channels))
    wav.close()
   
    # now that we know the format is valid, access data directly
    npinfile = np.memmap(in_name, dtype=np.int16, mode='r', offset=44) 
    if npinfile.shape[0]/2 != length:
        raise TypeError('frame mismatch in direct access')

    # our output file, this will be an npy binary holding complex64 types
    npfile = np.memmap(out_name, dtype=np.complex64,
                       mode='w+',
                       shape=(length,))

    # convert input to complex output
    npfile[:] = npinfile[0::2] + 1j * npinfile[1::2]
    
    # cleanup
    del npinfile
    del npfile
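The conversion step relies on the WAV payload being interleaved 16-bit I/Q samples; the slices [0::2] and [1::2] pick the in-phase and quadrature streams before they are combined into complex64 values. A minimal in-memory sketch of that step (the real code memmaps the file past its 44-byte WAV header):

import numpy as np

raw = np.array([100, -3, 7, 42, -15, 9], dtype=np.int16)   # I0 Q0 I1 Q1 I2 Q2
iq = np.empty(raw.shape[0] // 2, dtype=np.complex64)
iq[:] = raw[0::2] + 1j * raw[1::2]
print(iq)   # [100.-3.j  7.+42.j  -15.+9.j]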
Example #6
def test_score_memmap():
    # Ensure a scalar score of memmap type is accepted
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    tf = tempfile.NamedTemporaryFile(mode='wb', delete=False)
    tf.write(b'Hello world!!!!!')
    tf.close()
    scores = np.memmap(tf.name, dtype=np.float64)
    score = np.memmap(tf.name, shape=(), mode='r', dtype=np.float64)
    try:
        cross_val_score(clf, X, y, scoring=lambda est, X, y: score)
        # non-scalar should still fail
        assert_raises(ValueError, cross_val_score, clf, X, y,
                      scoring=lambda est, X, y: scores)
    finally:
        # Best effort to release the mmap file handles before deleting the
        # backing file under Windows
        scores, score = None, None
        for _ in range(3):
            try:
                os.unlink(tf.name)
                break
            except WindowsError:
                sleep(1.)
Example #7
def compare_binary(pattern,testname):
    
    files = glob.glob(pattern)
    
    data1 = np.memmap(files[0],dtype=np.complex128)
    data2 = np.memmap(files[1],dtype=np.complex128)
    
    diff = data1 - data2
    diff_real = np.abs(np.real(diff))
    diff_imag = np.abs(np.imag(diff))
    diff_abs = np.abs(data1) - np.abs(data2)
    diff_phase = np.angle(data1) - np.angle(data2)
    diff_phase[diff_phase > math.pi] -= (2*math.pi)
    diff_phase = np.abs(diff_phase)
    
    
    if np.max(diff_real) < 1e-15 and np.max(diff_imag) < 1e-15:
        print('TEST {}:\tOK'.format(testname))
    else:
        print('TEST {}:\tFAILED'.format(testname))
        print('differences between {} and {}'.format(files[0],files[1]))
        print('max difference real part:\t',np.max(diff_real ))
        print('max difference imag part:\t',np.max(diff_imag ))
        print('max difference modulus:\t\t',np.max(diff_abs  ))
        print('max difference phase:\t\t',  np.max(diff_phase))
    print()
Example #8
def read_ply(ply_filename):
    vfile = tempfile.mktemp()
    ffile = tempfile.mktemp()
    reader = ply_reader.PlyReader(ply_filename)
    
    v_id = 0
    f_id = 0

    # Reading the header
    for evt, data in reader.read():
        if evt == ply_reader.EVENT_HEADER:
            n_vertices, n_faces = data
            vertices = np.memmap(vfile, dtype='float64', shape = (n_vertices,3),
                                mode='w+')
            faces = np.memmap(ffile, dtype='int64', shape = (n_faces,3),
                              mode='w+')
            break

    # Reading the vertices and faces
    for evt, data in reader.read():
        if evt == ply_reader.EVENT_VERTEX:
            current_vertex = data
            vertices[v_id] = current_vertex
            v_id += 1

        elif evt == ply_reader.EVENT_FACE:
            faces[f_id] = data
            f_id += 1

    return vertices, faces
Example #9
    def load_vectors(self):
        """ Loads the appropriate word vector from each corpus in self.corpora

        """
        for corp in self.corpora:
            rows = pd.read_pickle(
                '{0}{1}/{4}/{2}/{4}_IndexList_w={2}_lems=False_min_occs={3}_no_stops=False.pickle'.format(
                    self.base, corp[0], corp[1], corp[2], self.english))
            i = rows.index(self.greek)
            if self.norm:
                os.system('echo Now normalizing {}'.format(corp[0]))
                orig = np.memmap(
                    '{0}{1}/{4}/{2}/{5}_{2}_lems=False_{4}_min_occ={3}_{6}no_stops=False_weighted={7}.dat'.format(
                    self.base, corp[0], corp[1], corp[2], self.english, self.prefix, self.svd, corp[3]),
                    dtype='float', shape=(len(rows), len(rows)))
                normed = np.memmap(
                    '{0}{1}/{4}/{2}/{5}_{2}_lems=False_{4}_min_occ={3}_{6}no_stops=False_weighted={7}_NORMED.dat'.format(
                    self.base, corp[0], corp[1], corp[2], self.english, self.prefix, self.svd, corp[3]),
                    dtype='float', mode='w+', shape=(len(rows), len(rows)))
                normed[:] = scale(orig)
                r = normed[i]
                del normed
                del orig
            else:
                r = np.memmap(
                '{0}{1}/{4}/{2}/{5}_{2}_lems=False_{4}_min_occ={3}_{6}no_stops=False_weighted={7}_NORMED.dat'.format(
                    self.base, corp[0], corp[1], corp[2], self.english,
                    self.prefix, self.svd, corp[3]), dtype='float',
                shape=(len(rows), len(rows)))[i]
            self.ekk_rows[corp[0]] = pd.Series(r, index=rows)
Example #10
 def save(self, dirname = None):
     """Save the current rdfspace to a directory (by default the directory in which indexes are stored)"""
     if dirname is None and self._index_dir is not None:
         dirname = self._index_dir
     if not os.path.exists(dirname):
         os.makedirs(dirname)
     # We memmap big matrices, as pickle eats the whole RAM
     # We don't save the full adjacency matrix
     ut_m = np.memmap(os.path.join(dirname, 'ut.dat'), dtype='float64', mode='w+', shape=self._ut_shape)
     ut_m[:] = self._ut[:]
     s_m = np.memmap(os.path.join(dirname, 's.dat'), dtype='float64', mode='w+', shape=self._s_shape)
     s_m[:] = self._s[:]
     vt_m = np.memmap(os.path.join(dirname, 'vt.dat'), dtype='float64', mode='w+', shape=self._vt_shape)
     vt_m[:] = self._vt[:]
     if self._index_dir is None:
         # The index is in memory, we'll pickle it with the rest
         (adjacency, ut, s, vt) = (self._adjacency, self._ut, self._s, self._vt)
         (self._adjacency, self._ut, self._s, self._vt) = (None, None, None, None)
         f = open(os.path.join(dirname, 'space.dat'), 'w')
         pickle.dump(self, f)
         f.close()
         (self._adjacency, self._ut, self._s, self._vt) = (adjacency, ut, s, vt)
     else:
         # Flushing indexes
         self._uri_index.close()
         self._index_uri.close()
         # The index is stored in dbm, we will exclude it from the pickle
         (adjacency, ut, s, vt) = (self._adjacency, self._ut, self._s, self._vt)
         (self._adjacency, self._ut, self._s, self._vt, self._uri_index, self._index_uri) = (None, None, None, None, None, None)
         f = open(os.path.join(dirname, 'space.dat'), 'w')
         pickle.dump(self, f)
         f.close()
         (self._adjacency, self._ut, self._s, self._vt) = (adjacency, ut, s, vt)
         self._uri_index = dbm.open(os.path.join(dirname, 'uri_index'), 'r')
         self._index_uri = dbm.open(os.path.join(dirname, 'index_uri'), 'r')
Example #11
def readchunk(H, fname, bottom=None, top=None, extra=''):
  fid = int(fname.replace('.', '-').split('-')[-1])
  dispx = numpy.memmap(fname % ('dispx' + extra), mode='r', dtype='f4')
  dispy = numpy.memmap(fname % ('dispy' + extra), mode='r', dtype='f4')
  dispz = numpy.memmap(fname % ('dispz' + extra), mode='r', dtype='f4')
  delta = numpy.memmap(fname % ('delta' + extra), mode='r', dtype='f4')
  if False and H['Scale'] == 0.0:
    assert H['DownSample'] == 1
    xstart = fid * int(numpy.ceil(1.0 * H['Nmesh'] / H['NTask']))
    xend = (fid + 1) * int(numpy.ceil(1.0 * H['Nmesh'] / H['NTask']))
    if xend > H['Nmesh']: xend = H['Nmesh']
    index = numpy.arange(
               xstart * H['Nmesh'] * H['Nmesh'],
               xend * H['Nmesh'] * H['Nmesh'])
    if xstart > xend:
      xstart = xend
  else:
    index = numpy.memmap(fname % 'index', mode='r', dtype='i8')
  ipos = numpy.array(numpy.unravel_index(index, H['Size']), dtype='i4').T
  ipos += H['Offset']
  if bottom is not None and top is not None:
    includemask = ipos_in_box(ipos, bottom, top)
  else:
    includemask = numpy.ones(len(ipos), dtype='?')
  result = numpy.empty(includemask.sum(), dtype=[('ipos', ('i4', 3)), ('disp', ('f4', 3)), ('delta', 'f4')])
  if len(result) == 0: return result
  result['ipos'] = ipos[includemask]
  result['disp'][:, 0] = dispx[includemask]
  result['disp'][:, 1] = dispy[includemask]
  result['disp'][:, 2] = dispz[includemask]
  result['delta'] = delta[includemask]
  return result
Example #12
 def sim_calc(self):
     nt = self.corpora[0]
     self.scores = {}
     for corp in self.corpora:
         i_nt = []
         i_c2 = []
         rows = self.ekk_rows[corp[0]]
         for i, word in enumerate(self.ekk_rows['NT']):
             if word in rows:
                 i_nt.append(i)
                 i_c2.append(self.ekk_rows[corp[0]].index(word))
         d_c2 = np.memmap(
             '{0}{1}/{4}/{2}/{5}_{2}_lems=False_{4}_min_occ={3}_{6}no_stops=False_NORMED.dat'.format(
                 self.base, corp[0], corp[1], corp[2], self.english, self.prefix, self.svd),
             dtype='float32', shape=(len(rows), len(rows)))[i_c2]
         d_c2 = d_c2[:, i_c2]
         d_nt = np.memmap(
             '{0}{1}/{4}/{2}/{5}_{2}_lems=False_{4}_min_occ={3}_{6}no_stops=False_NORMED.dat'.format(
                 self.base, nt[0], nt[1], nt[2], self.english, self.prefix,
                 self.svd), dtype='float32',
             shape=(len(self.ekk_rows['NT']), len(self.ekk_rows['NT'])))[
             i_nt]
         d_nt = d_nt[:, i_nt]
         self.scores['{0}_{1}'.format('NT', corp[0])] = np.average(np.diag(
             1 - pairwise_distances(d_nt, d_c2, metric='cosine',
                                    n_jobs=12)))
Example #13
def main(A):
    sightlines = Sightlines(A)
    fgpa = FGPAmodel(A)

    Npixels = sightlines.Npixels.sum()
    specloglam = numpy.memmap(A.SpectraOutputLogLam, mode='w+', 
            dtype='f4', shape=Npixels)
    # now save LogLam of the pixels for ease of access
    # (not used by our code)
    LogLamGrid = A.LogLamGrid
    LogLamCenter = 0.5 * (LogLamGrid[1:] + LogLamGrid[:-1])
    for index in range(len(sightlines)):
        sl2 = slice(sightlines.PixelOffset[index], 
                sightlines.PixelOffset[index] + sightlines.Npixels[index])
        sl = slice(
            sightlines.LogLamGridIndMin[index],
            sightlines.LogLamGridIndMax[index] - 1)
        specloglam[sl2] = LogLamCenter[sl]
    specloglam.flush()

    # now save QSONpixel for ease of access
    # (not used by our code)
    QSONpixel = numpy.memmap(A.QSONpixel, mode='w+', 
            dtype='i4', shape=len(sightlines))
    QSONpixel[...] = numpy.int32(sightlines.Npixels)
    QSONpixel.flush()
Example #14
    def parse_graph(self, graph_path, data_dir='data', load_edges=False, extend_paths=2):
        graph = parser.Graph(graph_path)
        self.from_nodes, self.to_nodes = graph.get_mappings()
        graph.save_mappings(self.output_dir)

        if load_edges:
            self.inverse_degrees = np.memmap(
                os.path.join(data_dir, 'inverse_degrees.mat'),
                mode='r',
                dtype='float32'
            )
            self.from_to_idxs = np.memmap(
                os.path.join(data_dir, 'from_to.mat'),
                mode='r',
                dtype='int32'
            )
            self.from_to_idxs = np.reshape(self.from_to_idxs, newshape=(self.inverse_degrees.shape[0], 2))
        else:
            from_to_idxs, inverse_degrees = graph.extend_graph(max_degree=extend_paths)
            self.from_to_idxs = np.memmap(
                os.path.join(data_dir, 'from_to.mat'),
                mode='r+',
                shape=from_to_idxs.shape,
                dtype='int32'
            )
            self.from_to_idxs[:] = from_to_idxs[:]
            self.inverse_degrees = np.memmap(
                os.path.join(data_dir, 'inverse_degrees.mat'),
                mode='r+',
                shape=inverse_degrees.shape,
                dtype='float32'
            )
            self.inverse_degrees[:] = inverse_degrees[:]
Example #15
    def get_session(self, session=-1, signal="data"):
        """Return the aggregate data array of a session

        If the session consists of many buffers, they are concatenated into a
        single buffer loaded in memory.

        If the data is a single file, it is memmaped as an array.
        """
        sessions = self.list_sessions()
        if isinstance(session, int):
            session_id = sessions[session]
        elif session in sessions:
            session_id = session
        else:
            raise ValueError("No such session %r" % session)

        signal_folder = os.path.join(self.data_folder, session_id, signal)
        data_files = os.listdir(signal_folder)
        dtypes = [self.decode_dtype(filename) for filename in data_files]
        if len(data_files) == 0:
            return np.array([])
        elif len(data_files) == 1:
            return np.memmap(os.path.join(signal_folder, data_files[0]), dtype=dtypes[0])
        else:
            return np.concatenate(
                [np.memmap(os.path.join(signal_folder, f), dtype=dtype) for f, dtype in zip(data_files, dtypes)]
            )
Example #16
def memmap(docompute, dowrite, verbose):

    afilename = os.path.join(OUT_DIR, "memmap-a.bin")
    bfilename = os.path.join(OUT_DIR, "memmap-b.bin")
    rfilename = os.path.join(OUT_DIR, "memmap-output.bin")
    if dowrite:
        t0 = time()
        a = np.memmap(afilename, dtype='float32', mode='w+', shape=shape)
        b = np.memmap(bfilename, dtype='float32', mode='w+', shape=shape)

        # Fill arrays a and b
        #row = np.linspace(0, 1, ncols)
        row = np.arange(0, ncols, dtype='float32')
        for i in range(nrows):
            a[i] = row * (i + 1)
            b[i] = row * (i + 1) * 2
        del a, b  # flush data
        print("[numpy.memmap] Time for creating inputs:",
              round(time() - t0, 3))

    if docompute:
        t0 = time()
        # Reopen inputs in read-only mode
        a = np.memmap(afilename, dtype='float32', mode='r', shape=shape)
        b = np.memmap(bfilename, dtype='float32', mode='r', shape=shape)
        # Create the array output
        r = np.memmap(rfilename, dtype='float32', mode='w+', shape=shape)
        # Do the computation row by row
        for i in range(nrows):
            r[i] = eval(expr, {'a': a[i], 'b': b[i]})
        if verbose:
            print("First ten values:", r[0, :10])
        del a, b
        del r  # flush output data
        print("[numpy.memmap] Time for compute & save:", round(time() - t0, 3))
Example #17
def load_data(fname, use_cropped=False, as_grey=False):
    n = 4543
    size = int(fname.split('_')[0])

    if use_cropped:
        if as_grey:
            X_fname = 'cache/X_cropped_grey_%s.npy' % fname
            y_fname = 'cache/y_cropped_grey_%s.npy' % fname
        else:
            X_fname = 'cache/X_cropped_%s.npy' % fname
            y_fname = 'cache/y_cropped_%s.npy' % fname

    else:
        X_fname = 'cache/X_%s.npy' % fname
        y_fname = 'cache/y_%s.npy' % fname

    num_channels = 1 if args.as_grey else 3
    X_shape = (n, num_channels, size, size)
    y_shape = (n,)

    X = np.memmap(X_fname, dtype=np.float32, mode='r', shape=X_shape)
    y = np.memmap(y_fname, dtype=np.int32, mode='r', shape=y_shape)

    assert X.shape == X_shape
    assert y.shape == y_shape

    return X, y
Example #18
    def update(self):
        """ Updates L-BFGS algorithm history
        """
        unix.cd(self.path)

        s = self.load('m_new') - self.load('m_old')
        y = self.load('g_new') - self.load('g_old')

        m = len(s)
        n = self.memory

        if self.memory_used == 0:
            S = np.memmap('LBFGS/S', mode='w+', dtype='float32', shape=(m, n))
            Y = np.memmap('LBFGS/Y', mode='w+', dtype='float32', shape=(m, n))
            S[:, 0] = s
            Y[:, 0] = y
            self.memory_used = 1

        else:
            S = np.memmap('LBFGS/S', mode='r+', dtype='float32', shape=(m, n))
            Y = np.memmap('LBFGS/Y', mode='r+', dtype='float32', shape=(m, n))
            S[:, 1:] = S[:, :-1]
            Y[:, 1:] = Y[:, :-1]
            S[:, 0] = s
            Y[:, 0] = y

            if self.memory_used < self.memory:
                self.memory_used += 1

        return S, Y
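The column shift above keeps the newest (s, y) pair in column 0 and ages older pairs to the right, so column k holds the pair from k iterations ago. A minimal sketch of that shift on a plain array (the LBFGS/S and LBFGS/Y memmap files, and self.load/self.memory, belong to the surrounding solver):

import numpy as np

m, n = 4, 3                               # model size, history length
S = np.zeros((m, n), dtype='float32')
for step in range(1, 4):
    s = np.full(m, step, dtype='float32')   # stand-in for m_new - m_old
    S[:, 1:] = S[:, :-1]                  # age existing columns by one slot
    S[:, 0] = s                           # newest difference vector in column 0
print(S[0])                               # [3. 2. 1.] -> newest first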
Example #19
    def __init__(self, one_hot=False,
                    shuffle_rng=None, preproc=[], size=(48,48), num_channels=1, img_per_seq=3,
                    path=None):

        if path is None:
            path = '/data/lisa/data/faces/EmotiW/preproc/arranged_data'

        self.x = np.memmap(path + '_x.npy', mode='r', dtype='float32')
        self.y = np.memmap(path + '_y.npy', mode='r', dtype='uint8')
        self.y = self.y.view()
        self.y.shape = (len(self.y)/(img_per_seq), img_per_seq, 1)

        self.x = self.x.view()
        self.x.shape = (len(self.y), img_per_seq, size[0], size[1], num_channels)
        
        if shuffle_rng is None:
            shuffle_rng = np.random.RandomState((2013, 06, 11))
        elif not isinstance(shuffle_rng, np.random.RandomState):
            shuffle_rng = np.random.RandomState(shuffle_rng)

        self.permutation = shuffle_rng.permutation(len(self.y))
        self.one_hot = one_hot

        self.space = CompositeSpace(
            (FaceTubeSpace(shape=size,
                          num_channels=num_channels,
                          axes=('b', 't', 0, 1, 'c')),
            VectorSpace(dim=(self.one_hot and 7 or 1))))
        self.source = ('features', 'targets')
        self.data_specs = (self.space, self.source)

        self.n_samples = len(self.y)
Example #20
def get_data(start,stop):
     
     #n = np.exp(np.squeeze(collect("n",tind=[start,stop],path=path,info=False)))
     n = (np.squeeze(collect("n",tind=[start,stop],path=path,info=False)))
     #phi = (np.squeeze(collect("phi",tind=[start,stop],path=path,info=False)))
     n_mmap = np.memmap(nfile,dtype=n.dtype.name,mode='w+',shape=n.shape)
     n_mmap[:] = n[:]
     
     print 'n_mmap.shape :',n_mmap.shape,n.shape
     del n
     gc.collect()
     u = np.squeeze(collect("u",tind=[start,stop],path=path,info=False))
     u_mmap = np.memmap(ufile,dtype=u.dtype.name,mode='w+',shape=u.shape)
     u_mmap[:] = u[:]
     del u

     gc.collect()
     fft_u = np.fft.rfft(u_mmap)
     power = fft_u.conj()*fft_u
     A_k = np.real(np.sqrt(power))
     
     del fft_u,power
     
     gc.collect()
     phi = np.squeeze(collect("phi",tind=[start,stop],path=path,info=False))
     phi_mmap = np.memmap(phifile,dtype=phi.dtype.name,mode='w+',shape=phi.shape)
     phi_mmap[:] = phi[:]
     
     del phi


     gc.collect()
     
     return n_mmap,u_mmap,A_k,phi_mmap
Example #21
def LogOfMatrix(ccMapObj):

    ccMapObj.make_readable()

    LogHiCmap = CCMAP()
    LogHiCmap.path2matrix = os.getcwd() + '/nparray_' + getRandomName() + '.bin'

    LogHiCmap.shape = ccMapObj.shape
    LogHiCmap.xticks = ccMapObj.xticks
    LogHiCmap.yticks = ccMapObj.yticks
    LogHiCmap.binsize = ccMapObj.binsize
    LogHiCmap.bLog = True

    bNonZeros = None
    #if ccMapObj.bNoData is not None:
    #	LogHiCmap.bNoData = ccMapObj.bNoData
    #	bNonZeros = ~LogHiCmap.bNoData
    #else:
    LogHiCmap.bNoData = np.all( ccMapObj.matrix == 0.0, axis=0)
    bNonZeros = ~LogHiCmap.bNoData

    # Log of part of matrix containing data
    path2matrixA = os.getcwd() + '/nparray_' + getRandomName() + '.bin'
    A = (ccMapObj.matrix[bNonZeros,:])[:,bNonZeros]   # Selected row-column which are not all zeros
    BinMatrixA = np.memmap(path2matrixA, dtype=dtype_npBINarray, mode='w+', shape=A.shape)
    BinMatrixA[:] = np.log10(A)[:]
    BinMatrixA.flush()

    # Assigning minvalue and maxvalue
    LogHiCmap.maxvalue = float(np.amax(BinMatrixA))
    minvalue = np.amin(BinMatrixA)
    v_steps = np.linspace(minvalue, LogHiCmap.maxvalue, 100)
    LogHiCmap.minvalue = minvalue - (v_steps[1] - v_steps[0])

    # Making full matrix
    BinLogMatrix = np.memmap(LogHiCmap.path2matrix, dtype=dtype_npBINarray, mode='w+', shape=LogHiCmap.shape)
    A_i = -1
    A_j = 0
    for i in range(BinLogMatrix.shape[0]):
        if not LogHiCmap.bNoData[i]:
            A_i += 1

        A_j = 0
        for j in range(BinLogMatrix.shape[1]):
            if LogHiCmap.bNoData[i] or LogHiCmap.bNoData[j]:
                BinLogMatrix[i][j] = LogHiCmap.minvalue
            else:
                BinLogMatrix[i][j] = BinMatrixA[A_i][A_j]
                A_j += 1
    BinLogMatrix.flush()

    del BinLogMatrix
    del BinMatrixA

    try:
        os.remove(path2matrixA)
    except:
        pass

    return LogHiCmap
Example #22
	def _train(self, x):
# 		print self.dtype
		if len(x) > self.defaultOutputLength:
			self.defaultOutputLength = len(x)
		self.cacheLength += len(x)
		if self.cache is None:
			if self.cacheSize == -1:
				#self.cache = np.memmap(self.cacheName, dtype='float32', mode='w+', shape = x.shape)
				self.cache = np.memmap(self.cacheName, dtype=self.dtype, mode='w+', shape = x.shape)
			else:
				#self.cache = np.memmap(self.cacheName, dtype='float32', mode='w+', shape = (self.cacheSize, len(x[0])))
				self.cache = np.memmap(self.cacheName, dtype=self.dtype, mode='w+', shape = (self.cacheSize, len(x[0])))
		elif self.cacheSize == -1:
			self.reshape((self.cache.shape[0]+len(x), len(x[0])))
# 			print x[0][0].dtype.itemsize
# 			print self.cache._mmap.size()
# 			#self.cache._mmap.resize( (self.cache.shape[0]+len(x), len(x[0])) )
# 			print self.cache.shape
# 			newShape = (self.cache.shape[0]+len(x), len(x[0]))
# 			memmap_resize( newShape, self.cache )
# 			del self.cache
# 			self.cache = np.memmap(self.cacheName, dtype=self.dtype, mode='w+', shape = newShape)
# 			print "new size: "+str(self.cache._mmap.size())
# 			print self.cache.reshape(newShape)
		self.cache[self.cachePos:self.cachePos+len(x)] = x
# 		print self.cache._mmap.size()
# 		print self.cache[0][0]
# 		print self.cache[0][0].dtype.itemsize
# 		print "---"
		self.cachePos += len(x)
Example #23
def group_iter(input_arrays, swath_cols, swath_rows, input_dtype, output_arrays, grid_cols, grid_rows, group_size):
    ret_input_arrays = []
    ret_output_arrays = []

    for idx, (ia, oa) in enumerate(zip(input_arrays, output_arrays)):
        if isinstance(ia, str):
            ret_input_arrays.append(numpy.memmap(ia, shape=(swath_rows, swath_cols), dtype=input_dtype, mode='r'))
        else:
            ret_input_arrays.append(ia)

        # We iterate over this so that we only create output arrays when they are used
        if oa is None:
            ret_output_arrays.append(numpy.empty((grid_rows, grid_cols), dtype=ia.dtype))
            # we should return the numpy arrays in the main function since the user didn't provide any
            output_arrays[idx] = ret_output_arrays[-1]
        elif isinstance(oa, str):
            ret_output_arrays.append(numpy.memmap(oa, shape=(grid_rows, grid_cols), dtype=input_dtype, mode='w+'))
        else:
            ret_output_arrays.append(oa)

        if group_size is None or len(ret_input_arrays) >= group_size:
            LOG.debug("Yielding group of size %d because group size is %d", len(ret_input_arrays), group_size)
            yield tuple(ret_input_arrays), tuple(ret_output_arrays)
            ret_input_arrays = []
            ret_output_arrays = []

    if len(ret_input_arrays):
        LOG.debug("Yielding remaining group items to process for EWA resampling")
        yield tuple(ret_input_arrays), tuple(ret_output_arrays)
Example #24
    def next(self):
        # for python 2.x
        # Keep under lock only the mechanism which advances the indexing of each batch
        # see # http://anandology.com/blog/using-iterators-and-generators/
        with self.lock:
            song_idx, self.cur_song = self.cur_song, self.cur_song+1

        bX, bY = (None, None)
        if song_idx < self.n_songs:
            x_path = self.data[self.sidstr[song_idx]]['X_path']
            y_path = self.data[self.sidstr[song_idx]]['y_path']
            bX = np.memmap(
                x_path,
                dtype='float32',
                mode='r',
                shape=tuple(self.data[self.sidstr[song_idx]]['X_shape'])
                )
            bY = np.memmap(
                y_path,
                dtype='float32',
                mode='r',
                shape=tuple(self.data[self.sidstr[song_idx]]['y_shape'])
                )
            return bX, bY
        else:
            raise StopIteration()
        return bX, bY
Example #25
    def __init__(self,fdata,fndata):
        #dump to binary
        print('Initialize binary from ' + fdata)

        if (not os.path.isfile(fndata)):

            print('create Epix100a flat file ' + fndata + ' from ' + fdata)
            
            datain=Epix100a(fdata);

            #write header
            binheader=np.zeros(16).astype(np.uint32);
            binheader[0:6]=[datain.nframes, datain.my*datain.mx, datain.my, datain.mx, datain.nblocks, datain.nbcols];
            binheader.tofile(fndata);    

            #write data
            dataout=np.memmap(fndata,dtype=np.int16,mode='r+', shape=(datain.nframes,datain.my,datain.mx),offset=64);
            t0=time.clock();
            for iframe in range(datain.nframes):
                dataout[iframe]=datain.frame(iframe);
                if (iframe%100==0):
                    #progress(iframe,nframes,iframe);
                    print (str(iframe)+' - '+str(1000*(time.clock()-t0)/(iframe+1))+' ms. average frame: '+str(np.mean(datain.frame(iframe))))
            dataout.flush();
            
            del dataout;
            del datain;
        
        #get nr of frames
        else:
            print(fndata + ' file already exists.')
        data=np.memmap(fndata,dtype=np.uint32,mode='r',shape=((64)),offset=0); 
        self.nframes=data[0]; self.nframesize=data[1]; self.my=data[2]; self.mx=data[3]; self.nblocks=data[4]; self.nbcols=data[5];
        self.data=np.memmap(fndata,dtype=np.int16,mode='c',shape=(self.nframes,self.my,self.mx),offset=64);
Example #26
    def extract_to_memmap(self):
        """
        Allocate a memmap, fill it with extracted features, return r/o view.
        """
        filename = self.filename
        feature_shp = self.feature_shp
        print('Creating memmap %s for features of shape %s' % (
                                              filename,
                                              str(feature_shp)))
        features_fp = np.memmap(filename,
            dtype='float32',
            mode='w+',
            shape=feature_shp)
        info = open(filename+'.info', 'w')
        cPickle.dump(('float32', feature_shp), info)
        del info

        self.extract_to_storage(features_fp)

        # -- docs here:
        #    http://docs.scipy.org/doc/numpy/reference/generated/numpy.memmap.html
        #    say that deletion is the way to flush changes !?
        del features_fp
        rval = np.memmap(self.filename,
            dtype='float32',
            mode='r',
            shape=feature_shp)
        return rval
Example #27
    def __next__(self):
        #check to see if at end of chunks
        if self._chunk_counter==self.num_chunks:
            offset = int(self._chunk_counter * self.chunksize)
            row_size = self.rmndr_row_size
            self._chunk_counter += 1
        elif self._chunk_counter < self.num_chunks:
            offset = int(self._chunk_counter * self.chunksize)
            end_dp = (self._chunk_counter+1) + self.chunksize
            row_size = self.chunk_row_size
            self._chunk_counter += 1
        elif self._chunk_counter > self.num_chunks:
            raise StopIteration

        if self.abr.header['f_structure']['nDataFormat'][0]==1: #float data
            data = memmap(self.abr.fid, dtype = float32, shape = (row_size,self.ncols), offset = offset+self.offset_base)
            return data

        elif self.abr.header['f_structure']['nDataFormat'][0]==0: #integer data
            try:
                data = memmap(self.abr.fid, dtype = int16, shape = (row_size,self.ncols),
                              mode = 'r',offset = offset + self.offset_base)
            except ValueError:
                pdb.set_trace()
            data = data[:].astype(float32)
            data = self.abr.scale_int_data(data)
            return data
Example #28
def get_sequence(mraw_path, file_shape, nmax=None, offset=0):
    '''
    Get a sequence of image files as 3D numpy array.

    :param mraw_path: path to .mraw file containing image data
    :param file_shape: tuple, (ntotal, height, width) of images in .mraw file
    :param nmax: maximum number of images in sequence
    :param offset: First image to be read
    :return: 3D array of image sequence
    '''
    ntotal, h, w = file_shape
    byte_size = 2*h*w                   # Number of bytes for one image 
    byte_offset = offset * byte_size    # Offset to first byte to be read

    # If only a single image was requested:
    if nmax and nmax == 1:
        with open(mraw_path, 'rb') as mraw:
            imarray = np.memmap(mraw, dtype=np.uint16, offset=byte_offset, mode='r', shape=(h, w))
    # Only display nmax or less images:
    elif nmax and ntotal > nmax:
        image_step = ntotal//nmax
        with open(mraw_path, 'rb') as mraw:
            memmap = np.memmap(mraw, dtype=np.uint16, offset=byte_offset, mode='r', shape=(ntotal-offset, h, w))
            imarray = memmap[::image_step, :, :]
    # If there are less than nmax images:
    else:
        with open(mraw_path, 'rb') as mraw:
            imarray = np.memmap(mraw, dtype=np.uint16, offset=byte_offset, mode='r', shape=(ntotal-offset, h, w))

    return imarray
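A small self-contained sketch of the offset arithmetic used above, writing a tiny synthetic 16-bit frame sequence to a temporary file and memmapping it from the third frame onward (a real .mraw file would come from the camera software):

import tempfile
import numpy as np

ntotal, h, w = 10, 4, 5
frames = np.arange(ntotal * h * w, dtype=np.uint16).reshape(ntotal, h, w)

path = tempfile.mktemp(suffix='.mraw')
frames.tofile(path)

offset = 3                              # first image to be read
byte_offset = offset * 2 * h * w        # 2 bytes per uint16 pixel
seq = np.memmap(path, dtype=np.uint16, offset=byte_offset, mode='r',
                shape=(ntotal - offset, h, w))
assert np.array_equal(seq[0], frames[offset])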
Example #29
    def convert(cls, file_path):
        meta_path = file_path + '.meta'
        index_path = file_path + '.idx'
        edge_path = file_path + '.bin'

        with open(file_path, 'r') as f:
            nodes, edges = map(int, f.readline().split())
            nodes, edges = nodes + 1, edges + 1
            with open(meta_path, 'w+') as m:
                m.write('{} {}'.format(nodes, edges))
            index_map = np.memmap(index_path, dtype='uint32', mode='w+', shape=(nodes, 2))
            edge_map = np.memmap(edge_path, dtype='uint32', mode='w+', shape=(edges, 1))

            current = 0
            count = 0
            degree = 0

            for line in f:
                origin, destination = map(int, line.split())
                while current < origin:
                    index_map[current] = (count - degree, degree)
                    degree = 0
                    current += 1
                if current == origin:
                    degree += 1
                edge_map[count] = destination
                count += 1

            index_map[current] = (count - degree, degree)
            index_map.flush()
            edge_map.flush()
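The converter above writes a CSR-like layout: index_map[node] holds (first_edge, degree) into the flat edge_map, assuming the input edge list is sorted by origin node. Assuming that layout, a node's neighbours could be read back with a sketch like this (the .idx/.bin paths and node/edge counts are hypothetical):

import numpy as np

def neighbours(index_path, edge_path, node, nodes, edges):
    index_map = np.memmap(index_path, dtype='uint32', mode='r', shape=(nodes, 2))
    edge_map = np.memmap(edge_path, dtype='uint32', mode='r', shape=(edges, 1))
    first, degree = index_map[node]
    return edge_map[first:first + degree, 0]

# e.g. neighbours('graph.txt.idx', 'graph.txt.bin', node=3, nodes=101, edges=501)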
Example #30
def main(A):
    """convolve the tau(mass) field, 
    add in thermal broadening and redshift distortion """

    sightlines = Sightlines(A)
    maker = SpectraMaker(A, sightlines)
    fgpa = FGPAmodel(A)

    Npixels = sightlines.Npixels.sum()

    spectaureal = numpy.memmap(A.SpectraOutputTauReal, mode='w+', 
            dtype='f4', shape=Npixels)
    spectaured = numpy.memmap(A.SpectraOutputTauRed, mode='w+', 
            dtype='f4', shape=Npixels)
    specdelta = numpy.memmap(A.SpectraOutputDelta, mode='w+', 
            dtype='f4', shape=Npixels)

    def work(i):
        sl2 = slice(sightlines.PixelOffset[i], 
                sightlines.PixelOffset[i] + sightlines.Npixels[i])
        result =  maker.convolve(i, Afunc=fgpa.Afunc, Bfunc=fgpa.Bfunc)
        spectaureal[sl2] = result.taureal
        spectaured[sl2] = result.taured
        specdelta[sl2] = result.delta
        sightlines.Z_RED[i] = result.Zqso
    chunkmap(work, range(len(sightlines)), 100)

    spectaureal.flush()
    spectaured.flush()
    specdelta.flush()
    sightlines.Z_RED.flush()
Example #31
def save_memmap(filenames,
                base_name='Yr',
                resize_fact=(1, 1, 1),
                remove_init=0,
                idx_xy=None,
                order='F',
                xy_shifts=None,
                is_3D=False,
                add_to_movie=0,
                border_to_0=0):
    """ Saves efficiently a list of tif files into a memory mappable file

    Parameters:
    ----------
        filenames: list
            list of tif files or list of numpy arrays

        base_name: str
            the base used to build the file name. IT MUST NOT CONTAIN "_"

        resize_fact: tuple
            x, y, and z downsampling factors (0.5 means downsampled by a factor of 2)

        remove_init: int
            number of frames to remove at the beginning of each tif file
            (used for resonant scanning images if the laser is turned on trial by trial)

        idx_xy: tuple size 2 [or 3 for 3D data]
            for selecting slices of the original FOV, for instance
            idx_xy = (slice(150,350,None), slice(150,350,None))

        order: string
            whether to save the file in 'C' or 'F' order

        xy_shifts: list
            x and y shifts computed by a motion correction algorithm to be applied before memory mapping

        is_3D: boolean
            whether it is 3D data
    Returns:
    -------
        fname_new: the name of the mapped file, the format is such that
            the name will contain the frame dimensions and the number of frames

    """

    # TODO: can be done online
    Ttot = 0
    for idx, f in enumerate(filenames):
        if isinstance(f, str):
            print(f)

        if is_3D:
            #import tifffile
            #            print("Using tifffile library instead of skimage because of  3D")
            Yr = f if isinstance(f, basestring) else tifffile.imread(f)
            if idx_xy is None:
                Yr = Yr[remove_init:]
            elif len(idx_xy) == 2:
                Yr = Yr[remove_init:, idx_xy[0], idx_xy[1]]
            else:
                Yr = Yr[remove_init:, idx_xy[0], idx_xy[1], idx_xy[2]]

        else:
            Yr = cm.load(f, fr=1, in_memory=True) if isinstance(
                f, basestring) else cm.movie(f)
            if xy_shifts is not None:
                Yr = Yr.apply_shifts(xy_shifts,
                                     interpolation='cubic',
                                     remove_blanks=False)

            if idx_xy is None:
                if remove_init > 0:
                    Yr = np.array(Yr)[remove_init:]
            elif len(idx_xy) == 2:
                Yr = np.array(Yr)[remove_init:, idx_xy[0], idx_xy[1]]
            else:
                raise Exception('You need to set is_3D=True for 3D data)')
                Yr = np.array(Yr)[remove_init:, idx_xy[0], idx_xy[1],
                                  idx_xy[2]]

        if border_to_0 > 0:

            min_mov = Yr.calc_min()
            Yr[:, :border_to_0, :] = min_mov
            Yr[:, :, :border_to_0] = min_mov
            Yr[:, :, -border_to_0:] = min_mov
            Yr[:, -border_to_0:, :] = min_mov

        fx, fy, fz = resize_fact
        if fx != 1 or fy != 1 or fz != 1:

            if 'movie' not in str(type(Yr)):
                Yr = cm.movie(Yr, fr=1)

            Yr = Yr.resize(fx=fx, fy=fy, fz=fz)
        T, dims = Yr.shape[0], Yr.shape[1:]
        Yr = np.transpose(Yr, list(range(1, len(dims) + 1)) + [0])
        Yr = np.reshape(Yr, (np.prod(dims), T), order='F')

        if idx == 0:
            fname_tot = base_name + '_d1_' + str(dims[0]) + '_d2_' + str(
                dims[1]) + '_d3_' + str(
                    1 if len(dims) == 2 else dims[2]) + '_order_' + str(order)
            if isinstance(f, str):
                fname_tot = os.path.join(os.path.split(f)[0], fname_tot)
            big_mov = np.memmap(fname_tot,
                                mode='w+',
                                dtype=np.float32,
                                shape=(np.prod(dims), T),
                                order=order)
        else:
            big_mov = np.memmap(fname_tot,
                                dtype=np.float32,
                                mode='r+',
                                shape=(np.prod(dims), Ttot + T),
                                order=order)

        big_mov[:, Ttot:Ttot +
                T] = np.asarray(Yr, dtype=np.float32) + 1e-10 + add_to_movie
        big_mov.flush()
        del big_mov
        Ttot = Ttot + T

    fname_new = fname_tot + '_frames_' + str(Ttot) + '_.mmap'
    os.rename(fname_tot, fname_new)

    return fname_new
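The appending above relies on a memmap detail: opening an existing file with mode='r+' and a larger shape extends the file on disk, so each new movie's frames can be written past the previous total. A minimal sketch of that pattern with synthetic chunks (the filename is only for illustration):

import numpy as np

d, chunks = 50, [10, 20, 15]          # pixels per frame, frames per input file
fname, total = 'big_mov_demo.mmap', 0
for t in chunks:
    if total == 0:
        big_mov = np.memmap(fname, mode='w+', dtype=np.float32, shape=(d, t), order='F')
    else:
        big_mov = np.memmap(fname, mode='r+', dtype=np.float32, shape=(d, total + t), order='F')
    big_mov[:, total:total + t] = np.random.rand(d, t).astype(np.float32)
    big_mov.flush()
    del big_mov
    total += t
# the file now holds a (50, 45) float32 array in Fortran order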
Example #32
def normed_patch_data_mat(raw_mat,
                          save_dir,
                          mean_mode='global_channel',
                          sdev_mode='global_channel',
                          file_name='normed_mat.npy',
                          batch_size=0):

    modes = ('global_channel', 'global_feature', 'local_channel', 'local_full',
             'gc', 'gf', 'lc', 'lf', 'none')
    assert mean_mode in modes
    assert sdev_mode in modes or isinstance(sdev_mode, float)
    num_patches, n_channels, n_feats_per_channel = raw_mat.shape
    batch_size = batch_size if batch_size else num_patches
    assert num_patches % batch_size == 0
    data_mat = np.memmap(save_dir + file_name,
                         dtype=np.float32,
                         mode='w+',
                         shape=raw_mat.shape)

    # MEAN treatment ######
    if mean_mode in ('global_channel', 'gc'):
        channel_mean = np.mean(raw_mat, axis=(0, 2))
        for idx in range(num_patches // batch_size):
            batch = raw_mat[idx * batch_size:(idx + 1) * batch_size, :, :]
            batch_t = np.transpose(batch, axes=(0, 2, 1))
            batch_t_centered = batch_t - channel_mean
            batch_centered = np.transpose(batch_t_centered, axes=(0, 2, 1))
            data_mat[idx * batch_size:(idx + 1) *
                     batch_size, :, :] = batch_centered

        np.save(save_dir + 'data_mean.npy', channel_mean)

    elif mean_mode in ('global_feature', 'gf'):
        feature_mean = np.mean(raw_mat, axis=0)

        for idx in range(num_patches // batch_size):
            batch = raw_mat[idx * batch_size:(idx + 1) * batch_size, :, :]
            data_mat[idx * batch_size:(idx + 1) *
                     batch_size, :, :] = batch - feature_mean

        np.save(save_dir + 'data_mean.npy', feature_mean)

    elif mean_mode in ('local_channel', 'lc'):
        for idx in range(num_patches // batch_size):
            batch = raw_mat[idx * batch_size:(idx + 1) * batch_size, :, :]
            channel_mean = np.mean(batch,
                                   axis=2)  # shape=[n_patches, n_channels]
            batch_t = np.transpose(batch, axes=(2, 0, 1))
            batch_t_centered = batch_t - channel_mean
            batch_centered = np.transpose(batch_t_centered, axes=(1, 2, 0))
            data_mat[idx * batch_size:(idx + 1) *
                     batch_size, :, :] = batch_centered

    elif mean_mode in ('local_full', 'lf'):
        for idx in range(num_patches // batch_size):
            batch = raw_mat[idx * batch_size:(idx + 1) * batch_size, :, :]
            sample_mean = np.mean(batch, axis=(1, 2))
            batch_t = np.transpose(batch)
            batch_t_centered = batch_t - sample_mean
            batch_centered = np.transpose(batch_t_centered)
            data_mat[idx * batch_size:(idx + 1) *
                     batch_size, :, :] = batch_centered

    else:  # mean_mode is 'none'
        pass

    # SDEV treatment ######
    if sdev_mode in ('global_channel', 'gc'):
        feat_sdev = np.std(raw_mat, axis=0)
        channel_sdev = np.mean(feat_sdev, axis=1)
        for idx in range(num_patches // batch_size):
            batch = raw_mat[idx * batch_size:(idx + 1) * batch_size, :, :]
            batch = np.rollaxis(np.rollaxis(batch, axis=2, start=1) /
                                channel_sdev,
                                axis=2,
                                start=1)
            data_mat[idx * batch_size:(idx + 1) * batch_size, :, :] = batch

        np.save(save_dir + 'data_sdev.npy', channel_sdev)

    elif sdev_mode in ('global_feature', 'gf'):
        feat_sdev = np.std(raw_mat, axis=0)

        for idx in range(num_patches // batch_size):
            batch = raw_mat[idx * batch_size:(idx + 1) * batch_size, :, :]
            batch = batch / feat_sdev
            data_mat[idx * batch_size:(idx + 1) * batch_size, :, :] = batch

        np.save(save_dir + 'data_sdev.npy', feat_sdev)

    elif sdev_mode in ('local_channel', 'lc'):  # seems like a bad idea anyway
        raise NotImplementedError

    elif sdev_mode in ('local_full', 'lf'):  # this too
        for idx in range(num_patches // batch_size):
            batch = raw_mat[idx * batch_size:(idx + 1) * batch_size, :, :]
            sample_sdev = np.std(batch, axis=(1, 2))
            batch_t = np.transpose(batch)
            batch_t_scaled = batch_t / sample_sdev
            batch_scaled = np.transpose(batch_t_scaled)
            data_mat[idx * batch_size:(idx + 1) *
                     batch_size, :, :] = batch_scaled

    elif isinstance(sdev_mode, float):
        data_mat[:, :, :] = sdev_mode * raw_mat[:, :, :]

    else:  # sdev_mode == 'none'
        pass

    del data_mat
    data_mat = np.memmap(save_dir + file_name,
                         dtype=np.float32,
                         mode='r',
                         shape=raw_mat.shape)
    return data_mat
Example #33
    if base_name is None:

        base_name = mmap_fnames[0]
        base_name = base_name[:base_name.find('_d1_')] + '-#-' + str(
            len(mmap_fnames))

    fname_tot = (base_name + '_d1_' + str(dims[0]) + '_d2_' + str(dims[1]) +
                 '_d3_' + str(1 if len(dims) == 2 else dims[2]) + '_order_' +
                 str(order) + '_frames_' + str(tot_frames) + '_.mmap')
    fname_tot = os.path.join(os.path.split(mmap_fnames[0])[0], fname_tot)

    print(fname_tot)

    big_mov = np.memmap(fname_tot,
                        mode='w+',
                        dtype=np.float32,
                        shape=(d, tot_frames),
                        order='C')

    step = np.int(old_div(d, n_chunks))
    pars = []
    for ref in range(0, d - step + 1, step):
        pars.append([fname_tot, d, tot_frames, mmap_fnames, ref, ref + step])
    # last batch should include the leftover pixels
    pars[-1][-1] = d

    if dview is not None:
        if 'multiprocessing' in str(type(dview)):
            dview.map_async(save_portion, pars).get(9999999)
        else:
            dview.map_sync(save_portion, pars)
Example #34
def get_data(size, vecsize):
    trainX = np.memmap('../data/prepared/TrainMap',
                       dtype='float',
                       mode='r',
                       shape=(size, vecsize))
    return trainX
Example #35
def overlay_with_grid(image_path, pred_path, image_save_path, downsampled_image_save_path,
                      label_save_path, shape, show=False):
    # use one of the following based on the size of the image; if image is huge, go with the first one!
    ##########################################################################
    (H, W, C) = shape
    full_image = np.memmap(image_path, dtype=np.uint16, mode='r', shape=(H, W, C))#.transpose(1,0,2)
    full_label = np.memmap(pred_path, dtype=np.uint8, mode='r', shape=(H, W))#.transpose(1,0)
    x_start = 64 * 3
    y_start = 64 * 3
    x_end = x_start + 64 * 10
    y_end = y_start + 64 * 10
    image = full_image.copy()[y_start:y_end,x_start:x_end,:]
    # ex_array = []
    # for t in range(4, -1, -1):
    #     temp = np.expand_dims(image[:, :, t], 2)
    #     ex_array.append(temp)
    # image = np.dstack(ex_array)
    # do this for more than 3 channels
    show_image = image[:, :, :3]
    image = np.dstack((show_image[:, :, 2], show_image[:, :, 1], show_image[:, :, 0]))
    #################################
    label = full_label.copy()[y_start:y_end,x_start:x_end]
    # image = np.load(image_path, mmap_mode='r')
    # label = np.load(pred_path, mmap_mode='r')
    # print(image)
    ###########################################################################
    # x_start = 64 * 140
    # y_start = 64 * 10
    # x_end = x_start + 64 * 10
    # y_end = y_start + 64 * 10
    # image = image[y_start:y_end,x_start:x_end,:]
    # label = label[y_start:y_end,x_start:x_end]
    ###########################################################################
    # colored_label = convert_to_colors(label)
    my_dpi = 300

    # Set up figure
    fig = pl.figure(figsize=(float(image.shape[0])/my_dpi, float(image.shape[1])/my_dpi), dpi=my_dpi)
    ax = fig.add_subplot(111)

    # Remove whitespace from around the image
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)

    # Set the gridding interval: here we use the major tick interval
    myInterval = 64.
    loc = plticker.MultipleLocator(base=myInterval)
    ax.xaxis.set_major_locator(loc)
    ax.yaxis.set_major_locator(loc)

    # Add the grid
    ax.grid(which='major', axis='both', linestyle='-', color='g')

    # Add the image
    ax.imshow(image)

    # Find number of gridsquares in x and y direction
    nx = abs(int(float(ax.get_xlim()[1] - ax.get_xlim()[0]) / float(myInterval)))
    ny = abs(int(float(ax.get_ylim()[1] - ax.get_ylim()[0]) / float(myInterval)))

    # Add some labels to the gridsquares
    mt.rcParams.update({'font.size': 2})
    for j in range(ny):
        y = myInterval / 2 + j * myInterval
        for i in range(nx):
            x = myInterval / 2. + float(i) * myInterval
            # ax.text(x, y, '{:d}'.format(i + j * nx), color='w', ha='center', va='center').set_color('red')
            # find the label at this point
            this_label = label[int(y),int(x)]
            ax.text(x, y, '{}'.format(all_labels_inverted[this_label]),
                    color='w', ha='center', va='center').set_color('yellow')

    # Save the figure
    fig.savefig(image_save_path, dpi=my_dpi)

    ############ we will save downsampled images to show how it relates to pixel-wise results ##########
    # save colored label as well
    # 1. reduce 64*64 blocks, 2. apply filter to remove segmentation noise, 3. convert labels to colors
    colored_labels = block_reduce(full_label, block_size=(64,64), func=np.max)
    # colored_labels = cv2.medianBlur(colored_labels, ksize=3)
    filtered_forest = colored_labels[colored_labels == 1].sum()/colored_labels.reshape(-1).shape[0]*100
    colored_labels = convert_to_colors(colored_labels, flag='forest')
    # print(set( tuple(v) for m2d in colored_labels for v in m2d )) # if you want to check unique colors
    pl.imsave(label_save_path, colored_labels)
    # downsample and save input image
    downsampled_input_image = block_reduce(full_image, block_size=(64,64,1), func=np.max)
    # downsampled_image_path = os.path.join(os.path.splitext(image_save_path)[0]+'_down.png')
    pl.imsave(downsampled_image_save_path, downsampled_input_image)
    if show:
        pl.show()
    return filtered_forest
Example #36
    'img_q_id_test',
    'question_test',
    'choices_test',
]
#load words
ID_PKL = pickle.load(open(data_prefix+paths[0]+'.pkl','rb'))
QUESTION_PKL = pickle.load(open(data_prefix+paths[1]+'.pkl','rb'))
CHOICE_PKL = pickle.load(open(data_prefix+paths[2]+'.pkl','rb'))
#load picture features
IM_ID = pickle.load(open('../Data/val2014/ID.pkl','rb'))
IM_ID_DICT = dict()
for num in xrange(len(IM_ID)):
    ID = IM_ID[num].split('_')[2].split('.')[0]
    IM_ID_DICT[ID]=num
mem_shape = (40504,1,1000)
mem_image = np.memmap('../Data/val2014/vgg_feats.memmap',dtype='float32',mode='r',shape=mem_shape )
#===== prepare pickList =====
pickList = range(0,len(ID_PKL))
numToC = {0:'A',1:'B',2:'C',3:'D',4:'E'}
answers = []
# maybe this will help?  -- Ray.
# this really help, thanks!  -- Angus.
#print '{0:{fill}{align}12}'.format(ID_PKL[0][0],fill='0',align='>')

# printing 300-dim word vector
#print word_vec[ QUESTION_PKL[0][0] ]
print "start making model..."
model = Keras_model.keras_model(20)
model.load_weights(MODEL_NAME)
#===== Start training =====
print "Start testing!"
Example #37
def make_flattened_patch_data(num_patches,
                              ph,
                              pw,
                              classifier,
                              map_name,
                              n_channels,
                              n_feats_white,
                              whiten_mode='pca',
                              batch_size=100,
                              mean_mode='local_full',
                              sdev_mode='global_feature',
                              raw_mat_load_path='',
                              n_val_patches=0):
    """
    creates whitening, covariance, raw and whitened feature matrices for separate channels.
    all data is saved as [n_patches, n_channels, n_features_per_channel]
    """

    save_dir = make_data_dir(map_name,
                             ph,
                             pw,
                             mean_mode,
                             sdev_mode,
                             n_feats_white,
                             classifier=classifier)

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    if raw_mat_load_path:
        raw_mat = np.memmap(raw_mat_load_path,
                            dtype=np.float32,
                            mode='r',
                            shape=(num_patches, n_channels, ph * pw))
    else:
        raw_mat = raw_patch_data_mat(map_name, classifier, num_patches, ph, pw,
                                     batch_size, n_channels, save_dir)
    print('raw mat done')

    norm_mat = normed_patch_data_mat(raw_mat,
                                     save_dir,
                                     mean_mode=mean_mode,
                                     sdev_mode=sdev_mode)
    print('normed mat done')

    print('mat dims pre flatten:', norm_mat.shape)
    flat_mat = norm_mat.reshape([num_patches, -1])

    cov = flattened_cov_acc(flat_mat, save_dir)
    print('cov done')

    whiten, unwhiten = flattened_whitening_mats(cov, whiten_mode, save_dir,
                                                n_feats_white)
    print('whitening mats done')

    data_mat = np.memmap(save_dir + 'data_mat_' + whiten_mode +
                         '_whitened.npy',
                         dtype=np.float32,
                         mode='w+',
                         shape=(num_patches, n_feats_white))

    for idx in range(num_patches // batch_size):
        image = flat_mat[idx * batch_size:(idx + 1) *
                         batch_size, :]  # [bs, n_f]
        # whiten is [n_fw, n_f], target [bs, n_fw]
        data_mat[
            idx * batch_size:(idx + 1) *
            batch_size, :] = image @ whiten.T  # [bs, n_f] x [n_f, n_fw] = [bs, n_fw]
    print('whitened data done')

    if n_val_patches > 0:
        add_flattened_validation_set(n_val_patches, ph, pw, classifier,
                                     map_name, n_channels, n_feats_white,
                                     whiten_mode, batch_size, mean_mode,
                                     sdev_mode)
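
The loop above applies the whitening matrix batch by batch and writes the result straight into a 'w+' memmap, so the whitened data never has to fit in RAM at once. A stripped-down sketch of that write pattern with small, made-up sizes (the real shapes and helper functions come from the surrounding module):

import numpy as np

num_patches, n_feats, n_feats_white, bs = 1000, 192, 64, 100

flat_mat = np.random.randn(num_patches, n_feats).astype(np.float32)
whiten = np.random.randn(n_feats_white, n_feats).astype(np.float32)

out = np.memmap('whitened_demo.npy', dtype=np.float32, mode='w+',
                shape=(num_patches, n_feats_white))
for idx in range(num_patches // bs):
    batch = flat_mat[idx * bs:(idx + 1) * bs, :]          # [bs, n_feats]
    out[idx * bs:(idx + 1) * bs, :] = batch @ whiten.T    # [bs, n_feats_white]
out.flush()
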
Пример #38
0
def make_channel_separate_patch_data(num_patches,
                                     ph,
                                     pw,
                                     classifier,
                                     map_name,
                                     n_channels,
                                     n_feats_per_channel_white,
                                     whiten_mode='pca',
                                     batch_size=100,
                                     mean_mode='global_channel',
                                     sdev_mode='global_channel',
                                     raw_mat_load_path='',
                                     n_val_patches=0):
    """
    creates whitening, covariance, raw and whitened feature matrices for separate channels.
    They are saved as 3d matrices where the first dimension is the channel index
    """

    save_dir = make_data_dir(map_name,
                             ph,
                             pw,
                             mean_mode,
                             sdev_mode,
                             n_features_white=n_feats_per_channel_white,
                             classifier=classifier)
    save_dir = save_dir.rstrip('/') + '_channelwise/'

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    if raw_mat_load_path:
        raw_mat = np.memmap(raw_mat_load_path,
                            dtype=np.float32,
                            mode='r',
                            shape=(num_patches, n_channels, ph * pw))
    else:
        raw_mat = raw_patch_data_mat(map_name, classifier, num_patches, ph, pw,
                                     batch_size, n_channels, save_dir)
    print('raw mat done')

    norm_mat = normed_patch_data_mat(raw_mat,
                                     save_dir,
                                     mean_mode=mean_mode,
                                     sdev_mode=sdev_mode)
    print('normed mat done')

    cov = channel_independent_cov_acc(norm_mat, save_dir)
    print('cov done')

    n_dims_to_drop = ph * pw - n_feats_per_channel_white
    channel_whiten, channel_unwhiten = channel_independent_whitening_mats(
        cov, whiten_mode, save_dir, n_dims_to_drop=n_dims_to_drop)
    print('whitening mats done')

    data_mat = np.memmap(
        save_dir + 'data_mat_' + whiten_mode + '_whitened_channelwise.npy',
        dtype=np.float32,
        mode='w+',
        shape=(num_patches, n_channels, channel_whiten.shape[1]))

    for idx in range(num_patches // batch_size):
        image = norm_mat[idx * batch_size:(idx + 1) *
                         batch_size, :, :]  # [bs, n_c, n_fpc]
        # channel_whiten is [n_c, n_fpcw, n_fpc], target [bs, n_c, n_fpcw]
        image = np.expand_dims(image, axis=3)  # [bs, n_c, n_fpc, 1]
        # [n_c, n_fpcw, n_fpc] x [bs, n_c, n_fpc, 1] -> [bs, n_c, n_fpcw, 1], squeezed to [bs, n_c, n_fpcw]
        data_mat[idx * batch_size:(idx + 1) * batch_size, :, :] = np.squeeze(
            channel_whiten @ image)
    print('whitened data done')

    if n_val_patches > 0:
        add_channelwise_validation_set(n_val_patches, ph, pw, classifier,
                                       map_name, n_channels,
                                       n_feats_per_channel_white, whiten_mode,
                                       batch_size, mean_mode, sdev_mode)
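
The channel-wise version relies on a broadcast matmul: the stack of per-channel whitening matrices with shape [n_c, n_fpcw, n_fpc] is applied to a batch expanded to [bs, n_c, n_fpc, 1], giving [bs, n_c, n_fpcw, 1] before the squeeze. A toy-sized sketch of that broadcast:

import numpy as np

bs, n_c, n_fpc, n_fpcw = 5, 3, 9, 4
norm_batch = np.random.randn(bs, n_c, n_fpc).astype(np.float32)
channel_whiten = np.random.randn(n_c, n_fpcw, n_fpc).astype(np.float32)

x = np.expand_dims(norm_batch, axis=3)        # [bs, n_c, n_fpc, 1]
out = np.squeeze(channel_whiten @ x)          # broadcasts to [bs, n_c, n_fpcw, 1], then squeezed
print(out.shape)                              # (5, 3, 4)
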
    def __init__(
        self,
        ds_root,  # pre-processed dataset root directory (where to find .dat files)
        mode='train',  # mode of use of the dataset object (may be 'train', 'validation' or 'test')
        n_samples=None,  # number of samples to consider (used just to access the right pre-processed files)
        return_malicious=True,  # whether to return the malicious label for the data point or not
        return_counts=True,  # whether to return the counts for the data point or not
        return_tags=True,  # whether to return the tags for the data points or not
        return_shas=False
    ):  # whether to return the sha256 of the data points or not
        """ Initialize Dataset class.

        Args:
            ds_root: Pre-processed dataset root directory (where to find .dat files)
            mode: Mode of use of the dataset object (it may be 'train', 'validation' or 'test') (default: 'train')
            n_samples: Number of samples to consider (used just to access the right pre-processed files) (default: None)
            return_malicious: Whether to return the malicious label for the data point or not (default: True)
            return_counts: Whether to return the counts for the data point or not (default: True)
            return_tags: Whether to return the tags for the data points or not (default: True)
            return_shas: Whether to return the sha256 of the data points or not (default: False)
        """

        self.return_counts = return_counts
        self.return_tags = return_tags
        self.return_malicious = return_malicious
        self.return_shas = return_shas

        # if mode is not in one of the expected values raise an exception
        if mode not in {'train', 'validation', 'test'}:
            raise ValueError('invalid mode {}'.format(mode))

        # if n_samples is not set or it is <= 0 -> set it to the max
        if n_samples is None or n_samples <= 0:
            n_samples = total_n_samples[mode]

        # set feature dimension
        ndim = 2381

        # set labels dimension to 1 (malware) + 1 (count) + n_tags (tags)
        labels_dim = 1 + 1 + len(Dataset.tags)

        # generate X (features vector), y (labels vector) and S (shas) file names
        X_path = os.path.join(ds_root, "X_{}_{}.dat".format(mode, n_samples))
        y_path = os.path.join(ds_root, "y_{}_{}.dat".format(mode, n_samples))
        S_path = os.path.join(ds_root, "S_{}_{}.dat".format(mode, n_samples))

        # log error and exit if at least one of the dataset files (X, y, S) does not exist
        if not (os.path.exists(X_path) and os.path.exists(y_path)
                and os.path.exists(S_path)):
            logger.error(
                "X, y, S files for mode {} and amount {} not found.".format(
                    mode, n_samples))
            sys.exit(1)

        logger.info('Opening Dataset at {} in {} mode.'.format(ds_root, mode))

        # open S (shas) memory map in Read+ mode (+ because pytorch does not support read only ndarrays)
        self.S = np.memmap(S_path, dtype=np.dtype('U64'), mode="r+")
        # get number of elements from S vector
        self.N = self.S.shape[0]

        # open y (labels) memory map in Read+ mode (+ because pytorch does not support read only ndarrays)
        self.y = np.memmap(y_path,
                           dtype=np.float32,
                           mode="r+",
                           shape=(self.N, labels_dim))

        # open X (features) memory map in Read+ mode (+ because pytorch does not support read only ndarrays)
        self.X = np.memmap(X_path,
                           dtype=np.float32,
                           mode="r+",
                           shape=(self.N, ndim))

        logger.info("{} samples loaded.".format(self.N))
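
The constructor above only opens the memmaps; a matching __getitem__ would slice each row of y into the malware flag, the count and the tag columns, following the layout described in the comments. The class below is a hypothetical, minimal stand-in (names, demo paths and the tag count are made up) showing how such a memmap-backed PyTorch dataset can be read:

import numpy as np
from torch.utils.data import Dataset as TorchDataset

class MemmapDataset(TorchDataset):
    # hypothetical minimal reader over X/y memmaps laid out like the ones above
    def __init__(self, X_path, y_path, n_samples, ndim, labels_dim):
        self.X = np.memmap(X_path, dtype=np.float32, mode='r+', shape=(n_samples, ndim))
        self.y = np.memmap(y_path, dtype=np.float32, mode='r+', shape=(n_samples, labels_dim))

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        # label layout assumed from the comments above: 1 malware flag, 1 count, then the tags
        return self.X[idx], {'malware': self.y[idx, 0],
                             'count': self.y[idx, 1],
                             'tags': self.y[idx, 2:]}

# tiny demo files so the class can actually be exercised
np.memmap('X_demo.dat', dtype=np.float32, mode='w+', shape=(4, 2381)).flush()
np.memmap('y_demo.dat', dtype=np.float32, mode='w+', shape=(4, 13)).flush()
ds = MemmapDataset('X_demo.dat', 'y_demo.dat', n_samples=4, ndim=2381, labels_dim=13)
print(len(ds), ds[0][1]['tags'].shape)
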
PATCH_SIZE = 256
slice_size = int(np.ceil(np.sqrt(2*PATCH_SIZE**2)))
# ensure slice_size is compatible with several (here 5) maxpool operations
slice_size += 32-slice_size%32

expected_n_samples = 70000

patient_markers = np.loadtxt("../../patient_markers.txt").astype(np.int32)

memmap_shape = (expected_n_samples, 25, slice_size, slice_size)
info_memmap_shape = (expected_n_samples, 4)

memmap_name = "patchSegmentation_allInOne_ws_t1km_flair_adc_cbv"

memmap_data = memmap("%s.memmap" % (memmap_name), dtype=np.float32, mode="w+", shape=memmap_shape)
memmap_gt = memmap("%s_info.memmap" % (memmap_name), dtype=np.float32, mode="w+", shape=info_memmap_shape)

def add_patch_to_memmap(x, y, z, t1km_img, flair_img, adc_img, cbv_img, seg_combined, slice_size, patient_id, patient_state, data_ctr):
    t1km_patch = t1km_img[z-2:z+3, x:x+slice_size, y:y+slice_size]
    flair_patch = flair_img[z-2:z+3, x:x+slice_size, y:y+slice_size]
    adc_patch = adc_img[z-2:z+3, x:x+slice_size, y:y+slice_size]
    cbv_patch = cbv_img[z-2:z+3, x:x+slice_size, y:y+slice_size]
    seg_patch = seg_combined[z-2:z+3, x:x+slice_size, y:y+slice_size]
    # no empty slices
    if len(np.unique(seg_patch[2])) == 1 and np.unique(seg_patch[2])[0] == 0:
        return data_ctr
    memmap_data[data_ctr, 0:5, :, :] = t1km_patch
    memmap_data[data_ctr, 5:10, :, :] = flair_patch
    memmap_data[data_ctr, 10:15, :, :] = adc_patch
    memmap_data[data_ctr, 15:20, :, :] = cbv_patch
Пример #41
0
    [mad_estimator.input, writer_1.input,
     peak_detector.get_input('data')])
manager.connect(mad_estimator.get_output('mads'),
                [peak_detector.get_input('mads'), writer_2.input])
manager.connect(peak_detector.get_output('peaks'), writer_3.input)
manager.start()
director.sleep(duration=5.0)

director.stop()
director.destroy()

start_mad = mad_estimator.start_step
neg_peak_file = writer_3.recorded_peaks['negative']
pos_peak_file = writer_3.recorded_peaks['positive']

x1 = numpy.memmap('/tmp/input.dat', dtype=numpy.float32, mode='r')
x1 = x1.reshape(x1.size // nb_channels, nb_channels)

neg_peaks = numpy.fromfile(neg_peak_file, dtype=numpy.int32)
neg_peaks = neg_peaks.reshape(neg_peaks.size // 2, 2)

pos_peaks = numpy.fromfile(pos_peak_file, dtype=numpy.int32)
pos_peaks = pos_peaks.reshape(pos_peaks.size // 2, 2)

mads = numpy.fromfile('/tmp/mads.dat', dtype=numpy.float32)
t_max = mads.size // nb_channels
mads = mads[:t_max * nb_channels].reshape(t_max, nb_channels)

channel_to_show = 0

t_stop = (start_mad + 10) * nb_samples
Пример #42
0
def raw_patch_data_mat(map_name,
                       classifier,
                       num_patches,
                       ph,
                       pw,
                       batch_size,
                       n_channels,
                       save_dir,
                       file_name='raw_mat.npy'):
    """
    create (num_patches, n_channels, feats per channel) matrix of extracted patches
    """
    assert num_patches % batch_size == 0

    if classifier.lower() == 'vgg16':
        classifier = Vgg16()
        image_subdir = 'images_resized_224/'
        img_dims = [batch_size, 224, 224, 3]
    elif classifier.lower() == 'alexnet':
        classifier = AlexNet()
        image_subdir = 'images_resized_227/'
        img_dims = [batch_size, 227, 227, 3]
    else:
        raise NotImplementedError

    file_path = save_dir + file_name

    with tf.Graph().as_default() as graph:
        with tf.Session() as sess:

            img_pl = tf.placeholder(dtype=tf.float32,
                                    shape=img_dims,
                                    name='img_pl')
            classifier.build(img_pl, rescale=1.0)
            feat_map = graph.get_tensor_by_name(map_name)
            map_dims = [d.value for d in feat_map.get_shape()]
            n_feats_per_channel = ph * pw
            # n_features = n_feats_per_channel * map_dims[3]

            data_path = '../data/imagenet2012-validationset/'
            img_file = 'train_48k_images.txt'

            raw_mat = np.memmap(file_path,
                                dtype=np.float32,
                                mode='w+',
                                shape=(num_patches, n_channels,
                                       n_feats_per_channel))

            max_h = map_dims[1] - ph
            max_w = map_dims[2] - pw

            with open(data_path + img_file) as f:
                image_files = [k.rstrip() for k in f.readlines()]

            image_paths = [
                data_path + image_subdir + k[:-len('JPEG')] + 'bmp'
                for k in image_files
            ]
            img_mat = np.zeros(shape=img_dims)

            for count in range(num_patches // batch_size):

                for idx in range(batch_size):
                    img_path = image_paths[idx + (count * batch_size) %
                                           len(image_paths)]
                    img_mat[idx, :, :, :] = load_image(img_path, resize=False)

                if count == 0:
                    print('Verifying scale - this should be around 255: ',
                          np.max(img_mat))

                map_mat = sess.run(feat_map, feed_dict={img_pl: img_mat})

                for idx in range(batch_size):
                    h = np.random.randint(0, max_h)
                    w = np.random.randint(0, max_w)
                    map_patch = np.transpose(map_mat[idx, h:h + ph,
                                                     w:w + pw, :],
                                             axes=(2, 0, 1))
                    map_patch = map_patch.reshape([n_channels,
                                                   -1]).astype(np.float32)
                    raw_mat[idx + (count * batch_size), :, :] = map_patch

        del raw_mat

    raw_mat = np.memmap(file_path,
                        dtype=np.float32,
                        mode='r',
                        shape=(num_patches, n_channels, n_feats_per_channel))
    return raw_mat
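
Note the pattern at the end of raw_patch_data_mat: the matrix is created in 'w+' mode for writing, deleted (which flushes the mapped pages to disk), and then reopened in 'r' mode so the caller only gets a read-only view. A minimal sketch of that round trip with made-up sizes:

import numpy as np

path, shape = 'raw_mat_demo.npy', (8, 3, 16)

mat = np.memmap(path, dtype=np.float32, mode='w+', shape=shape)
mat[:] = np.random.rand(*shape)
del mat                                                    # flush pending writes and close the map

mat = np.memmap(path, dtype=np.float32, mode='r', shape=shape)  # read-only view of the same data
print(mat.mean())
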
Пример #43
0
def main(args, _=None):
    """Run the ``catalyst-data text2embeddings`` script."""
    batch_size = args.batch_size
    num_workers = args.num_workers
    max_length = args.max_length
    pooling_groups = args.pooling.split(",")

    utils.set_global_seed(args.seed)
    utils.prepare_cudnn(args.deterministic, args.benchmark)

    if hasattr(args, "in_huggingface"):
        model_config = BertConfig.from_pretrained(args.in_huggingface)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel.from_pretrained(args.in_huggingface,
                                          config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_huggingface)
    else:
        model_config = BertConfig.from_pretrained(args.in_config)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel(config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_vocab)
    if hasattr(args, "in_model"):
        checkpoint = utils.load_checkpoint(args.in_model)
        checkpoint = {"model_state_dict": checkpoint}
        utils.unpack_checkpoint(checkpoint=checkpoint, model=model)

    model = model.eval()
    model, _, _, _, device = utils.process_components(model=model)

    df = pd.read_csv(args.in_csv)
    df = df.dropna(subset=[args.txt_col])
    df.to_csv(f"{args.out_prefix}.df.csv", index=False)
    df = df.reset_index().drop("index", axis=1)
    df = list(df.to_dict("index").values())
    num_samples = len(df)

    open_fn = LambdaReader(
        input_key=args.txt_col,
        output_key=None,
        lambda_fn=partial(
            tokenize_text,
            strip=args.strip,
            lowercase=args.lowercase,
            remove_punctuation=args.remove_punctuation,
        ),
        tokenizer=tokenizer,
        max_length=max_length,
    )

    dataloader = utils.get_loader(
        df,
        open_fn,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    features = {}
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for idx, batch in enumerate(dataloader):
            batch = utils.any2device(batch, device)
            bert_output = model(**batch)
            mask = (batch["attention_mask"].unsqueeze(-1)
                    if args.mask_for_max_length else None)

            if utils.is_wrapped_with_ddp(model):
                # using several gpu
                hidden_size = model.module.config.hidden_size
                hidden_states = model.module.config.output_hidden_states

            else:
                # using cpu or one gpu
                hidden_size = model.config.hidden_size
                hidden_states = model.config.output_hidden_states

            features_ = process_bert_output(
                bert_output=bert_output,
                hidden_size=hidden_size,
                output_hidden_states=hidden_states,
                pooling_groups=pooling_groups,
                mask=mask,
            )

            # create storage based on network output
            if idx == 0:
                for key, value in features_.items():
                    name_ = key if isinstance(key, str) else f"{key:02d}"
                    _, embedding_size = value.shape
                    features[name_] = np.memmap(
                        f"{args.out_prefix}.{name_}.npy",
                        dtype=np.float32,
                        mode="w+",
                        shape=(num_samples, embedding_size),
                    )

            indices = np.arange(idx * batch_size,
                                min((idx + 1) * batch_size, num_samples))
            for key, value in features_.items():
                name_ = key if isinstance(key, str) else f"{key:02d}"
                features[name_][indices] = _detach(value)
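
The features dict above is filled lazily: the embedding size is only known after the first batch has gone through the model, so one 'w+' memmap per output key is allocated at that point and then filled batch by batch. A minimal sketch of that allocation pattern with fake batch outputs (all names and sizes here are illustrative):

import numpy as np

num_samples, batch_size = 10, 4
storage = {}

for idx in range(0, num_samples, batch_size):
    rows = np.arange(idx, min(idx + batch_size, num_samples))
    batch_out = {'pooled': np.random.rand(len(rows), 8).astype(np.float32)}
    if idx == 0:
        # allocate one file-backed array per output key, sized from the first batch
        for key, value in batch_out.items():
            storage[key] = np.memmap('demo.{}.npy'.format(key), dtype=np.float32,
                                     mode='w+', shape=(num_samples, value.shape[1]))
    for key, value in batch_out.items():
        storage[key][rows] = value

print(storage['pooled'].shape)
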
Пример #44
0
def do_cuts(args):
  from root_optimize.timing import secondsToStr

  # before doing anything, let's ensure the directory we make is ok
  if not os.path.exists(args.output_directory):
    os.makedirs(args.output_directory)
  elif args.overwrite:
    import shutil
    shutil.rmtree(args.output_directory)
  else:
    raise IOError("Output directory already exists: {0:s}".format(args.output_directory))

  # first step is to group by the sample DID
  dids = defaultdict(list)
  for fname in args.files:
    dids[utils.get_did(fname)].append(fname)

  # load in the supercuts file
  supercuts = utils.read_supercuts_file(args.supercuts)

  # load up the weights file
  if not os.path.isfile(args.weightsFile):
    raise ValueError('The supplied weights file `{0}` does not exist or I cannot find it.'.format(args.weightsFile))
  else:
    weights = json.load(file(args.weightsFile))

  # parallelize
  num_cores = min(multiprocessing.cpu_count(), args.num_cores)
  logger.log(25, "Using {0} cores".format(num_cores) )

  pids = None
  # if pids is None, do_cut() will disable the progress
  if not args.hide_subtasks:
    from numpy import memmap, uint64
    pids = memmap(os.path.join(tempfile.mkdtemp(), 'pids'), dtype=uint64, shape=num_cores, mode='w+')

  overall_progress = tqdm.tqdm(total=len(dids), desc='Num. files', position=0, leave=True, unit='file', dynamic_ncols=True)
  class CallBack(object):
    completed = defaultdict(int)

    def __init__(self, index, parallel):
      self.index = index
      self.parallel = parallel

    def __call__(self, index):
      CallBack.completed[self.parallel] += 1
      overall_progress.update()
      overall_progress.refresh()
      if self.parallel._original_iterable:
        self.parallel.dispatch_next()

  import joblib.parallel
  joblib.parallel.CallBack = CallBack

  results = Parallel(n_jobs=num_cores)(delayed(utils.do_cut)(did, files, supercuts, weights, args.tree_name, args.output_directory, args.eventWeightBranch, args.numpy, pids) for did, files in dids.iteritems())

  overall_progress.close()

  for did, result in zip(dids, results):
    logger.log(25, 'DID {0:s}: {1:s}'.format(did, 'ok' if result[0] else 'not ok'))

  logger.log(25, "Total CPU elapsed time: {0}".format(secondsToStr(sum(result[1] for result in results))))

  return True
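
The pids array here is a tiny file-backed memmap handed to every joblib worker, which is one way to share a little mutable state (here the worker PIDs used for sub-task progress) across processes, since joblib passes memmap-backed arrays to its workers by reference to the same file. A self-contained sketch of that trick (the helper function below is illustrative, not part of root_optimize):

import os
import tempfile
import numpy as np
from joblib import Parallel, delayed

def record_pid(slots, slot):
    # each worker writes its own PID into the shared, file-backed array
    slots[slot] = os.getpid()
    return slot

num_workers = 4
pids = np.memmap(os.path.join(tempfile.mkdtemp(), 'pids'),
                 dtype=np.uint64, shape=num_workers, mode='w+')
Parallel(n_jobs=num_workers)(delayed(record_pid)(pids, i) for i in range(num_workers))
print(np.asarray(pids))
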
Пример #45
0
    # t1 = time.clock()
    # print(t1)
    for epoch in range(epoch_num):
        print("############### Epoch",str(epoch+1)," #################")

        train_loss_online=[]
        train_loss = []
        dev_loss = []

        print("______________Training_________________")
        for i in range(6): ###needmod 20
            print("=============================================")
            x_train_file = xFile + str(i+1)+".dat"
            # print("Loading x training data from",x_train_file)
            # x_train = np.memmap(x_train_file, dtype='float', mode='r', shape=(350, 128, 64, 64, 1))
            x_train_load = np.memmap(x_train_file, dtype='float', mode='r', shape=(350, 128, 64, 64, 1))
            x_train = x_train_load.copy()
            del x_train_load
            print(x_train.shape)

            y_train_file = yFile + str(i+1)+".npy"
            # print("Loading y training data from", y_train_file)
            y_train_load = np.load(y_train_file)
            y_train = y_train_load.copy()
            del y_train_load
            # print(y_train.shape)

            # # TODO: Delete
            # x_train = x_train[:100] ###
            # y_train = y_train[:100] ###
            print("Training model on training data",str(i+1)+"/6 ...")
Пример #46
0
def GetData(filename):
    return np.memmap(filename, dtype='h', mode='r', offset=44)
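
The offset=44 in GetData skips the canonical 44-byte PCM WAV header, so only the raw 16-bit samples ('h' is numpy.int16) get mapped. A small self-contained check of that assumption, writing a tiny mono 16-bit file with the standard wave module and reading it back through a memmap:

import wave
import numpy as np

samples = (np.sin(np.linspace(0, 2 * np.pi, 100)) * 3000).astype(np.int16)
with wave.open('tone.wav', 'wb') as w:
    w.setnchannels(1)
    w.setsampwidth(2)       # 16-bit samples
    w.setframerate(8000)
    w.writeframes(samples.tobytes())

# offset=44 skips the standard RIFF/fmt/data header of a plain PCM file
data = np.memmap('tone.wav', dtype='h', mode='r', offset=44)
print(np.array_equal(data, samples))   # True for a canonical 44-byte header
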
Пример #47
0
MODEL += '_normed'
PKL_ID = './ID.pkl'
#MEM_DATA = 'data.fbank.memmap'
PGRAM_ROOT = 'dnn_result/posteriorgram/'
DNN_MODEL = 'Angus_2'
MEM_PGRAM = PGRAM_ROOT + DNN_MODEL + '.pgram'
MEM_LABEL = 'label.memmap'
MEM_PGRAM_shape = (1124823, 48)
STATE_LENGTH = 1943
PHONE_LENGTH = 48
#LABEL_VARIETY = 1943
LABEL_VARIETY = 48

print "Reading data..."
mem_pgram = np.memmap(MEM_PGRAM,
                      dtype='float32',
                      mode='r',
                      shape=MEM_PGRAM_shape)
mem_label = np.memmap(MEM_LABEL, dtype='int16', mode='r', shape=(1124823, ))
IDs = readID(PKL_ID)
idx = 0
IDs_utter = []
while idx <= len(IDs) - 1:
    IDs_utter.append(["_".join(IDs[idx][0].split('_')[0:2]), IDs[idx][1]])
    #IDs_utter = [utter_name,utter_max]
    idx += IDs[idx][1]

print "Preparing pickList..."
pickList = range(0, len(IDs_utter))
pickList = shuffle(pickList)
frame_max = max(IDs_utter, key=lambda x: x[1])
train_data_length = len(pickList) * VAL_SET_RATIO
Пример #48
0
def file_reader(filename, endianess='<', **kwds):
    metadata = {}
    f = open(filename, 'rb')
    std_header = np.fromfile(f, dtype=get_std_dtype_list(endianess), count=1)
    fei_header = None
    if std_header['NEXT'] / 1024 == 128:
        print "It seems to contain an extended FEI header"
        fei_header = np.fromfile(f,
                                 dtype=get_fei_dtype_list(endianess),
                                 count=1024)
    if f.tell() == 1024 + std_header['NEXT']:
        print "The FEI header was correctly loaded"
    else:
        print "There was a problem reading the extended header"
        f.seek(1024 + std_header['NEXT'])
        fei_header = None
    NX, NY, NZ = std_header['NX'], std_header['NY'], std_header['NZ']
    data = np.memmap(f,
                     mode='c',
                     offset=f.tell(),
                     dtype=get_data_type(std_header['MODE'],
                                         endianess)).squeeze().reshape(
                                             (NX, NY, NZ), order='F').T

    original_metadata = {'std_header': sarray2dict(std_header)}
    if fei_header is not None:
        fei_dict = sarray2dict(fei_header, )
        del fei_dict['empty']
        original_metadata['fei_header'] = fei_dict

    dim = len(data.shape)
    if fei_header is None:
        # The scale is in Angstroms, we convert it to nm
        scales = [
            10 * float(std_header['Zlen'] / std_header['MZ'])
            if float(std_header['MZ']) != 0 else 1,
            10 * float(std_header['Ylen'] / std_header['MY'])
            if float(std_header['MY']) != 0 else 1,
            10 * float(std_header['Xlen'] / std_header['MX'])
            if float(std_header['MX']) != 0 else 1,
        ]
        offsets = [
            10 * float(std_header['ZORIGIN']),
            10 * float(std_header['YORIGIN']),
            10 * float(std_header['XORIGIN']),
        ]

    else:
        # FEI does not use the standard header to store the scale
        # It does store the spatial scale in pixel_size, one per angle in
        # meters
        scales = [
            1,
        ] + [
            fei_header['pixel_size'][0] * 10**9,
        ] * 2
        offsets = [
            0,
        ] * 3

    units = [Undefined, 'nm', 'nm']
    names = ['z', 'y', 'x']
    metadata = {
        'General': {
            'original_filename': os.path.split(filename)[1]
        },
        "Signal": {
            'signal_type': "",
            'record_by': 'image',
        },
    }
    # create the axis objects for each axis
    axes = [{
        'size': data.shape[i],
        'index_in_array': i,
        'name': names[i + 3 - dim],
        'scale': scales[i + 3 - dim],
        'offset': offsets[i + 3 - dim],
        'units': units[i + 3 - dim],
    } for i in xrange(dim)]

    dictionary = {
        'data': data,
        'axes': axes,
        'metadata': metadata,
        'original_metadata': original_metadata,
    }

    return [
        dictionary,
    ]
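
The reader above maps the image data with mode='c' (copy-on-write): the returned array can be modified freely in memory, but those changes are never written back to the file. A small sketch of that behaviour with a throwaway binary file:

import numpy as np

np.arange(6, dtype=np.float32).tofile('raw_demo.dat')

arr = np.memmap('raw_demo.dat', dtype=np.float32, mode='c', shape=(6,))
arr[0] = 99.0                                     # modifies only the in-memory copy

reread = np.fromfile('raw_demo.dat', dtype=np.float32)
print(arr[0], reread[0])                          # 99.0 0.0 -- the file is untouched
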
Пример #49
0
                            if 'EC_number' in feature.qualifiers.keys():
                                eci = eci + 1

                output[i] = eci
                print d, 'has', eci, 'features'

            ## For some assemblies an error is raised on a second(?) record identified
            ## in the Genbank file.  It isn't clear why this is happening, pass the error
            ## here.

            except AttributeError:
                pass


sums = np.memmap(open('tmp.paprica.mmp', 'w+b'),
                 shape=genome_data.index.shape[0],
                 dtype='uint64')

Parallel(n_jobs=-1)(delayed(count_ec)(sums, i)
                    for i in range(0, len(genome_data.index)))

eci = int(
    sums.sum() * 2
)  # count_ec is undercounting and it is not clear why.  Multiply by 2 to ensure a large enough array.

## Delete mmp

os.remove('tmp.paprica.mmp')

## Create numpy array for data and a 1D array that will become dataframe index.
## You can probably parallelize this as above, but it is going to take some effort.
img_list = []
count_imgs = [0] * 3
with open('./data/celeba/list_eval_partition.txt', 'r') as fp:
    for line in fp:
        img_name, img_set = line.split(' ')
        img_set = int(img_set)
        count_imgs[img_set] = count_imgs[img_set] + 1
        img_list.append([img_name, img_set])

imgs_dir = './data/celeba/img_align_celeba/'

data_mean = 0.431751299266
data_std = 0.300219581459

train_x = np.memmap('.tmp_celeba_train.npy', np.float32, 'w+',
                    shape=(count_imgs[0] + count_imgs[1], 64, 64, 3))
test_x = np.memmap('.tmp_celeba_test.npy', np.float32, 'w+',
                   shape=(count_imgs[2], 64, 64, 3))

train_count = 0
test_count = 0
for i in img_list:
    img_name, img_set = i
    img = skimage.transform.resize(plt.imread(imgs_dir + img_name), (64, 64))
    img = (img - data_mean) / data_std

    if img_set == 2:
        test_x[test_count] = img
        test_count = test_count + 1
    else:
        train_x[train_count] = img
Пример #51
0
NSQUARES = int(sys.argv[1]) 

# Initialize
img = numpy.zeros((N, N), numpy.uint8)
centers = numpy.random.random_integers(0, N, size=(NSQUARES, 2))
radii = numpy.random.randint(0, N/9, size=NSQUARES)
colors = numpy.random.randint(100, 255, size=NSQUARES)

# Generate squares
for i in xrange(NSQUARES):
   xindices = range(centers[i][0] - radii[i], centers[i][0] + radii[i])
   xindices = numpy.clip(xindices, 0, N - 1)
   yindices = range(centers[i][1] - radii[i], centers[i][1] + radii[i])
   yindices = numpy.clip(yindices, 0, N - 1)

   if len(xindices) == 0 or len(yindices) == 0:
      continue

   coordinates = numpy.meshgrid(xindices, yindices)
   img[coordinates] = colors[i]

# Load into memory map
img.tofile('random_squares.raw')
img_memmap = numpy.memmap('random_squares.raw', shape=img.shape)

# Display image
matplotlib.pyplot.imshow(img_memmap)
matplotlib.pyplot.axis('off')
matplotlib.pyplot.show()
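
np.memmap defaults to dtype=uint8 when no dtype is given, which is why the call above happens to match img (created as numpy.uint8); mapping the same bytes with a different dtype silently reinterprets them. A short illustration with an arbitrary file name:

import numpy as np

img = np.arange(16, dtype=np.uint8).reshape(4, 4)
img.tofile('squares_demo.raw')

ok = np.memmap('squares_demo.raw', shape=(4, 4))                       # default dtype is uint8
wrong = np.memmap('squares_demo.raw', dtype=np.uint16, shape=(2, 4))   # same 16 bytes, different view
print(ok.dtype, ok[0, :3])
print(wrong.dtype, wrong[0, :3])
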
Пример #52
0
    if x2y2 < R**2 and x2y2 > 0.0:
        weight = (0.25 / np.pi)**2
        zlim = np.sqrt(R**2 - x2y2)
        integral = integ.quad(integrand, -zlim, zlim, args=(kapparho, x2y2))[0]
        return weight * np.exp(
            -kapparho * zlim) * integral * kapparho * dx * dy
    else:
        if x2y2 == 0.0 and args.direct_light:
            return 0.25 * np.exp(-kapparho * R) / np.pi
        else:
            return 0.0


# open the image file
data = np.memmap(args.file,
                 dtype=np.float64,
                 shape=(args.nx, args.ny),
                 mode="r")

profile_data = data.reshape(-1)
xy = np.array(
    np.meshgrid(np.linspace(-1.0, 1.0, args.nx),
                np.linspace(-1.0, 1.0, args.ny))).transpose()
xy = xy.reshape((-1, 2))
profile_radius = np.sqrt(xy[:, 0]**2 + xy[:, 1]**2)

image_xy = xy.reshape((args.nx, args.ny, 2))
image_data = data.reshape((args.nx, args.ny))

ra = np.linspace(0.0, 1.0, 100)
dx = 2.0 / args.nx
dy = 2.0 / args.ny
Пример #53
0
    def _parse_header(self):
        with io.open(self.filename, 'rb') as fid:
            f = StructFile(fid)

            # Name
            f.seek(64)
            surname = f.read(22).strip(b' ')
            firstname = f.read(20).strip(b' ')

            # Date
            day, month, year, hour, minute, sec = f.read_f('bbbbbb',
                                                           offset=128)
            rec_datetime = datetime.datetime(year + 1900, month, day, hour,
                                             minute, sec)

            Data_Start_Offset, Num_Chan, Multiplexer, Rate_Min, Bytes = f.read_f(
                'IHHHH', offset=138)

            # header version
            header_version, = f.read_f('b', offset=175)
            assert header_version == 4

            # area
            f.seek(176)
            zone_names = [
                'ORDER', 'LABCOD', 'NOTE', 'FLAGS', 'TRONCA', 'IMPED_B',
                'IMPED_E', 'MONTAGE', 'COMPRESS', 'AVERAGE', 'HISTORY',
                'DVIDEO', 'EVENT A', 'EVENT B', 'TRIGGER'
            ]
            zones = {}
            for zname in zone_names:
                zname2, pos, length = f.read_f('8sII')
                zones[zname] = zname2, pos, length
                assert zname == zname2.decode('ascii').strip(' ')

            # raw signals memmap
            sig_dtype = 'u' + str(Bytes)
            self._raw_signals = np.memmap(self.filename,
                                          dtype=sig_dtype,
                                          mode='r',
                                          offset=Data_Start_Offset).reshape(
                                              -1, Num_Chan)

            # Reading Code Info
            zname2, pos, length = zones['ORDER']
            f.seek(pos)
            code = np.frombuffer(f.read(Num_Chan * 2), dtype='u2')

            units_code = {
                -1: 'nV',
                0: 'uV',
                1: 'mV',
                2: 1,
                100: 'percent',
                101: 'dimensionless',
                102: 'dimensionless'
            }
            sig_channels = []
            sig_grounds = []
            for c in range(Num_Chan):
                zname2, pos, length = zones['LABCOD']
                f.seek(pos + code[c] * 128 + 2, 0)

                chan_name = f.read(6).strip(b"\x00").decode('ascii')
                ground = f.read(6).strip(b"\x00").decode('ascii')
                sig_grounds.append(ground)
                logical_min, logical_max, logical_ground, physical_min, physical_max = f.read_f(
                    'iiiii')
                k, = f.read_f('h')
                units = units_code.get(k, 'uV')

                factor = float(physical_max -
                               physical_min) / float(logical_max -
                                                     logical_min + 1)
                gain = factor
                offset = -logical_ground * factor

                f.seek(8, 1)
                sampling_rate, = f.read_f('H')
                sampling_rate *= Rate_Min
                chan_id = c
                group_id = 0
                sig_channels.append((chan_name, chan_id, sampling_rate,
                                     sig_dtype, units, gain, offset, group_id))

            sig_channels = np.array(sig_channels, dtype=_signal_channel_dtype)
            assert np.unique(sig_channels['sampling_rate']).size == 1
            self._sampling_rate = float(
                np.unique(sig_channels['sampling_rate'])[0])

            # Event channels
            event_channels = []
            event_channels.append(('Trigger', '', 'event'))
            event_channels.append(('Note', '', 'event'))
            event_channels.append(('Event A', '', 'epoch'))
            event_channels.append(('Event B', '', 'epoch'))
            event_channels = np.array(event_channels,
                                      dtype=_event_channel_dtype)

            # Read trigger and notes
            self._raw_events = []
            ev_dtypes = [
                ('TRIGGER', [('start', 'u4'), ('label', 'u2')]),
                ('NOTE', [('start', 'u4'), ('label', 'S40')]),
                ('EVENT A', [('label', 'u4'), ('start', 'u4'),
                             ('stop', 'u4')]),
                ('EVENT B', [('label', 'u4'), ('start', 'u4'),
                             ('stop', 'u4')]),
            ]
            for zname, ev_dtype in ev_dtypes:
                zname2, pos, length = zones[zname]
                dtype = np.dtype(ev_dtype)
                rawevent = np.memmap(self.filename,
                                     dtype=dtype,
                                     mode='r',
                                     offset=pos,
                                     shape=length // dtype.itemsize)

                keep = (rawevent['start'] >= rawevent['start'][0]) & (
                    rawevent['start'] <
                    self._raw_signals.shape[0]) & (rawevent['start'] != 0)
                rawevent = rawevent[keep]
                self._raw_events.append(rawevent)

            # No spikes
            unit_channels = []
            unit_channels = np.array(unit_channels, dtype=_unit_channel_dtype)

            # fill into header dict
            self.header = {}
            self.header['nb_block'] = 1
            self.header['nb_segment'] = [1]
            self.header['signal_channels'] = sig_channels
            self.header['unit_channels'] = unit_channels
            self.header['event_channels'] = event_channels

            # insert some annotation at some place
            self._generate_minimal_annotations()
            bl_annotations = self.raw_annotations['blocks'][0]
            seg_annotations = bl_annotations['segments'][0]

            for d in (bl_annotations, seg_annotations):
                d['rec_datetime'] = rec_datetime
                d['firstname'] = firstname
                d['surname'] = surname
                d['header_version'] = header_version

            for c in range(sig_channels.size):
                anasig_an = seg_annotations['signals'][c]
                anasig_an['ground'] = sig_grounds[c]
                channel_an = self.raw_annotations['signal_channels'][c]
                channel_an['ground'] = sig_grounds[c]
Пример #54
0
def main():
    import numpy as np
    import os, sys, time, getopt
    from auxil import subset
    from ipyparallel import Client
    from osgeo import gdal
    from osgeo.gdalconst import GA_ReadOnly, GDT_Byte
    from tempfile import NamedTemporaryFile
    usage = '''
Usage:
------------------------------------------------

Sequential change detection for polarimetric SAR images

python %s [OPTIONS]  infiles* outfile enl

Options:
  
  -h           this help
  -m           run 3x3 median filter on p-values prior to thresholding (e.g. for noisy satellite data)  
  -d  <list>   files are to be co-registered to a subset dims = [x0,y0,rows,cols] of the first image, otherwise
               it is assumed that the images are co-registered and have identical spatial dimensions  
  -s  <float>  significance level for change detection (default 0.0001)

infiles:

  full paths to all input files: /path/to/infile_1 /path/to/infile_2 ... /path/to/infile_k
  
outfile:

  without path (will be written to same directory as infile_1)
  
enl:

  equivalent number of looks

-------------------------------------------------''' % sys.argv[0]

    options, args = getopt.getopt(sys.argv[1:], 'hmd:s:')
    medianfilter = False
    dims = None
    significance = 0.0001
    for option, value in options:
        if option == '-h':
            print usage
            return
        elif option == '-m':
            medianfilter = True
        elif option == '-d':
            dims = eval(value)
        elif option == '-s':
            significance = eval(value)
    k = len(args) - 2
    fns = args[0:k]
    n = np.float64(eval(args[-1]))
    outfn = args[-2]
    gdal.AllRegister()
    start = time.time()
    #  first SAR image
    try:
        inDataset1 = gdal.Open(fns[0], GA_ReadOnly)
        cols = inDataset1.RasterXSize
        rows = inDataset1.RasterYSize
        bands = inDataset1.RasterCount
    except Exception as e:
        print 'Error: %s  -- Could not read file' % e
        sys.exit(1)
    if dims is not None:
        #  images are not yet co-registered, so subset first image and register the others
        _, _, cols, rows = dims
        fn0 = subset.subset(fns[0], dims)
        args1 = [(fns[0], fns[i], dims) for i in range(1, k)]
        try:
            print ' \nattempting parallel execution of co-registration ...'
            start1 = time.time()
            c = Client()
            print 'available engines %s' % str(c.ids)
            v = c[:]
            v.execute('from registersar import register')
            fns = v.map_sync(call_register, args1)
            print 'elapsed time for co-registration: ' + str(time.time() -
                                                             start1)
        except Exception as e:
            start1 = time.time()
            print '%s \nFailed, so running sequential co-registration ...' % e
            fns = map(call_register, args1)
            print 'elapsed time for co-registration: ' + str(time.time() -
                                                             start1)
        fns.insert(0, fn0)
        #      point inDataset1 to the subset image for correct georeferencing
        inDataset1 = gdal.Open(fn0, GA_ReadOnly)
    print '==============================================='
    print '     Multi-temporal SAR Change Detection'
    print '==============================================='
    print time.asctime()
    print 'First (reference) filename:  %s' % fns[0]
    print 'number of images: %i' % k
    print 'equivalent number of looks: %f' % n
    print 'significance level: %f' % significance
    if bands == 9:
        print 'Quad polarization'
    elif bands == 4:
        print 'Dual polarization'
    elif bands == 3:
        print 'Quad polarization, diagonal only'
    elif bands == 2:
        print 'Dual polarization, diagonal only'
    else:
        print 'Intensity image'


#  output file
    path = os.path.abspath(fns[0])
    dirn = os.path.dirname(path)
    outfn = dirn + '/' + outfn
    #  create temporary, memory-mapped array of change indices p(Ri<ri)
    mm = NamedTemporaryFile()
    pvarray = np.memmap(mm.name,
                        dtype=np.float64,
                        mode='w+',
                        shape=(k - 1, k - 1, rows * cols))
    lnQs = np.zeros(k - 1)
    print 'pre-calculating Rj and p-values ...'
    start1 = time.time()
    try:
        print 'attempting parallel calculation ...'
        c = Client()
        print 'available engines %s' % str(c.ids)
        v = c[:]
        print 'ell = ',
        sys.stdout.flush()
        for i in range(k - 1):
            print i + 1,
            sys.stdout.flush()
            args1 = [(fns[i:j + 2], n, cols, rows, bands)
                     for j in range(i, k - 1)]
            results = v.map_sync(PV, args1)  # list of tuples (p-value, lnRj)
            pvs = [result[0] for result in results]
            lnRjs = np.array([result[1] for result in results])
            lnQs[i] = np.sum(lnRjs)
            if medianfilter:
                pvs = v.map_sync(call_median_filter, pvs)
            for j in range(i, k - 1):
                pvarray[i, j, :] = pvs[j - i].ravel()
    except Exception as e:
        print '%s \nfailed, so running sequential calculation ...' % e
        print 'ell= ',
        sys.stdout.flush()
        for i in range(k - 1):
            print i + 1,
            sys.stdout.flush()
            args1 = [(fns[i:j + 2], n, cols, rows, bands)
                     for j in range(i, k - 1)]
            results = map(PV, args1)  # list of tuples (p-value, lnRj)
            pvs = [result[0] for result in results]
            lnRjs = np.array([result[1] for result in results])
            lnQs[i] = np.sum(lnRjs)
            if medianfilter:
                pvs = map(call_median_filter, pvs)
            for j in range(i, k - 1):
                pvarray[i, j, :] = pvs[j - i].ravel()
    print '\nelapsed time for p-value calculation: ' + str(time.time() -
                                                           start1)
    cmap, smap, fmap, bmap = change_maps(pvarray, significance)
    #  write to file system
    cmap = np.reshape(cmap, (rows, cols))
    fmap = np.reshape(fmap, (rows, cols))
    smap = np.reshape(smap, (rows, cols))
    bmap = np.reshape(bmap, (rows, cols, k - 1))
    driver = inDataset1.GetDriver()
    basename = os.path.basename(outfn)
    name, _ = os.path.splitext(basename)
    outfn1 = outfn.replace(name, name + '_cmap')
    outDataset = driver.Create(outfn1, cols, rows, 1, GDT_Byte)
    geotransform = inDataset1.GetGeoTransform()
    if geotransform is not None:
        outDataset.SetGeoTransform(geotransform)
    projection = inDataset1.GetProjection()
    if projection is not None:
        outDataset.SetProjection(projection)
    outBand = outDataset.GetRasterBand(1)
    outBand.WriteArray(cmap, 0, 0)
    outBand.FlushCache()
    print 'last change map written to: %s' % outfn1
    outfn2 = outfn.replace(name, name + '_fmap')
    outDataset = driver.Create(outfn2, cols, rows, 1, GDT_Byte)
    if geotransform is not None:
        outDataset.SetGeoTransform(geotransform)
    if projection is not None:
        outDataset.SetProjection(projection)
    outBand = outDataset.GetRasterBand(1)
    outBand.WriteArray(fmap, 0, 0)
    outBand.FlushCache()
    print 'frequency map written to: %s' % outfn2
    outfn3 = outfn.replace(name, name + '_bmap')
    outDataset = driver.Create(outfn3, cols, rows, k - 1, GDT_Byte)
    if geotransform is not None:
        outDataset.SetGeoTransform(geotransform)
    if projection is not None:
        outDataset.SetProjection(projection)
    for i in range(k - 1):
        outBand = outDataset.GetRasterBand(i + 1)
        outBand.WriteArray(bmap[:, :, i], 0, 0)
        outBand.FlushCache()
    print 'bitemporal map image written to: %s' % outfn3
    outfn4 = outfn.replace(name, name + '_smap')
    outDataset = driver.Create(outfn4, cols, rows, 1, GDT_Byte)
    if geotransform is not None:
        outDataset.SetGeoTransform(geotransform)
    if projection is not None:
        outDataset.SetProjection(projection)
    outBand = outDataset.GetRasterBand(1)
    outBand.WriteArray(smap, 0, 0)
    outBand.FlushCache()
    print 'first change map written to: %s' % outfn4
    print 'total elapsed time: ' + str(time.time() - start)
    outDataset = None
    inDataset1 = None
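
The (k-1, k-1, rows*cols) cube of p-values above is kept out of RAM by backing the memmap with a NamedTemporaryFile; the backing file disappears once the temporary object is garbage collected (on POSIX systems the file can be reopened by name while it is still held open, as done here). A compact sketch of the same idea with toy dimensions:

import numpy as np
from tempfile import NamedTemporaryFile

k, rows, cols = 4, 100, 100
mm = NamedTemporaryFile()            # keep a reference: the file lives as long as mm does
pvarray = np.memmap(mm.name, dtype=np.float64, mode='w+',
                    shape=(k - 1, k - 1, rows * cols))
pvarray[0, 0, :] = 0.5
print(pvarray.shape, pvarray[0, 0, :3])
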
Пример #55
0
    def __init__(self,
                 training_path,
                 epoch,
                 tokenizer,
                 num_data_epochs,
                 reduce_memory=False):
        self.vocab = tokenizer.vocab
        self.tokenizer = tokenizer
        self.epoch = epoch
        self.data_epoch = int(epoch % num_data_epochs)
        logger.info('training_path: {}'.format(training_path))
        data_file = training_path / "epoch_{}.json".format(self.data_epoch)
        metrics_file = training_path / "epoch_{}_metrics.json".format(
            self.data_epoch)

        logger.info('data_file: {}'.format(data_file))
        logger.info('metrics_file: {}'.format(metrics_file))

        assert data_file.is_file() and metrics_file.is_file()
        metrics = json.loads(metrics_file.read_text())
        num_samples = metrics['num_training_examples']
        seq_len = metrics['max_seq_len']
        self.temp_dir = None
        self.working_dir = None
        if reduce_memory:
            self.temp_dir = TemporaryDirectory()
            self.working_dir = Path('/cache')
            input_ids = np.memmap(filename=self.working_dir /
                                  'input_ids.memmap',
                                  mode='w+',
                                  dtype=np.int32,
                                  shape=(num_samples, seq_len))
            input_masks = np.memmap(filename=self.working_dir /
                                    'input_masks.memmap',
                                    shape=(num_samples, seq_len),
                                    mode='w+',
                                    dtype=bool)
            segment_ids = np.memmap(filename=self.working_dir /
                                    'segment_ids.memmap',
                                    shape=(num_samples, seq_len),
                                    mode='w+',
                                    dtype=bool)
            lm_label_ids = np.memmap(filename=self.working_dir /
                                     'lm_label_ids.memmap',
                                     shape=(num_samples, seq_len),
                                     mode='w+',
                                     dtype=np.int32)
            lm_label_ids[:] = -1
            is_nexts = np.memmap(filename=self.working_dir / 'is_nexts.memmap',
                                 shape=(num_samples, ),
                                 mode='w+',
                                 dtype=bool)
        else:
            input_ids = np.zeros(shape=(num_samples, seq_len), dtype=np.int32)
            input_masks = np.zeros(shape=(num_samples, seq_len), dtype=bool)
            segment_ids = np.zeros(shape=(num_samples, seq_len), dtype=bool)
            lm_label_ids = np.full(shape=(num_samples, seq_len),
                                   dtype=np.int32,
                                   fill_value=-1)
            is_nexts = np.zeros(shape=(num_samples, ), dtype=bool)

        logging.info("Loading training examples for epoch {}".format(epoch))

        with data_file.open() as f:
            for i, line in enumerate(
                    tqdm(f, total=num_samples, desc="Training examples")):
                line = line.strip()
                example = json.loads(line)
                features = convert_example_to_features(example, tokenizer,
                                                       seq_len)
                input_ids[i] = features.input_ids
                segment_ids[i] = features.segment_ids
                input_masks[i] = features.input_mask
                lm_label_ids[i] = features.lm_label_ids
                is_nexts[i] = features.is_next

        # assert i == num_samples - 1  # Assert that the sample count metric was true
        logging.info("Loading complete!")
        self.num_samples = num_samples
        self.seq_len = seq_len
        self.input_ids = input_ids
        self.input_masks = input_masks
        self.segment_ids = segment_ids
        self.lm_label_ids = lm_label_ids
        self.is_nexts = is_nexts
def create_np_memmap_file(path, column_size, row_size):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    np.memmap(path, dtype='float32', mode='w+', shape=(column_size, row_size))
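
A 'w+' memmap both creates (or truncates) the backing file and zero-fills it, so the helper above is essentially a file allocator: afterwards the file is prod(shape) * itemsize bytes long, and any consumer has to reopen it with a matching dtype and shape. A short usage sketch with an illustrative path:

import os
import numpy as np

path = '/tmp/memmap_demo/feats.dat'              # hypothetical location
os.makedirs(os.path.dirname(path), exist_ok=True)

np.memmap(path, dtype='float32', mode='w+', shape=(100, 16)).flush()
print(os.path.getsize(path))                     # 100 * 16 * 4 = 6400 bytes

arr = np.memmap(path, dtype='float32', mode='r+', shape=(100, 16))
arr[0, 0] = 1.0
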
Пример #57
0
    def testTokenize(self):
        import shutil
        import tempfile

        class TestEnum(Enum):
            VAL1 = 'val1'

        tempdir = tempfile.mkdtemp('mars_test_utils_')
        try:
            filename = os.path.join(tempdir, 'test_npa.dat')
            mmp_array = np.memmap(filename,
                                  dtype=float,
                                  mode='w+',
                                  shape=(3, 4))
            mmp_array[:] = np.random.random((3, 4)).astype(float)
            mmp_array.flush()
            del mmp_array

            mmp_array1 = np.memmap(filename, dtype=float, shape=(3, 4))
            mmp_array2 = np.memmap(filename, dtype=float, shape=(3, 4))

            try:
                v = [
                    1, 2.3, '456', u'789', b'101112', None, np.ndarray,
                    [912, 'uvw'],
                    np.arange(0, 10),
                    np.array(10),
                    np.array([b'\x01\x32\xff']), np.int64, TestEnum.VAL1
                ]
                copy_v = copy.deepcopy(v)
                self.assertEqual(
                    utils.tokenize(v + [mmp_array1], ext_data=1234),
                    utils.tokenize(copy_v + [mmp_array2], ext_data=1234))
            finally:
                del mmp_array1, mmp_array2
        finally:
            shutil.rmtree(tempdir)

        v = {'a', 'xyz', 'uvw'}
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        v = dict(x='abcd', y=98765)
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        v = dict(x=dict(a=1, b=[1, 2, 3]), y=12345)
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        # pandas relative
        if pd is not None:
            df = pd.DataFrame([[utils.to_binary('测试'),
                                utils.to_text('数据')]],
                              index=['a'],
                              columns=['中文', 'data'])
            v = [
                df, df.index, df.columns, df['data'],
                pd.Categorical(list('ABCD'))
            ]
            self.assertEqual(utils.tokenize(v),
                             utils.tokenize(copy.deepcopy(v)))

        non_tokenizable_cls = type('non_tokenizable_cls', (object, ), {})
        with self.assertRaises(TypeError):
            utils.tokenize(non_tokenizable_cls())

        class CustomizedTokenize(object):
            def __mars_tokenize__(self):
                return id(type(self)), id(non_tokenizable_cls)

        self.assertEqual(utils.tokenize(CustomizedTokenize()),
                         utils.tokenize(CustomizedTokenize()))

        v = lambda x: x + 1
        self.assertEqual(utils.tokenize(v), utils.tokenize(copy.deepcopy(v)))

        def f(a, b):
            return np.add(a, b)

        self.assertEqual(utils.tokenize(f), utils.tokenize(copy.deepcopy(f)))

        partial_f = partial(f, 1)
        self.assertEqual(utils.tokenize(partial_f),
                         utils.tokenize(copy.deepcopy(partial_f)))
import numpy
import matplotlib.pyplot as plt
data = numpy.memmap("output2.wav", dtype='h', mode='r')
#dtype of h is: h              : <type 'numpy.int16'>
#this can be found with
#for k,v in np.sctypeDict.iteritems(): print '{0:14s} : {1:40s}'.format(str(k), v)
print "VALUES:", data
plt.plot(data)
plt.show()
Пример #59
0
 def create_mask(self, shape):
     print "Creating a mask"
     self.temp_file = tempfile.mktemp()
     shape = shape[0] + 1, shape[1] + 1, shape[2] + 1
     self.matrix = numpy.memmap(self.temp_file, mode='w+', dtype='uint8', shape=shape)
Пример #60
0
yt = {}
for fold in range(lstm.stratifications):
    #for fold in range(1):
    print("============ fold {}".format(fold))
    yp[fold] = {}
    ycn[fold] = {}
    yc[fold] = {}
    ytn[fold] = {}
    yt[fold] = {}
    for task in tasks:
        #for task in tasks[:1]:
        print("-------- task {}".format(task))
        XTest = lstm.getXTest(fold, task)
        yp[fold][task] = np.memmap(
            "./tmp/yp-" + str(fold) + "-" + str(task) + ".dat",
            mode='w+',
            shape=(XTest.shape[0], lstm.model[fold][task].output_shape[1]),
            dtype=float)

        if not task in ftTasks:
            yp[fold][task][:] = lstm.model[fold][task].predict_proba(XTest)
            ycn[fold][task] = lstm.model[fold][task].predict_classes(XTest)
        else:
            XTestFT = lstm.getXTestFT(fold, task)
            yp[fold][task][:] = lstm.model[fold][task].predict_proba(
                [XTestFT, XTest])
            ycn[fold][task] = lstm.model[fold][task].predict_classes(
                [XTestFT, XTest])

        if not task in ftTasks:
            yt[fold][task] = lstm.getYTestName(