Пример #1
0
    def rearrangement(self, orignal_X, target_mp):

        """
        Rearrange feature maps X from this object's layout into target_mp's
        layout, for the case where the features were already extracted but
        their positions need to be refit and rearranged.

        parameters
        -------------------
        orignal_X: the feature values transformed from orignal_mp (this object)
        target_mp: the target feature map object

        return
        -------------
        target_X, shape is (N, W, H, C)
        """
        assert self.flist == target_mp.flist, print_error('Input features list is different, can not re-arrangement, check your flist by mp.flist method' )
        assert len(orignal_X.shape) == 4, print_error('Input X has error shape, please reshape to (samples, w, h, channels)')

        # Recover the canonical feature ordering from the sorted position table.
        order = np.argsort(self._S.df.sort_values('indices').idx.tolist())

        n_samples = len(orignal_X)   # number of samples
        n_features = len(self.flist) # number of features
        maps = []
        for sample_idx in tqdm(range(n_samples), ascii=True):
            # Collapse the channel axis, flatten, and keep only the real
            # feature slots (the map may be padded beyond n_features).
            flat = orignal_X[sample_idx].sum(axis=-1).reshape(-1,)[:n_features]
            # Restore the canonical order, then project into the target layout.
            maps.append(target_mp._S.transform(flat[order]))
        return np.stack(maps)
Пример #2
0
 def batch_transform(self, 
                     array_2d, 
                     scale = True, 
                     scale_method = 'minmax',
                     n_jobs=4):
 
     """
     Transform a batch of 1-D feature vectors into feature maps in parallel.

     parameters
     --------------------
     array_2d: 2D numpy array feature points, M(samples) x N(feature ponits)
     scale: bool, if True, we will apply MinMax scaling by the precomputed values
     scale_method: {'minmax', 'standard'}
     n_jobs: number of parallel jobs

     return
     --------------------
     X: stacked feature maps produced by self.transform, one per input row,
        or None if the object has not been fitted yet
     """
     
     if not self.isfit:
         print_error('please fit first!')
         return
     
     # isinstance is the correct type check (type(...) == np.ndarray would
     # reject ndarray subclasses and is unidiomatic)
     assert isinstance(array_2d, np.ndarray), 'input must be numpy ndarray!' 
     assert array_2d.ndim == 2, 'input must be 2-D  numpy array!' 
     
     # Fan each row out to self.transform; order of results matches input order.
     P = Parallel(n_jobs=n_jobs)
     res = P(delayed(self.transform)(arr_1d, 
                                     scale,
                                     scale_method) for arr_1d in tqdm(array_2d, ascii=True)) 
     X = np.stack(res) 
     
     return X
Пример #3
0
 def swap_log(swap, error=True):
     """Emit every non-empty line of `swap` through the chosen reporter."""
     # Pick the reporting function once instead of branching per line.
     emit = print_error if error else print_info
     for line in swap.split('\n'):
         if line == '':
             continue
         emit(line)
     return
Пример #4
0
    def transform(self, 
                  arr_1d, 
                  scale = True, 
                  scale_method = 'minmax',):
    
    
        """
        Transform one 1-D feature vector into a 2-D (W, H, C) feature map.

        parameters
        --------------------
        arr_1d: 1d numpy array feature points
        scale: bool, if True, we will apply MinMax scaling by the precomputed values
        scale_method: {'minmax', 'standard'}

        return
        --------------------
        fmap: numpy array with NaNs replaced by 0, padded up to
              self.fmap_shape when one is set; None if not fitted yet
        """
        
        if not self.isfit:
            print_error('please fit first!')
            return

        if scale:
            if scale_method == 'standard':
                arr_1d = self.StandardScaler(arr_1d, self.x_mean, self.x_std)
            else:
                # any other scale_method falls back to min-max with clipping
                arr_1d = self.MinMaxScaleClip(arr_1d, self.x_min, self.x_max)
        
        # Align raw values with the fitted feature list via a labeled row.
        df = pd.DataFrame(arr_1d).T
        df.columns = self.alist
        
        df = df[self.flist]
        vector_1d = df.values[0] #shape = (N, )
        fmap = self._S.transform(vector_1d)  
        p, q, c = fmap.shape
        
        # `is not None` is the correct None test (was `!= None`)
        if self.fmap_shape is not None:        
            m, n = self.fmap_shape
            # logical `or` instead of bitwise `|` for boolean conditions
            if (m > p) or (n > q):
                # pad each channel independently up to the requested map size
                fps = []
                for i in range(c):
                    fp = smartpadding(fmap[:,:,i], self.fmap_shape)
                    fps.append(fp)
                fmap = np.stack(fps, axis=-1)
        return np.nan_to_num(fmap)   
Пример #5
0
    def transform(self, 
                  smiles, 
                  scale = True, 
                  scale_method = 'minmax',):
    
    
        """
        Extract features from a SMILES string and transform them into a
        2-D feature map.

        parameters
        --------------------
        smiles: smiles string of compound
        scale: bool, if True, we will apply MinMax scaling by the precomputed values
        scale_method: {'minmax', 'standard'}

        return
        --------------------
        fmap: numpy array feature map with NaNs replaced by 0, or None if
              the object has not been fitted yet
        """
        
        if not self.isfit:
            print_error('please fit first!')
            return

        # Run the feature extractor and label the resulting row vector.
        arr = self.extract.transform(smiles)
        df = pd.DataFrame(arr).T
        df.columns = self.extract.bitsinfo.IDs
        
        # logical `and` instead of bitwise `&` for boolean conditions;
        # scaling only applies to descriptor-type features
        if scale and (self.ftype == 'descriptor'):
            
            if scale_method == 'standard':
                df = self.StandardScaler(df,  
                                    self.scale_info['mean'],
                                    self.scale_info['std'])
            else:
                # any other scale_method falls back to min-max with clipping
                df = self.MinMaxScaleClip(df, 
                                     self.scale_info['min'], 
                                     self.scale_info['max'])
        
        df = df[self.flist]
        vector_1d = df.values[0] #shape = (N, )
        fmap = self._S.transform(vector_1d)       
        return np.nan_to_num(fmap)   
Пример #6
0
def ImapUnorder(processor,
                iterator,
                max_workers=10,
                fail_in_file='./filed.lst'):
    '''
    Run `processor` over `iterator` in a process pool and yield results as
    the futures finish; an item whose worker died is retried once in the
    current process, and inputs that fail twice are appended to
    `fail_in_file`.

    processor: function
    iterator: list or iterator, each element should be a tuple or dict, so that data can be used as ordered
    max_workers: size of the process pool
    fail_in_file: path of the file that collects twice-failed inputs
    '''
    with ProcessPoolExecutor(max_workers=max_workers) as executor:

        with open(fail_in_file, 'w+') as f:
            # Submit everything up front; map each future back to its input.
            futures = {
                executor.submit(processor, IdPlusSmile): IdPlusSmile
                for IdPlusSmile in iterator
            }
            success, _ = wait(futures)
            with pbar(total=len(futures)) as pb:
                for i in success:
                    IdPlusSmile = futures[i]
                    print_info('deal ' + str(IdPlusSmile))
                    try:
                        data_dict = i.result()
                        yield data_dict
                    except Exception as exc:
                        print_warn(
                            'because of the process is dead, input: %s is fialed when deal with %s: %s, so we will deal it automatically'
                            % (IdPlusSmile, processor, exc))

                        try:
                            # retry once in-process before giving up
                            yield processor(IdPlusSmile)
                        # narrowed from a bare `except:`, which would also
                        # swallow KeyboardInterrupt / SystemExit
                        except Exception:
                            f.write(str(IdPlusSmile) + '\n')
                            print_error(
                                ' input: %s is fialed when deal with %s: %s' %
                                (IdPlusSmile, processor, exc))
                    pb.update(1)