import numpy as np
import pandas as pd
from tqdm import tqdm
from joblib import Parallel, delayed
from concurrent.futures import ProcessPoolExecutor, wait

# print_info, print_warn, print_error, pbar and smartpadding are assumed to be
# helpers provided elsewhere in this package.


def rearrangement(self, orignal_X, target_mp):
    """Rearrange feature maps X from the original map's layout to target_mp's
    layout, for the case where the features have already been extracted but
    their positions need to be refit and rearranged.

    parameters
    -------------------
    orignal_X: the feature values transformed by the original map (this object)
    target_mp: the target feature map object

    return
    -------------
    target_X, shape is (N, W, H, C)
    """
    assert self.flist == target_mp.flist, print_error(
        'Input feature lists differ, cannot rearrange; check your feature list via mp.flist')
    assert len(orignal_X.shape) == 4, print_error(
        'Input X has a wrong shape, please reshape to (samples, w, h, channels)')

    # recover the canonical feature order from this object's layout
    idx = self._S.df.sort_values('indices').idx.tolist()
    idx = np.argsort(idx)

    N = len(orignal_X)   # number of samples
    M = len(self.flist)  # number of features
    res = []
    for i in tqdm(range(N), ascii=True):
        x = orignal_X[i].sum(axis=-1)              # collapse channels into one 2-D map
        vector_1d_ordered = x.reshape(-1,)
        vector_1d_ordered = vector_1d_ordered[:M]  # drop padded positions
        vector_1d = vector_1d_ordered[idx]         # restore canonical feature order
        fmap = target_mp._S.transform(vector_1d)
        res.append(fmap)
    return np.stack(res)
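# Usage sketch (hypothetical names, not part of this module): `mp_a` and `mp_b`
# are assumed to be two fitted map objects that share the same feature list,
# and `raw_2d` a 2-D array of raw feature values:
#
#   X_a = mp_a.batch_transform(raw_2d)       # (N, W, H, C) in mp_a's layout
#   X_b = mp_a.rearrangement(X_a, mp_b)      # same values, mp_b's layout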
def batch_transform(self, array_2d, scale=True, scale_method='minmax', n_jobs=4):
    """
    parameters
    --------------------
    array_2d: 2-D numpy array of feature points, M (samples) x N (feature points)
    scale: bool; if True, apply min-max scaling with the precomputed values
    scale_method: {'minmax', 'standard'}
    n_jobs: number of parallel jobs
    """
    if not self.isfit:
        print_error('please fit first!')
        return

    assert isinstance(array_2d, np.ndarray), 'input must be a numpy ndarray!'
    assert array_2d.ndim == 2, 'input must be a 2-D numpy array!'

    # transform each sample independently, in parallel
    P = Parallel(n_jobs=n_jobs)
    res = P(delayed(self.transform)(arr_1d, scale, scale_method)
            for arr_1d in tqdm(array_2d, ascii=True))
    X = np.stack(res)
    return X
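# Usage sketch, assuming `mp` is a fitted map object (hypothetical name):
#
#   import numpy as np
#   X2d = np.random.rand(100, len(mp.alist))  # 100 samples of raw feature values
#   X4d = mp.batch_transform(X2d, scale=True, scale_method='minmax', n_jobs=4)
#   X4d.shape                                  # -> (100, W, H, C)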
def swap_log(swap, error=True):
    """Forward each non-empty line of a multi-line message to the project's
    logging helpers, as errors (default) or as plain info."""
    sinfo = [l for l in swap.split('\n') if l != '']
    for o in sinfo:
        if error:
            print_error(o)
        else:
            print_info(o)
    return
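# Usage sketch: route a captured multi-line message line by line, e.g.
#
#   swap_log('step 1 ok\n\nstep 2 ok', error=False)   # printed via print_info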
def transform(self, arr_1d, scale=True, scale_method='minmax'):
    """
    parameters
    --------------------
    arr_1d: 1-D numpy array of feature points
    scale: bool; if True, apply min-max scaling with the precomputed values
    scale_method: {'minmax', 'standard'}
    """
    if not self.isfit:
        print_error('please fit first!')
        return

    if scale:
        if scale_method == 'standard':
            arr_1d = self.StandardScaler(arr_1d, self.x_mean, self.x_std)
        else:
            arr_1d = self.MinMaxScaleClip(arr_1d, self.x_min, self.x_max)

    # keep only the fitted feature list, in its fitted order
    df = pd.DataFrame(arr_1d).T
    df.columns = self.alist
    df = df[self.flist]

    vector_1d = df.values[0]  # shape = (N,)
    fmap = self._S.transform(vector_1d)

    # pad the map up to the requested fmap_shape if it is larger than the output
    p, q, c = fmap.shape
    if self.fmap_shape is not None:
        m, n = self.fmap_shape
        if (m > p) or (n > q):
            fps = []
            for i in range(c):
                fp = smartpadding(fmap[:, :, i], self.fmap_shape)
                fps.append(fp)
            fmap = np.stack(fps, axis=-1)
    return np.nan_to_num(fmap)
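# Usage sketch, assuming `mp` is a fitted map object and `x` a raw feature
# vector of length len(mp.alist) (hypothetical names):
#
#   fmap = mp.transform(x, scale=True, scale_method='standard')
#   fmap.shape   # -> (W, H, C), padded up to mp.fmap_shape when that is larger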
def transform(self, smiles, scale=True, scale_method='minmax'):
    """
    parameters
    --------------------
    smiles: SMILES string of the compound
    scale: bool; if True, apply min-max scaling with the precomputed values
    scale_method: {'minmax', 'standard'}
    """
    if not self.isfit:
        print_error('please fit first!')
        return

    arr = self.extract.transform(smiles)
    df = pd.DataFrame(arr).T
    df.columns = self.extract.bitsinfo.IDs

    # only descriptors are scaled with the precomputed statistics; fingerprints are not
    if scale and (self.ftype == 'descriptor'):
        if scale_method == 'standard':
            df = self.StandardScaler(df, self.scale_info['mean'], self.scale_info['std'])
        else:
            df = self.MinMaxScaleClip(df, self.scale_info['min'], self.scale_info['max'])

    df = df[self.flist]
    vector_1d = df.values[0]  # shape = (N,)
    fmap = self._S.transform(vector_1d)
    return np.nan_to_num(fmap)
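# Usage sketch, assuming `mp` is a fitted map object (hypothetical name):
#
#   fmap = mp.transform('CCO')   # ethanol; returns a (W, H, C) feature map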
def ImapUnorder(processor, iterator, max_workers=10, fail_in_file='./failed.lst'):
    """
    processor: function
    iterator: list or iterator; each element should be a tuple or dict,
              so that results can be matched back to their inputs
    """
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        with open(fail_in_file, 'w+') as f:
            futures = {executor.submit(processor, IdPlusSmile): IdPlusSmile
                       for IdPlusSmile in iterator}
            success, _ = wait(futures)
            with pbar(total=len(futures)) as pb:
                for i in success:
                    IdPlusSmile = futures[i]
                    print_info('deal ' + str(IdPlusSmile))
                    try:
                        data_dict = i.result()
                        yield data_dict
                    except Exception as exc:
                        print_warn(
                            'worker process died, input %s failed in %s: %s; retrying in the main process'
                            % (IdPlusSmile, processor, exc))
                        # retry once in the current process; log the input on failure
                        try:
                            yield processor(IdPlusSmile)
                        except Exception:
                            f.write(str(IdPlusSmile) + '\n')
                            print_error('input %s failed in %s: %s'
                                        % (IdPlusSmile, processor, exc))
                    pb.update(1)
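# Usage sketch (hypothetical processor; it must be a top-level function so the
# process pool can pickle it). Inputs that fail even after the in-process retry
# are appended to fail_in_file:
#
#   def processor(id_plus_smile):
#       cid, smile = id_plus_smile
#       return {'id': cid, 'smile_length': len(smile)}
#
#   items = [('c1', 'CCO'), ('c2', 'c1ccccc1')]
#   for data_dict in ImapUnorder(processor, items, max_workers=4):
#       print(data_dict)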