def _fit_predictive_model(self, seqs, neg_seqs=None):
    """Fit ``self.estimator`` on graphs built from positive and negative sequences.

    Args:
        seqs: Iterable of sequences used as the positive set.
        neg_seqs: Optional iterable of sequences used as the negative set.
            When ``None``, negatives are derived from ``seqs`` by calling
            ``seq_to_seq`` with ``shuffle_modifier``,
            ``times=self.negative_ratio`` and ``order=self.shuffle_order``.

    Returns:
        None. The result of ``fit`` is stored on ``self.estimator``.
    """

    def to_graphs(sequences):
        # Same pre-processing configuration for positives and negatives;
        # the attributes are read at call time, once per call.
        return mp_pre_process(
            sequences,
            pre_processor=sequence_to_eden,
            n_blocks=self.pre_processor_n_blocks,
            block_size=self.pre_processor_block_size,
            n_jobs=self.pre_processor_n_jobs,
        )

    # Two independent iterators over the input: one is turned into the
    # positive graphs, the other is kept as the source for shuffled negatives.
    positives, shuffle_source = tee(seqs)
    pos_graphs = to_graphs(positives)

    if neg_seqs is None:
        # No explicit negatives were given: shuffle the positives instead.
        neg_seqs = seq_to_seq(
            shuffle_source,
            modifier=shuffle_modifier,
            times=self.negative_ratio,
            order=self.shuffle_order,
        )
    neg_graphs = to_graphs(neg_seqs)

    # Fit the discriminative estimator on the two graph collections.
    self.estimator = fit(
        pos_graphs,
        neg_graphs,
        vectorizer=self.vectorizer,
        n_iter_search=self.n_iter_search,
        n_jobs=self.n_jobs,
        n_blocks=self.n_blocks,
        block_size=self.block_size,
        random_state=self.random_state,
    )
def annotate(self, iterable):
    """Pre-process ``iterable`` into graphs and annotate them.

    Args:
        iterable: Input items; must satisfy ``is_iterable``.

    Returns:
        The value returned by ``self.vectorizer.annotate`` when given the
        pre-processed graphs and ``self.estimator``.

    Raises:
        AssertionError: With message ``'Not iterable'`` if
            ``is_iterable(iterable)`` is false.
    """
    # NOTE: this check is an ``assert``, so it is skipped when Python runs
    # with optimizations enabled (``-O``).
    assert is_iterable(iterable), 'Not iterable'

    pre_process_options = {
        'pre_processor': self.pre_processor,
        'pre_processor_args': self.pre_processor_args,
        'n_blocks': self.pre_processor_n_blocks,
        'block_size': self.pre_processor_block_size,
        'n_jobs': self.pre_processor_n_jobs,
    }
    graphs = mp_pre_process(iterable, **pre_process_options)
    return self.vectorizer.annotate(graphs, self.estimator)
def annotate(self, iterable):
    """Annotate the graphs obtained by pre-processing ``iterable``.

    The input is run through ``mp_pre_process`` using the instance's
    pre-processor settings, and the resulting graphs are handed to
    ``self.vectorizer.annotate`` together with ``self.estimator``.

    Args:
        iterable: Input items; must satisfy ``is_iterable``.

    Returns:
        Whatever ``self.vectorizer.annotate`` returns.

    Raises:
        AssertionError: With message ``'Not iterable'`` if
            ``is_iterable(iterable)`` is false.
    """
    # Input check; being an ``assert``, it is not executed under ``-O``.
    assert is_iterable(iterable), 'Not iterable'

    graphs = mp_pre_process(
        iterable,
        pre_processor=self.pre_processor,
        pre_processor_args=self.pre_processor_args,
        n_blocks=self.pre_processor_n_blocks,
        block_size=self.pre_processor_block_size,
        n_jobs=self.pre_processor_n_jobs,
    )
    annotated = self.vectorizer.annotate(graphs, self.estimator)
    return annotated
def _data_matrix(self, iterable, fit_vectorizer=False):
    """Pre-process ``iterable`` into graphs and vectorize them.

    Args:
        iterable: Input items; must satisfy ``is_iterable``.
        fit_vectorizer: When true, ``self.vectorizer.fit`` is called on the
            graphs before they are vectorized.

    Returns:
        The value returned by ``vectorize`` for the pre-processed graphs.

    Raises:
        AssertionError: With message ``'Not iterable'`` if
            ``is_iterable(iterable)`` is false.
    """
    # Input check; being an ``assert``, it is not executed under ``-O``.
    assert is_iterable(iterable), 'Not iterable'

    graph_stream = mp_pre_process(
        iterable,
        pre_processor=self.pre_processor,
        pre_processor_args=self.pre_processor_args,
        n_blocks=self.pre_processor_n_blocks,
        block_size=self.pre_processor_block_size,
        n_jobs=self.pre_processor_n_jobs,
    )

    # Split the stream so that fitting (when requested) and vectorizing each
    # consume their own iterator over the same graphs.
    graphs_to_vectorize, graphs_to_fit = tee(graph_stream)

    # The vectorizer parameters are (re)applied on every call, before any
    # fitting or vectorizing happens.
    self.vectorizer.set_params(**self.vectorizer_args)
    if fit_vectorizer:
        self.vectorizer.fit(graphs_to_fit)

    return vectorize(
        graphs_to_vectorize,
        vectorizer=self.vectorizer,
        n_jobs=self.n_jobs,
        n_blocks=self.n_blocks,
    )
def seq_to_data_matrix(self, sequences=None):
    """Turn ``sequences`` into a dense matrix rescaled by ``self.scale``.

    Args:
        sequences: Input sequences. The default ``None`` is forwarded to
            ``mp_pre_process`` unchanged.

    Returns:
        The result of ``self.scale.transform`` applied to the dense matrix.

    Note:
        ``self.scale`` is refitted on every call, using the same data that is
        then transformed.
    """
    # Sequences -> graphs -> feature matrix; both steps are called with
    # n_jobs=-1 and the pre-processor receives no extra arguments.
    graphs = mp_pre_process(
        sequences,
        pre_processor=self.pre_processor,
        pre_processor_args={},
        n_jobs=-1,
    )
    feature_matrix = vectorize(graphs, vectorizer=self.vectorizer, n_jobs=-1)

    # Densify the matrix.
    dense_matrix = feature_matrix.toarray()

    # Standardize the matrix: fit the scaler on it, then apply the scaler.
    self.scale.fit(dense_matrix)
    return self.scale.transform(dense_matrix)