def impute_from_model(self, models_folder: str, matrices: iter, postprocess=True):
    """Generator to provide imputed matrices on-the-fly

    The model file ``saved_model_<cpg_density>_cpgs.prelim`` is looked up in
    ``models_folder`` using ``self.cpg_density``. Because this is a generator,
    the model is only loaded once the first matrix is requested.

    Arguments:
        models_folder {str} -- Path to directory containing trained models
        matrices {iter} -- An iterable containing n x m matrices with n=cpgs and m=reads

    Keyword Arguments:
        postprocess {bool} -- Pass imputed values through
            ``self.postprocess_predictions`` to round them to 1s and 0s
            (default: {True})

    Yields:
        One matrix per input matrix, in the same order: the imputed matrix
        when the input contains a -1, otherwise a copy of the input.
    """
    model_path = os.path.join(
        models_folder, "saved_model_{}_cpgs.prelim".format(self.cpg_density))

    trained_model = PReLIM(cpgDensity=self.cpg_density)
    trained_model.model = load(model_path)
    # Report success only after the load has actually completed, so a failed
    # load is never preceded by a misleading "Successfully loaded" message.
    print("Successfully loaded model: {}".format(model_path), flush=True)

    for m in matrices:
        if -1 in m:
            # Only impute if there is an unknown (-1) value.
            m = m.astype(float)
            pm = trained_model.impute(m)
            if postprocess:
                pm = self.postprocess_predictions(pm)
        else:
            # Nothing to impute, pass back a copy of the original matrix to
            # keep the output in step with the input order.
            pm = m.copy()

        yield pm
def setUp(self):
    """Prepare the fixtures: a parsed read matrix and its imputed version."""
    # Verify the required data files are available before doing any work.
    self.required_data = [bamA, prelim_model]
    check_data_exists(self.required_data)

    bam_path = os.path.join(test_data_location, bamA)

    # Parse the reads of the chr1:910600-910700 region into a matrix and
    # drop the rows that are entirely NaN.
    bam_parser = ParseBam.BamFileReadParser(bam_path, 20)
    region_reads = bam_parser.parse_reads("chr1", 910600, 910700)
    raw_matrix = bam_parser.create_matrix(region_reads)
    self.matrix = raw_matrix.dropna(how="all")

    # NOTE(review): the 4 passed below is presumably the CpG density - confirm.
    self.imputer = Imputation(4, bam_path)

    self.prelim = PReLIM(4)
    model_path = os.path.join(test_data_location, prelim_model)
    self.prelim.model = load(model_path)

    # Missing values are replaced with -1 before being handed to the model.
    filled_matrix = np.array(self.matrix.fillna(-1))
    self.predictions = self.prelim.impute(filled_matrix)

    rounded = self.imputer.postprocess_predictions(self.predictions)
    self.imputed_matrix = pd.DataFrame(rounded).dropna()
def __init__(self, cpg_density=None, save_path=None):
    """
    Set up a trainer holding a fresh PReLIM model for one CpG density

    :param cpg_density: Number of CpGs
    :type cpg_density: int
    :param save_path: Location of folder to save the resulting model files.
        One per cpg density
    :raises AttributeError: if ``cpg_density`` or ``save_path`` is missing
        (or otherwise falsy)
    """
    # Checked in order, so a missing density is reported before a missing path.
    required_arguments = (
        (cpg_density, "CpG density must be specified"),
        (save_path, "Folder to save trained model must be specified"),
    )
    for value, message in required_arguments:
        if not value:
            raise AttributeError(message)

    self.save_path = save_path
    self.cpg_density = cpg_density
    self.model = PReLIM(cpgDensity=cpg_density)
class TrainWithPReLIM:
    """
    Trains a PReLIM model for a single CpG density and saves it to disk
    """

    def __init__(self, cpg_density=None, save_path=None):
        """
        Create the trainer together with an untrained PReLIM model

        :param cpg_density: Number of CpGs
        :type cpg_density: int
        :param save_path: Location of folder to save the resulting model files.
            One per cpg density
        :raises AttributeError: if ``cpg_density`` or ``save_path`` is missing
            (or otherwise falsy)
        """
        if not cpg_density:
            density_error = "CpG density must be specified"
            raise AttributeError(density_error)
        if not save_path:
            path_error = "Folder to save trained model must be specified"
            raise AttributeError(path_error)

        self.cpg_density = cpg_density
        self.save_path = save_path
        self.model = PReLIM(cpgDensity=cpg_density)

    def save_net(self, model):
        """
        Write the given model to ``saved_model_<cpg_density>_cpgs.prelim``
        inside ``self.save_path``

        :param model: The trained PReLIM model. Located at PReLIM.model
        :type model: :class:`clubcpg_prelim.PReLIM`
        :return: Path to the saved model
        """
        name_template = "saved_model_{}_cpgs.prelim"
        destination = os.path.join(
            self.save_path, name_template.format(self.cpg_density))

        dump(model, destination)
        print("Saved {} cpg model to {}".format(self.cpg_density, destination))

        return destination

    def train_model(self, bins: iter):
        """
        Train the model on the provided bins, then save it with :meth:`save_net`

        :param bins: iterable containing CpG matrices of 1 (methylated),
            0 (unmethylated), and -1 (unknown)
        :return: Path to the saved model file
        """
        # NOTE(review): model_file="no" presumably stops PReLIM from writing
        # its own file during training - confirm against the PReLIM API.
        self.model.train(bins, model_file="no")
        return self.save_net(self.model.model)