Exemplo n.º 1
0
    def impute_from_model(self,
                          models_folder: str,
                          matrices: iter,
                          postprocess=True):
        """Generator to provide imputed matrices on-the-fly

        The model file ``saved_model_<cpg_density>_cpgs.prelim`` is looked up
        in ``models_folder`` and loaded once; the same model is then reused
        for every matrix. Because this is a generator, nothing (including the
        model load) happens until the caller starts iterating.

        Arguments:
            models_folder {str} -- Path to directory containing trained CpGNet models
            matrices {iter} -- An iterable containing n x m matrices with n=cpgs and m=reads

        Keyword Arguments:
            postprocess {bool} -- Round imputed values to 1s and 0s  (default: {True})

        Yields:
            One matrix per input matrix, in input order. A matrix containing
            -1 is cast to float and imputed (and post-processed when
            ``postprocess`` is true); any other matrix is yielded as a copy.
        """

        model_path = os.path.join(
            models_folder,
            "saved_model_{}_cpgs.prelim".format(self.cpg_density))

        trained_model = PReLIM(cpgDensity=self.cpg_density)
        trained_model.model = load(model_path)
        # Report success only after load() has returned, so a missing or
        # corrupt model file never produces a misleading "loaded" message.
        print("Successfully loaded model: {}".format(model_path), flush=True)

        for m in matrices:
            # only impute if there is an unknown
            if -1 in m:
                m = m.astype(float)
                pm = trained_model.impute(m)
                if postprocess:
                    pm = self.postprocess_predictions(pm)
            # Nothing to impute, passback original matrix to keep list in order
            else:
                pm = m.copy()

            # K.clear_session()
            yield pm
Exemplo n.º 2
0
 def setUp(self):
     """Build the fixtures shared by the tests in this case.

     Verifies the required data files exist, parses the reads of
     chr1:910600-910700 from the ``bamA`` test file into a matrix, imputes
     the missing values with the stored PReLIM model and keeps both the raw
     predictions and the post-processed result on the instance.
     """
     needed_files = [bamA, prelim_model]
     self.required_data = needed_files
     check_data_exists(needed_files)

     bam_path = os.path.join(test_data_location, bamA)
     # NOTE(review): the meaning of the second argument (20) is not visible
     # here -- confirm against BamFileReadParser.
     bam_parser = ParseBam.BamFileReadParser(bam_path, 20)
     region_reads = bam_parser.parse_reads("chr1", 910600, 910700)
     raw_matrix = bam_parser.create_matrix(region_reads)
     # Drop rows that contain no values at all.
     self.matrix = raw_matrix.dropna(how="all")

     self.imputer = Imputation(4, bam_path)

     self.prelim = PReLIM(4)
     model_file = os.path.join(test_data_location, prelim_model)
     self.prelim.model = load(model_file)

     # Missing values are encoded as -1 before being handed to the model.
     encoded = np.array(self.matrix.fillna(-1))
     self.predictions = self.prelim.impute(encoded)

     self.imputed_matrix = self.imputer.postprocess_predictions(
         self.predictions)
     imputed_frame = pd.DataFrame(self.imputed_matrix)
     self.imputed_matrix = imputed_frame.dropna()
Exemplo n.º 3
0
    def __init__(self, cpg_density=None, save_path=None):
        """
        Prepare a trainer holding a fresh PReLIM model for one CpG density

        :param cpg_density: Number of CpGs
        :type cpg_density: int
        :param save_path: Location of folder to save the resulting model files. One per cpg density
        :raises AttributeError: if ``cpg_density`` or ``save_path`` is missing (or otherwise falsy)
        """
        # Checked in this order so a missing density is reported first.
        required = (
            (cpg_density, "CpG density must be specified"),
            (save_path, "Folder to save trained model must be specified"),
        )
        for supplied, complaint in required:
            if not supplied:
                raise AttributeError(complaint)

        self.cpg_density = cpg_density
        self.save_path = save_path
        self.model = PReLIM(cpgDensity=cpg_density)
Exemplo n.º 4
0
class TrainWithPReLIM:
    """
    Trainer that fits a PReLIM model for a single CpG density and writes
    the fitted model into a target folder
    """
    def __init__(self, cpg_density=None, save_path=None):
        """
        Validate the arguments and create the untrained PReLIM wrapper

        :param cpg_density: Number of CpGs
        :type cpg_density: int
        :param save_path: Location of folder to save the resulting model files. One per cpg density
        :raises AttributeError: if ``cpg_density`` or ``save_path`` is missing (or otherwise falsy)
        """
        # Density is validated first, so it is the error reported when both
        # arguments are missing.
        if cpg_density and save_path:
            self.save_path = save_path
            self.cpg_density = cpg_density
            self.model = PReLIM(cpgDensity=cpg_density)
            return

        if not cpg_density:
            raise AttributeError("CpG density must be specified")
        raise AttributeError(
            "Folder to save trained model must be specified")

    def save_net(self, model):
        """
        Write the given model into ``save_path`` and report where it went

        The file is named ``saved_model_<cpg_density>_cpgs.prelim``.

        :param model: The trained PReLIM model. Located at PReLIM.model
        :type model: :class:`clubcpg_prelim.PReLIM`
        :return: Path to the saved model
        """
        destination = os.path.join(
            self.save_path,
            "saved_model_{}_cpgs.prelim".format(self.cpg_density))
        dump(model, destination)
        print("Saved {} cpg model to {}".format(self.cpg_density, destination))
        return destination

    def train_model(self, bins: iter):
        """
        Fit the wrapped model on the provided bins, then save it

        :param bins: iterable containing CpG matrices of 1 (methylated), 0 (unmethylated), and -1 (unknown)
        :return: Path to the saved model file
        """
        # NOTE(review): model_file="no" presumably stops PReLIM from writing
        # its own file during training -- confirm against PReLIM.train.
        self.model.train(bins, model_file="no")
        # Only the inner fitted model is persisted, not the wrapper.
        return self.save_net(self.model.model)