示例#1
0
    def reJoinRepDecoy(self):
        """Collect the representative decoy of every cluster over all inputs.

        For each entry of ``self.file_list`` the decoy coordinates, the MCC
        matrix, the cluster array, the energy table and the MCC list are
        loaded.  One representative per cluster is picked with
        ``pick_rep.pickRep``; its row of the combined energy/MCC table is
        appended to ``self.rep_sheet`` and its coordinates are written to
        ``self.rep_dc_file``.  Afterwards ``self.rep_sheet`` is sorted by the
        ``mcc`` column in descending order and saved to ``self.rep_ener_file``.

        Raises:
            KeyError: if an entry of ``self.file_list`` lacks one of the
                expected file keys.
        """
        with open(self.rep_dc_file, "w") as coords_output_buffer:
            for file_dic in self.file_list:
                # Echo the paths of this entry.  The single-argument form
                # prints the same text under Python 2 and Python 3, whereas
                # the print statement is a syntax error on Python 3.
                for key, value in file_dic.items():
                    print("%s ==> %s" % (key, value))

                decoy_file = file_dic["decoy_file"]
                mcc_lst_file = file_dic["mcc_lst_file"]
                energy_file = file_dic["energy_file"]
                cluster_file = file_dic["cluster_file"]
                mcc_matrix_file = file_dic["mcc_matrix_file"]

                # The keyword spelling ("origial_text") is kept exactly as the
                # call site has it; it must match DecoyCoord's signature.
                dc_coord = DecoyCoord(origial_text=decoy_file)
                dc_coord.setDecoyUnitLines()

                mcc_matrix = np.loadtxt(mcc_matrix_file, skiprows=1)  # first line skipped
                cluster_array = np.loadtxt(cluster_file, dtype=int)
                # Raw string: "\s" in a plain literal is an invalid escape
                # sequence.  The pattern itself (one whitespace character per
                # separator) is unchanged.
                energy_sheet = pd.read_csv(energy_file, sep=r"\s", header=None)
                energy_sheet.columns = self.energy_columns
                mcc_lst = pd.read_csv(mcc_lst_file, names=self.mcc_name)

                # Put the energy columns and the MCC columns side by side.
                source_sheet = pd.concat([energy_sheet, mcc_lst], axis=1)

                # NOTE(review): `.ix`, `DataFrame.append` and `DataFrame.sort`
                # are legacy pandas APIs that newer releases removed; they are
                # left untouched so behaviour on the pandas version in use
                # does not change -- confirm before upgrading pandas.
                for _cluster_num, cluster in pick_rep.reGroup(cluster_array):
                    conf_rep = pick_rep.pickRep(mcc_matrix, cluster)
                    # Add the representative's row to the energy sheet,
                    # keeping its original index label.
                    self.rep_sheet = self.rep_sheet.append(
                        source_sheet.ix[conf_rep], ignore_index=False
                    )
                    # Add the representative's coordinates to the output file.
                    dc_coord.appendRepCoords(coords_output_buffer, conf_rep)

        # Re-sort by MCC value, highest first, before saving.
        self.rep_sheet = self.rep_sheet.sort(columns="mcc", ascending=False)
        self.rep_sheet.to_csv(self.rep_ener_file)
示例#2
0
    def secondReJoinRepDecoy(self):
        """
        Assemble the feature tables of the cluster representatives.

        After the setup calls (buildSecondFn, displaySecondFn,
        initSecondRepFeatureFrame), one representative per cluster is chosen
        with pick_rep.pickRep and its feature row is added to
        self.all_rep_sheet.  The table is then ordered by 'mcc' (descending)
        and split into a high (mcc > 0.6) and a low (mcc < 0.4) subset; the
        three tables are written out as CSV files whose paths come from
        self.final_dic.
        """
        self.buildSecondFn()
        self.displaySecondFn()
        self.initSecondRepFeatureFrame()

        # resolve every path up front: inputs first, then outputs
        paths = self.final_dic
        in_keys = ('mcc_matrix_file', 'features_file', 'cluster_file')
        out_keys = ('rep_all_ener_file', 'rep_low_ener_file', 'rep_high_ener_file')
        matrix_path, features_path, clusters_path = [paths[k] for k in in_keys]
        all_out, low_out, high_out = [paths[k] for k in out_keys]

        # read the inputs
        matrix = np.loadtxt(matrix_path, skiprows=1)
        assignments = np.loadtxt(clusters_path, dtype=int)
        features = pd.read_csv(features_path)      # default separator, i.e. comma

        # one representative row per cluster, collected with a fresh index
        for _, members in pick_rep.reGroup(assignments):
            rep_idx = pick_rep.pickRep(matrix, members)
            rep_row = features.ix[rep_idx]
            self.all_rep_sheet = self.all_rep_sheet.append(rep_row, ignore_index=True)

        # highest mcc first, then derive the two subsets from the sorted table
        self.all_rep_sheet = self.all_rep_sheet.sort(columns='mcc', ascending=False)
        ranked = self.all_rep_sheet
        self.high_rep_sheet = ranked[ranked['mcc'] > 0.6]
        self.low_rep_sheet = ranked[ranked['mcc'] < 0.4]

        # write all, high, low -- in that order
        for sheet, target in (
            (self.all_rep_sheet, all_out),
            (self.high_rep_sheet, high_out),
            (self.low_rep_sheet, low_out),
        ):
            sheet.to_csv(target)
示例#3
0
    def secondTestReJoinRepDecoy(self):
        """
        Build the all/high/low representative feature tables (test variant).

        After the setup calls (buildTestSecondFn, displaySecondFn,
        initSecondRepFeatureFrame) the whitespace-separated feature file and
        the header-less MCC file are loaded and joined column-wise, the MCC
        values becoming the "mcc" column.  The representative of each cluster
        is drawn with ``choice`` rather than computed from an MCC matrix, so
        the result can differ between runs.  The collected rows are sorted by
        "mcc" in descending order, split into a high (mcc > 0.6) and a low
        (mcc < 0.4) subset, and all three tables are written as CSV files
        whose paths come from ``self.final_dic``.

        Raises:
            KeyError: if ``self.final_dic`` lacks one of the expected keys.
        """
        self.buildTestSecondFn()
        self.displaySecondFn()
        self.initSecondRepFeatureFrame()

        # input (the MCC matrix is not needed here, so its path is not read)
        file_dic = self.final_dic
        features_file = file_dic["features_file"]
        cluster_file = file_dic["cluster_file"]
        mcc_file = file_dic["mcc_file"]

        # output
        rep_all_ener_file = file_dic["rep_all_ener_file"]
        rep_low_ener_file = file_dic["rep_low_ener_file"]
        rep_high_ener_file = file_dic["rep_high_ener_file"]

        # loading
        cluster_array = np.loadtxt(cluster_file, dtype=int)
        # Columns are separated by runs of whitespace (not commas).  A raw
        # string keeps the regex free of invalid escape sequences.
        feature_sheet = pd.read_csv(features_file, sep=r"\s+")
        mcc_sheet = pd.read_csv(mcc_file, header=None)
        mcc_sheet.columns = ["mcc"]
        feature_sheet = pd.concat([feature_sheet, mcc_sheet], axis=1)

        for _cluster_num, cluster in pick_rep.reGroup(cluster_array):
            # Pick one member of the cluster.
            # NOTE(review): the "- 1" presumably converts a 1-based member
            # number into a 0-based row label -- confirm against reGroup.
            conf_rep = choice(cluster) - 1
            # Append the chosen row; the result is re-indexed from zero.
            self.all_rep_sheet = self.all_rep_sheet.append(
                feature_sheet.ix[conf_rep], ignore_index=True
            )

        # Re-sort by MCC value, highest first.
        self.all_rep_sheet = self.all_rep_sheet.sort(
            columns="mcc", ascending=False
        )
        self.high_rep_sheet = self.all_rep_sheet[self.all_rep_sheet["mcc"] > 0.6]
        self.low_rep_sheet = self.all_rep_sheet[self.all_rep_sheet["mcc"] < 0.4]

        self.all_rep_sheet.to_csv(rep_all_ener_file)
        self.high_rep_sheet.to_csv(rep_high_ener_file)
        self.low_rep_sheet.to_csv(rep_low_ener_file)