def get_model_RFE_top_features(self,expression_file,ic50_file,target_features,drug):
     """
     Select features for a drug with recursive feature elimination (RFE).

     Loads the normalized, trimmed expression frame and IC50 series for
     ``drug`` (no threshold applied), then runs RFE with ``self.model`` as the
     estimator until ``target_features`` features remain.

     :param expression_file: expression data source, forwarded to ``dfm``
     :param ic50_file: IC50 data source, forwarded to ``dfm``
     :param target_features: number of features to keep (coerced with ``int``)
     :param drug: drug identifier, forwarded to ``dfm``
     :return: list of labels from ``expression_frame.index`` whose entry in
              the fitted selector's ``support_`` mask is true
     """
     expression_frame,ic50_series = dfm.get_expression_frame_and_ic50_series_for_drug(expression_file, ic50_file,drug,normalized=True,trimmed=True,threshold=None)
     scikit_data,scikit_target = dfm.get_scikit_data_and_target(expression_frame,ic50_series)
     # Read the column count from the array shape rather than copying the whole
     # matrix into nested Python lists just to measure its first row.
     # Assumes scikit_data is a 2-D (samples x features) array, as RFE.fit expects.
     num_features = scikit_data.shape[1]
     # Drop about 1% of the starting features per iteration, and always at least one.
     step_length = num_features // 100 + 1
     # n_features_to_select is keyword-only in current scikit-learn releases;
     # passing it by name also works on older releases.
     selector = RFE(self.model,n_features_to_select=int(target_features),step=step_length)
     selector.fit(scikit_data,scikit_target)
     # Pair each index label with its mask entry; avoids the Python-2-only xrange.
     return [feature for feature,selected in zip(expression_frame.index,selector.support_) if selected]
 def get_model_coefficients_threshold(self,expression_file,ic50_file,threshold,drug):
     """
     Fit the model on thresholded data for ``drug`` and return its coefficients.

     Only available when ``self.model_type`` is ``'svm'`` and ``self.kernel``
     is ``'linear'``; any other configuration is rejected before data is loaded.

     :param expression_file: expression data source, forwarded to ``dfm``
     :param ic50_file: IC50 data source, forwarded to ``dfm``
     :param threshold: threshold forwarded to the ``dfm`` loader
     :param drug: drug identifier, forwarded to ``dfm``
     :return: tuple of (index of the loaded expression frame, ``self.model.coef_[0]``)
     :raises Exception: if the model is not a linear-kernel SVM
     """
     # Guard clause: model_type is checked first, so kernel is only read for SVMs.
     if self.model_type != 'svm' or self.kernel != 'linear':
         raise Exception("Method only defined for the SVM linear model")

     frame,series = dfm.get_expression_frame_and_ic50_series_for_drug(
         expression_file,
         ic50_file,
         drug,
         normalized=True,
         trimmed=True,
         threshold=threshold
     )
     features,targets = dfm.get_scikit_data_and_target(frame,series)
     self.model.fit(features,targets)
     return frame.index, self.model.coef_[0]
    def get_predictions_full_CCLE_dataset_threshold(self,expression_file,ic50_file,threshold,drug):
        """
        Train on thresholded data for ``drug`` and predict over the full expression dataset.

        :param expression_file: expression data source; used for both the
                                training data and the full dataset
        :param ic50_file: IC50 data source, forwarded to ``dfm``
        :param threshold: threshold forwarded to the ``dfm`` training loader
        :param drug: drug identifier, forwarded to ``dfm``
        :return: tuple of (cell line identifiers returned by ``dfm``,
                 predictions of ``self.model`` for the full dataset)
        """
        frame, series = dfm.get_expression_frame_and_ic50_series_for_drug(
            expression_file,
            ic50_file,
            drug,
            normalized=True,
            trimmed=True,
            threshold=threshold
        )
        train_x, train_y = dfm.get_scikit_data_and_target(frame, series)

        # The training frame's index is handed to the full-dataset loader,
        # presumably so both matrices share the same features (confirm in dfm).
        cell_lines, full_data = dfm.get_normalized_full_expression_identifiers_and_data(
            expression_file,
            frame.index
        )

        self.model.fit(train_x, train_y)
        return cell_lines, self.model.predict(full_data)
    def get_predictions_full_CCLE_dataset_top_features(self,expression_file,ic50_file,num_features,drug):
        """
        Train on the top ``num_features`` features for ``drug`` and predict over the full dataset.

        The features are chosen by ``dfm.get_pval_top_n_features``; the
        expression frame is restricted to them before the model is fitted.

        :param expression_file: expression data source; used for both the
                                training data and the full dataset
        :param ic50_file: IC50 data source, forwarded to ``dfm``
        :param num_features: number of features requested from ``dfm``
        :param drug: drug identifier, forwarded to ``dfm``
        :return: tuple of (cell line identifiers returned by ``dfm``,
                 predictions of ``self.model`` for the full dataset,
                 the selected features as a list)
        """
        frame, series = dfm.get_expression_frame_and_ic50_series_for_drug(
            expression_file,
            ic50_file,
            drug,
            normalized=True,
            trimmed=True
        )
        top_features = dfm.get_pval_top_n_features(frame, series, num_features)
        # NOTE(review): `.ix` was deprecated in pandas 0.20 and removed in 1.0.
        # Replace with `.loc` if top_features holds index labels, or `.iloc` if
        # it holds positions -- confirm against dfm.get_pval_top_n_features.
        frame = frame.ix[top_features]
        train_x, train_y = dfm.get_scikit_data_and_target(frame, series)

        cell_lines, full_data = dfm.get_normalized_full_expression_identifiers_and_data(
            expression_file,
            frame.index
        )
        self.model.fit(train_x, train_y)
        return cell_lines, self.model.predict(full_data), list(top_features)