def writePredictionsByCellLine(self, drug_scores_by_cell_line,
                               input_folder):
     """Write one CSV row per cell line listing its drugs ranked by score.

     The header is "Cell Line" followed by a "<rank> best drug" /
     "<rank> best drug score" column pair for every rank up to the longest
     drug list. Each data row holds the cell line name followed by its
     (drug, score) pairs sorted by score, highest first.

     :param drug_scores_by_cell_line: mapping of cell line name to a list
         of (drug, score) pairs.
     :param input_folder: folder in which the predictions-by-cell-line file
         is created (overwritten if it already exists).
     """
     # Built-in max with a default: an empty mapping now yields a
     # header-only file instead of raising from numpy.max([]).
     total_drugs = max(
         (len(drugs) for drugs in drug_scores_by_cell_line.values()),
         default=0)
     header = ["Cell Line"]
     for rank in range(1, total_drugs + 1):
         ordinal = (SafeCastUtil.safeCast(rank, str) +
                    MachineLearningService.generateNumericalSuffix(rank))
         header.append(ordinal + " best drug")
         header.append(ordinal + " best drug score")
     predictions_by_cell_line_path = input_folder + "/" + RecommendationsService.PREDICTIONS_BY_CELL_LINE_FILE
     with open(predictions_by_cell_line_path, "w",
               newline='') as predictions_by_cell_line_file:
         try:
             writer = csv.writer(predictions_by_cell_line_file)
             writer.writerow(header)
             for cell_line, drug_scores in drug_scores_by_cell_line.items():
                 ranked = sorted(drug_scores,
                                 key=lambda drug_and_score: drug_and_score[1],
                                 reverse=True)
                 row = [cell_line]
                 for drug_and_score in ranked:
                     row.extend((drug_and_score[0], drug_and_score[1]))
                 writer.writerow(row)
         except ValueError as error:
             # Log the path; the file object's repr is not useful here.
             self.log.error("Error writing to %s. %s",
                            predictions_by_cell_line_path, error)
    def fetchFeatureImportances(self, model, features_in_order):
        """Collect score-weighted coefficients per feature and normalize them.

        Features whose name contains ``self.bin_cat_matrix_name`` are
        excluded. For every entry of ``model.models_by_phrase`` whose model
        has a ``coef_`` of matching length, each coefficient is multiplied
        by that entry's ``score`` and accumulated under its feature name.

        :param model: object exposing ``models_by_phrase``.
        :param features_in_order: feature names aligned with ``coef_``.
        :return: whatever the parent class's ``normalizeCoefficients``
            returns for the per-feature sums and feature names.
        """
        evaluated_features = []
        for feature in features_in_order:
            if self.bin_cat_matrix_name not in feature:
                evaluated_features.append(feature)
        num_features = len(evaluated_features)

        importances_map = OrderedDict()
        for model_phrase in model.models_by_phrase:
            if not hasattr(model_phrase.model, "coef_"):
                continue
            if num_features != len(model_phrase.model.coef_):
                continue
            for index, feature in enumerate(evaluated_features):
                weighted_score = model_phrase.model.coef_[index] * model_phrase.score
                importances_map.setdefault(feature, []).append(weighted_score)

        feature_names = SafeCastUtil.safeCast(importances_map.keys(), list)
        # NOTE(review): the divisor is the number of evaluated features, not
        # the number of contributions per feature -- confirm this is intended.
        average_coefficients = []
        for imps in SafeCastUtil.safeCast(importances_map.values(), list):
            average_coefficients.append(numpy.sum(imps) / num_features)
        return super().normalizeCoefficients(average_coefficients,
                                             feature_names)
Exemplo n.º 3
0
 def toDict(dict_as_string):
     """Parse a ``"key: value, key: value"`` string into an OrderedDict.

     Pairs are separated by commas and key/value by a colon. Keys and values
     are stripped of surrounding whitespace; each value is passed through
     ``SafeCastUtil.safeCast(value, float, value)``.

     Blank segments (an empty input string, or a stray/trailing comma) are
     skipped; previously they raised IndexError. A non-blank segment without
     a colon still raises IndexError, as before.

     :param dict_as_string: the serialized dictionary.
     :return: OrderedDict in the order the pairs appear in the string.
     """
     dictionary = OrderedDict()
     for key_val_pair in dict_as_string.split(","):
         if not key_val_pair.strip():
             continue
         as_tuple = SafeCastUtil.safeCast(key_val_pair.split(":"), tuple)
         raw_value = as_tuple[1].strip()
         dictionary[as_tuple[0].strip()] = SafeCastUtil.safeCast(raw_value, float, raw_value)
     return dictionary
Exemplo n.º 4
0
    def generateGeneLists(self, features_per_file, important_features):
        """Write randomly sized gene-list CSVs plus one significant-gene CSV.

        ``important_features`` is consumed from the front in random chunks
        of at least two entries; each chunk becomes one single-row CSV named
        with ``ArgumentProcessingService.GENE_LISTS`` and a running number.
        A final file named ``self.SIGNIFICANT_GENE_LIST`` lists the
        significant-feature names numbered from 1 to ``features_per_file / 10``.

        :param features_per_file: number of features in each feature file.
        :param important_features: sequence of feature identifiers to spread
            over the gene lists; a single left-over entry is not written.
        """
        gene_list_num = 1
        while len(important_features) > 1:
            gene_list_size = random.randint(2, len(important_features))
            gene_list = [
                self.FEATURE_PREFIX + SafeCastUtil.safeCast(feature, str)
                for feature in important_features[:gene_list_size]
            ]
            # Trailing empty cell kept from the original implementation.
            gene_list.append("")
            important_features = important_features[gene_list_size:]
            file_name = self.path + "/" + ArgumentProcessingService.GENE_LISTS + \
                        SafeCastUtil.safeCast(gene_list_num, str) + ".csv"
            # newline='' is what the csv module requires of files it writes
            # to; without it Windows output gets an extra "\r" per row.
            with open(file_name, "w", newline='') as csv_file:
                writer = csv.writer(csv_file,
                                    delimiter=',',
                                    quotechar='|',
                                    quoting=csv.QUOTE_MINIMAL)
                writer.writerow(gene_list)
            gene_list_num += 1

        num_significant = SafeCastUtil.safeCast(features_per_file / 10, int)
        significant_features = [
            RandomizedDataGenerator.SIGNIFICANT_FEATURE_PREFIX +
            SafeCastUtil.safeCast(significant_feature, str)
            for significant_feature in range(1, num_significant + 1)
        ]
        with open(self.path + "/" + self.SIGNIFICANT_GENE_LIST + ".csv",
                  "w", newline='') as csv_file:
            writer = csv.writer(csv_file,
                                delimiter=',',
                                quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerow(significant_features)
Exemplo n.º 5
0
    def generateRandomizedFiles(self,
                                num_feature_files,
                                num_cells,
                                num_features,
                                is_classifier,
                                monte_carlo_permutations,
                                data_split,
                                individual_algorithm=None,
                                individual_hyperparams=None,
                                analyze_all=False,
                                use_static_features=False):
        """Generate a full randomized input set: results, features, gene lists, args.

        The features are divided evenly over ``num_feature_files``; a random
        third of the per-file feature numbers is chosen as "important" and
        handed to the feature and gene-list generators. When
        ``use_static_features`` is set, the last generated feature file is
        passed to ``generateArgsTxt`` as the static feature file.
        """
        features_per_file = SafeCastUtil.safeCast(
            num_features / num_feature_files, int)
        results = self.generateResultsCSV(is_classifier, num_cells)

        candidate_features = range(1, features_per_file + 1)
        num_important = SafeCastUtil.safeCast((features_per_file / 3), int)
        important_features = random.sample(candidate_features, num_important)

        file_names = self.generateFeaturesCSVs(num_feature_files, num_cells,
                                               features_per_file, results,
                                               important_features,
                                               use_static_features)
        self.generateGeneLists(features_per_file, important_features)

        static_features = file_names[-1] if use_static_features else ""
        self.generateArgsTxt(is_classifier, monte_carlo_permutations,
                             data_split, individual_algorithm, file_names[0],
                             individual_hyperparams, analyze_all,
                             static_features)
Exemplo n.º 6
0
 def generateNewReportFile(self, stats_overview_object):
     """Return the report template's lines with its placeholders filled in.

     Reads ``Reports/reportTemplate.html`` next to this source file and
     replaces each line containing one of the ``//INSERT ... HERE`` markers
     with a generated JavaScript assignment. All other lines are copied
     unchanged. A ValueError raised while reading is logged and whatever
     was collected so far is returned.

     :param stats_overview_object: value rendered as ``$scope.allData``.
     :return: list of output lines (each ending in a newline).
     """
     containing_dir = os.path.abspath(
         os.path.join(os.path.realpath(__file__), os.pardir))
     template_path = containing_dir + "/Reports/reportTemplate.html"
     new_file = []
     with open(template_path) as template_file:
         try:
             for line in template_file:
                 if "//INSERT DEFAULT MIN SCORE HERE" in line:
                     rendered = ("\t\t\t\tvar DEFAULT_MIN_SCORE = " +
                                 SafeCastUtil.safeCast(
                                     AbstractModelTrainer.DEFAULT_MIN_SCORE, str) +
                                 ";\n")
                 elif "//INSERT CHART DATA HERE" in line:
                     rendered = ("\t\t\t\t$scope.allData = " +
                                 SafeCastUtil.safeCast(stats_overview_object, str) +
                                 ";\n")
                 elif "//INSERT IS CLASSIFIER HERE" in line:
                     rendered = ("\t\t\t\t$scope.isClassifier = " +
                                 SafeCastUtil.safeCast(
                                     self.is_classifier, str).lower() +
                                 ";\n")
                 else:
                     rendered = line
                 new_file.append(rendered)
         except ValueError as valueError:
             self.log.error(valueError)
     return new_file
Exemplo n.º 7
0
 def logIfBestHyperparamsOnRangeThreshold(self, best_hyperparams,
                                          record_diagnostics, input_folder):
     """Report hyperparameters whose best value sits on the edge of its range.

     For each entry of ``self.hyperparameters`` the chosen value is compared
     with the last and first candidate of that entry. A value at or beyond
     either end is logged at debug level and, when ``record_diagnostics``
     is set, also written through ``DiagnosticsFileWriter``.

     :param best_hyperparams: mapping of hyperparameter name to the chosen
         value, or None (in which case nothing is done).
     :param record_diagnostics: whether to also write the diagnostics file.
     :param input_folder: folder passed to the diagnostics writer.
     """
     if not self.supportsHyperparams() or best_hyperparams is None:
         return
     for hyperparam_name in SafeCastUtil.safeCast(self.hyperparameters.keys(),
                                                  list):
         hyperparam_set = self.hyperparameters[hyperparam_name]
         optimal_value = best_hyperparams.get(hyperparam_name)
         if optimal_value is None:
             # Logger.warn is a deprecated alias; the rest of the code base
             # already calls warning().
             self.log.warning(
                 "Unable to determine optimal value given hyperparams: %s",
                 SafeCastUtil.safeCast(best_hyperparams, str, None))
             continue
         # The upper bound is checked first, so a single-candidate set
         # reports "upper", exactly as before.
         if optimal_value >= hyperparam_set[-1]:
             threshold = "upper"
         elif optimal_value <= hyperparam_set[0]:
             threshold = "lower"
         else:
             continue
         message = "Best hyperparam for " + self.algorithm + " on " + threshold + " threshold of provided hyperparam " \
                   "set: " + hyperparam_name + " = " + SafeCastUtil.safeCast(optimal_value, str) + "\n"
         self.log.debug(message)
         if record_diagnostics:
             DiagnosticsFileWriter.writeToFile(input_folder, message,
                                               self.log)
 def fetchAndCastHyperparams(self, config, trainer):
     """Pair the trainer's hyperparameter names with the configured values.

     ``config.hyperparams`` is a comma-separated string; its values are
     matched by position with the keys of ``trainer.hyperparameters`` and
     cast to float via ``SafeCastUtil``.

     :return: OrderedDict of hyperparameter name to cast value.
     :raises IndexError: if fewer values are configured than the trainer
         has hyperparameters.
     """
     raw_values = config.hyperparams.split(",")
     hyperparam_dict = OrderedDict()
     names = SafeCastUtil.safeCast(trainer.hyperparameters.keys(), list)
     for position, name in enumerate(names):
         hyperparam_dict[name] = SafeCastUtil.safeCast(raw_values[position], float)
     return hyperparam_dict
Exemplo n.º 9
0
def promptUserForInput():
    """Show the main menu once and run the task the user selects.

    "Q" quits. 0, 1 and 2 prompt for a folder and start the cell line
    analysis, the MATLAB-to-CSV conversion, or the drug recommendation
    analysis respectively. Any other input does nothing.
    """
    menu = ("-------Main Menu-------\n"
            "Choose your task:\n"
            "\t0: Analysis of cell lines\n"
            "\t1: Convert MATLAB to CSV file\n"
            "\t2: Dr.S Analysis (Drug Recommendations System)\n"
            "\tQ: Quit\n")
    simulation_to_run = input(menu)

    option_as_int = SafeCastUtil.safeCast(simulation_to_run, int)
    option_as_string = SafeCastUtil.safeCast(simulation_to_run, str, "Q")

    if option_as_string == "Q":
        return
    if option_as_int == 0:
        runMainCellLineAnalysis(
            recursivelyPromptUser("Enter path of input folder:\n", str))
    elif option_as_int == 1:
        FileConverter.convertMatLabToCSV(
            recursivelyPromptUser("Enter folder path of the matlab files:\n",
                                  str))
    elif option_as_int == 2:
        fetchRecommendations(
            recursivelyPromptUser("Enter folder path of the input folder:\n",
                                  str))
 def writeToPredictionsCsvInLock(self, cell_line, drug, input_folder,
                                 prediction, score,
                                 _shared_lock=threading.Lock()):
     """Append one prediction row to the predictions CSV under a lock.

     The file is created with a header row when it does not yet exist in
     ``input_folder``; otherwise the row is appended.

     The lock used to be created inside the call, so every caller got its
     own lock and nothing was serialized. ``_shared_lock`` is evaluated
     once at definition time and is therefore shared by all calls in the
     process, which makes the exists-check and the write atomic between
     threads. It does not synchronize separate processes.

     :param cell_line: cell line name for the row.
     :param drug: drug name for the row.
     :param input_folder: folder containing (or to contain) the file.
     :param prediction: predicted value, written as a string.
     :param score: model score, written as a string.
     :param _shared_lock: internal; callers should not pass it.
     """
     self.log.debug("Locking current thread %s.",
                    threading.current_thread())
     # The context manager releases the lock even when open() fails.
     with _shared_lock:
         try:
             if self.PREDICTIONS_FILE in os.listdir(input_folder):
                 write_action = "a"
             else:
                 write_action = "w"
             with open(input_folder + "/" + self.PREDICTIONS_FILE,
                       write_action,
                       newline='') as predictions_file:
                 try:
                     writer = csv.writer(predictions_file)
                     if write_action == "w":
                         writer.writerow(
                             ["Drug", "Cell_Line", "Prediction", "R2^Score"])
                     writer.writerow([
                         drug, cell_line,
                         SafeCastUtil.safeCast(prediction, str),
                         SafeCastUtil.safeCast(score, str)
                     ])
                 except ValueError as error:
                     self.log.error("Error writing to file %s. %s",
                                    self.PREDICTIONS_FILE, error)
         finally:
             self.log.debug("Releasing current thread %s.",
                            threading.current_thread())
Exemplo n.º 11
0
    def fetchAllHyperparamPermutations(self, hyperparams):
        """Build a de-duplicated list of hyperparameter value combinations.

        One index per hyperparameter is kept in ``current_perm``. On every
        pass the combination those indices select is recorded (if not seen
        before), then the indices are advanced.

        :param hyperparams: mapping of hyperparameter name to an indexable
            sequence of candidate values.
        :return: list of OrderedDicts, each mapping every hyperparameter
            name to one candidate value. Appears intended to cover the full
            cartesian product -- TODO confirm.
        """
        all_perms = []
        hyperparam_keys = SafeCastUtil.safeCast(hyperparams.keys(), list)
        # numpy.zeros yields floats, which is why each index is cast to int
        # before it is used for lookup below.
        zero_filled_indices = SafeCastUtil.safeCast(
            numpy.zeros(len(hyperparam_keys)), list)
        # Start by advancing the right-most hyperparameter.
        target_index = len(zero_filled_indices) - 1
        current_perm = zero_filled_indices[:]
        while target_index >= 0:
            # Materialize the combination selected by the current indices.
            current_hyperparams = OrderedDict()
            for i in range(0, len(current_perm)):
                param_name = hyperparam_keys[i]
                current_hyperparams[param_name] = hyperparams[param_name][
                    SafeCastUtil.safeCast(current_perm[i], int)]
            # The walk can revisit a combination; only new ones are stored,
            # as deep copies.
            if current_hyperparams not in all_perms:
                clone_map = copy.deepcopy(current_hyperparams)
                all_perms.append(clone_map)

            if current_perm[target_index] < len(
                    hyperparams[hyperparam_keys[target_index]]) - 1:
                current_perm[target_index] += 1
                # Move the target back towards the right-most position.
                # NOTE(review): the bound uses the candidate list at
                # target_index, not target_index + 1 -- confirm intended.
                while len(current_perm) > target_index + 1 and current_perm[target_index + 1] <\
                        len(hyperparams[hyperparam_keys[target_index]]):
                    target_index += 1
            else:
                # This position is exhausted: step left and reset every
                # index to the right of the new target to zero.
                target_index -= 1
                for subsequent_index in range(target_index,
                                              len(current_perm) - 1):
                    current_perm[subsequent_index + 1] = 0
        return all_perms
Exemplo n.º 12
0
 def extractCastedFeatures(self, line):
     """Split a comma-separated line and cast each field to float or str.

     A field becomes a float when ``SafeCastUtil`` can cast it to one;
     otherwise it is kept as a stripped string.

     :param line: one comma-separated line of feature values.
     :return: list of cast values in field order.
     """
     features = []
     for raw_feature in line.split(","):
         is_numeric = SafeCastUtil.safeCast(raw_feature, float) is not None
         target_type = float if is_numeric else str
         features.append(
             SafeCastUtil.safeCast(raw_feature.strip(), target_type))
     return features
Exemplo n.º 13
0
 def toString(dictionary):
     """Render a dictionary as ``"key: value, key: value"``.

     Values are converted with ``SafeCastUtil.safeCast(value, str)``; keys
     are concatenated as-is, so they must already be strings.

     :return: the joined string, or "" for an empty dictionary.
     """
     rendered_pairs = [
         key + ": " + SafeCastUtil.safeCast(dictionary[key], str)
         for key in SafeCastUtil.safeCast(dictionary.keys(), list)
     ]
     return ", ".join(rendered_pairs)
 def formatFullFeatureMatrix(self, feature_names, transposed_dict):
     """Build a feature matrix dict from a per-cell-line dict of values.

     :param feature_names: stored under the ``self.FEATURE_NAMES`` key.
     :param transposed_dict: mapping of cell line to a mapping whose values
         are that cell line's feature values.
     :return: dict holding the feature names plus, per cell line, the list
         of values after ``self.formatValue`` has been applied to each.
     """
     feature_matrix = {self.FEATURE_NAMES: feature_names}
     for cell_line in SafeCastUtil.safeCast(transposed_dict.keys(), list):
         raw_values = SafeCastUtil.safeCast(transposed_dict[cell_line].values(), list)
         formatted_values = []
         for value in raw_values:
             formatted_values.append(self.formatValue(value))
         feature_matrix[cell_line] = SafeCastUtil.safeCast(formatted_values, list)
     return feature_matrix
 def furtherSplitTrainingMatrix(self, percent, matrix):
     """Return the first ``percent`` percent of the matrix's entries.

     Entries are taken in the matrix's own iteration order and the count is
     rounded down. The previous implementation built this subset but then
     returned the untouched input, so no split ever took effect. Its log
     call also passed ``percent`` as the format string.

     :param percent: share of cell lines to keep, 0-100.
     :param matrix: mapping of cell line to its feature values.
     :return: new dict with the leading ``percent`` percent of entries.
     """
     self.log.info(
         "Further splitting training matrix of %s cell lines down to %s percent.",
         len(matrix), percent)
     # Clamp at zero so a negative percent cannot turn into a negative
     # slice bound, which would keep entries instead of none.
     new_matrix_len = max(int(len(matrix) * (percent / 100)), 0)
     kept_cell_lines = list(matrix)[:new_matrix_len]
     return {cell_line: matrix[cell_line] for cell_line in kept_cell_lines}
Exemplo n.º 16
0
    def logOptimalHyperParams(self, hyperparams, feature_set_as_string,
                              record_diagnostics, input_folder):
        """Log the chosen hyperparameters, one ``name = value`` line each.

        :param hyperparams: mapping of hyperparameter name to chosen value.
        :param feature_set_as_string: feature set label used in the heading.
        :param record_diagnostics: also write the message to the
            diagnostics file when set.
        :param input_folder: folder passed to the diagnostics writer.
        """
        heading = "Optimal Hyperparameters for " + feature_set_as_string + " " + self.algorithm + " algorithm " \
                  "chosen as:\n"
        detail_lines = [
            "\t" + key + " = " + SafeCastUtil.safeCast(hyperparams[key], str) + "\n"
            for key in SafeCastUtil.safeCast(hyperparams.keys(), list)
        ]
        message = heading + "".join(detail_lines)
        self.log.info(message)
        if record_diagnostics:
            DiagnosticsFileWriter.writeToFile(input_folder, message, self.log)
Exemplo n.º 17
0
 def writeRandomFeature(self, file_name):
     """Return a one-element list holding a random value for the file type.

     The type is chosen by which suffix constant occurs in ``file_name``,
     checked in this order: binary categorical (quoted '0' or '1'),
     categorical (a letter a-e), integer (0-99 as a string), otherwise a
     random float as a string.
     """
     if self.BINARY_CATEGORICAL_SUFFIX in file_name:
         value = np.random.choice(["'0'", "'1'"])
     elif self.CATEGORICAL_SUFFIX in file_name:
         value = SafeCastUtil.safeCast(
             np.random.choice(["a", "b", "c", "d", "e"]), str)
     elif self.INTEGER_SUFFIX in file_name:
         value = SafeCastUtil.safeCast(np.random.randint(0, 100), str)
     else:
         value = SafeCastUtil.safeCast(np.random.random_sample(), str)
     return [value]
 def extractCastedFeatures(self, line, important_feature_indices):
     """Pick the requested columns from a CSV line and cast each one.

     :param line: one comma-separated line of feature values.
     :param important_feature_indices: column indices to extract; a None
         entry yields ``self.UNFILLED_VALUE_PLACEHOLDER``.
     :return: list with one value per requested index, as float when the
         field casts to float and as a stripped string otherwise.
     """
     feature_values = line.strip().split(",")
     important_features = []
     for index in important_feature_indices:
         if index is None:
             # TODO: Verify that this is acceptable, it works for one hot encoding and should never vary in any model
             important_features.append(self.UNFILLED_VALUE_PLACEHOLDER)
             continue
         raw_value = feature_values[index]
         is_numeric = SafeCastUtil.safeCast(raw_value, float) is not None
         target_type = float if is_numeric else str
         important_features.append(
             SafeCastUtil.safeCast(raw_value.strip(), target_type))
     return important_features
 def writeDiagnostics(self, features_removed):
     """Write a report of gene-list features missing from each feature file.

     :param features_removed: sequence of entries indexed as
         ``[file name, {gene list name: [(gene, index), ...]}, total]``,
         where ``total`` is the divisor for the missing percentage.
     """
     fragments = []
     for feature_file in features_removed:
         fragments.append("\nFeatures from gene list(s) not available in " + feature_file[0] + ":\n")
         missing_by_gene_list = feature_file[1]
         for gene_list in missing_by_gene_list.keys():
             missing_genes = missing_by_gene_list[gene_list]
             num_genes_missing = len(missing_genes)
             percent_genes_missing = round((num_genes_missing / feature_file[2]) * 100, 2)
             fragments.append("\t" + SafeCastUtil.safeCast(num_genes_missing, str) + " (" +
                              SafeCastUtil.safeCast(percent_genes_missing, str) + " %" +
                              ") features not present in " + gene_list + ".csv:\n")
             for gene in missing_genes:
                 fragments.append("\t\t" + gene[0] + " at index " + SafeCastUtil.safeCast(gene[1], str) + "\n")
     fragments.append("\n\n######################\n\n")
     DiagnosticsFileWriter.writeToFile(self.input_folder, "".join(fragments), self.log)
Exemplo n.º 20
0
def promptUserForInput():
    """Show the main menu once and run the cell line analysis if chosen.

    "Q" quits. 0 prompts for an input folder and starts the analysis. Any
    other input does nothing.
    """
    menu = ("-------Main Menu-------\n"
            "Choose your task:\n"
            "\t0: Analysis of cell lines\n"
            "\tQ: Quit\n")
    simulation_to_run = input(menu)

    simulation_as_int = SafeCastUtil.safeCast(simulation_to_run, int)
    simulation_as_string = SafeCastUtil.safeCast(simulation_to_run, str, "Q")

    if simulation_as_string == "Q":
        return
    if simulation_as_int == 0:
        runMainCellLineAnalysis(
            recursivelyPromptUser("Enter path of input folder:\n", str))
Exemplo n.º 21
0
    def convertMatLabToCSV(matlab_files_directory):
        """Convert every ``*.mat`` file in a directory into a folder of CSVs.

        Changes the working directory to ``matlab_files_directory``. For
        each MATLAB file it creates ``<drug>_analysis`` and writes one CSV
        per key of ``FileConverter.VARIABLE_MATCHES`` plus a
        ``<drug>_results.csv`` pairing cell line ids with results.

        :param matlab_files_directory: directory holding the ``.mat`` files.
        :raises OSError: from ``os.mkdir`` if an output folder already exists.
        """

        def format_id_string(array):
            # Each id arrives wrapped in an array; element 0 is the value.
            return SafeCastUtil.safeCast(array[0], str)

        log = LoggerFactory.createLog(__name__)

        os.chdir(matlab_files_directory)

        for input_file in glob.glob("*.mat"):
            drug_name = input_file.split("gexmutcnum.mat")[0].strip()
            new_directory = matlab_files_directory + "/" + drug_name + "_analysis"
            matlab_file = scipy.io.loadmat(input_file)

            os.mkdir(new_directory)
            output_prefix = new_directory + "/" + drug_name + "_"

            variable_keys = SafeCastUtil.safeCast(
                FileConverter.VARIABLE_MATCHES.keys(), list)
            for key in variable_keys:
                header = []
                for feature_name in matlab_file.get(key)[0]:
                    header.append(format_id_string(feature_name))
                file_name = output_prefix + FileConverter.FILE_NAMES[key] + ".csv"
                matched_variable = matlab_file.get(
                    FileConverter.VARIABLE_MATCHES.get(key))
                cell_line_data = FileConverter.formatCellLineData(
                    matched_variable, key)
                FileConverter.validateAndWriteCSV(
                    cell_line_data, header, file_name, log,
                    FileConverter.EXPECTED_TYPES[key])

            cell_line_ids = []
            for cell_id in matlab_file.get(FileConverter.ID_FIELD):
                cell_line_ids.append(format_id_string(cell_id))
            results = matlab_file.get(FileConverter.RESULTS_FIELD)
            zipped_results = SafeCastUtil.safeCast(
                zip(cell_line_ids, results[0]), list)
            results_file = output_prefix + "results.csv"

            FileConverter.validateAndWriteCSV(zipped_results,
                                              ["cell_line", "result"],
                                              results_file, log, float)
            log.info(
                "The MATLAB file for %s has been successfully converted into csv files ready to be used"
                " with the CLA software!", drug_name)

        log.info("All MATLAB files have been processed!")
Exemplo n.º 22
0
    def oneHotEncodeCategoricalVariables(self, genomes_matrix,
                                         categorical_variables):
        """Expand the given categorical columns into one 0/1 column per value.

        Each listed column index is replaced in every row by one indicator
        per distinct value seen in that column, in sorted value order.

        Encoding is abandoned and the original ``genomes_matrix`` returned
        when a column index or a value in a listed column is not castable
        to int. The index case used to log "Aborting" and then carry on
        into a comparison against None, raising TypeError. An empty matrix
        is now also returned unchanged instead of raising IndexError.

        :param genomes_matrix: sequence of rows (sequences of values).
        :param categorical_variables: column indices to encode, or
            None/empty for no encoding.
        :return: ``genomes_matrix`` itself when nothing is encoded or the
            encoding is aborted; otherwise a list of rows in which every
            encoded row is a numpy array.
        """
        if categorical_variables is None or len(categorical_variables) == 0:
            return genomes_matrix
        encoded_matrix = [list(genome) for genome in genomes_matrix]
        if not encoded_matrix:
            return genomes_matrix
        # Highest index first; presumably so that widening one column does
        # not shift the positions of columns still to be processed.
        sorted_deduped_variables = numpy.sort(
            numpy.unique(categorical_variables))[::-1]
        for variable_raw in sorted_deduped_variables:
            categorical_variable = SafeCastUtil.safeCast(variable_raw, int)
            if categorical_variable is None:
                self.log.warning(
                    "Aborting one-hot-encoding. Non-integer categorical variable index detected."
                )
                return genomes_matrix
            # NOTE(review): the strict "> 0" means column 0 is never
            # encoded -- confirm whether ">= 0" was intended.
            if not len(encoded_matrix[0]) > categorical_variable > 0:
                continue

            assigned_values = []
            for genome in encoded_matrix:
                value = genome[categorical_variable]
                if SafeCastUtil.safeCast(value, int) is None:
                    self.log.warning(
                        "Aborting one-hot-encoding. Non integer value for categorical variable "
                        "detected.")
                    return genomes_matrix
                if value not in assigned_values:
                    assigned_values.append(value)
            assigned_values = numpy.sort(assigned_values)

            for matrix_row, genome in enumerate(encoded_matrix):
                current_value = genome[categorical_variable]
                value_as_multiple_categories = [
                    1 if assigned_value == current_value else 0
                    for assigned_value in assigned_values
                ]
                encoded_matrix[matrix_row] = numpy.concatenate(
                    (genome[:categorical_variable],
                     value_as_multiple_categories,
                     genome[categorical_variable + 1:]))
        return encoded_matrix
 def assertResultsForIndividualCombo(self, target_dir, algorithm,
                                     expected_lines, is_classifier):
     """Check the results CSV written for one algorithm.

     Asserts that ``<algorithm>.csv`` exists in ``target_dir``, that its
     header matches ``MachineLearningService.getCSVFileHeader``, and that
     every data row has a ``:`` in its first column and a score above
     ``AbstractModelTrainer.DEFAULT_MIN_SCORE``. Per-row assertion failures
     are logged and stop the scan; the final line-count assertion always
     runs.
     """
     file_name = algorithm + ".csv"
     assert file_name in os.listdir(target_dir)
     num_lines = 0
     with open(target_dir + "/" + file_name) as csv_file:
         try:
             for line_number, line in enumerate(csv_file, start=1):
                 num_lines = line_number
                 line_split = line.strip().split(",")
                 if line_number == 1:
                     expected_header = MachineLearningService.getCSVFileHeader(
                         is_classifier, algorithm, 1)
                     assert line_split == expected_header
                     continue
                 assert ":" in line_split[0]
                 score = SafeCastUtil.safeCast(line_split[1], float)
                 assert score > AbstractModelTrainer.DEFAULT_MIN_SCORE
                 if len(line_split) > 3:
                     assert line_split[3] is not None
         except AssertionError as error:
             self.log.error(error)
         finally:
             self.log.debug("Closing file %s", file_name)
             assert num_lines == expected_lines
    def createAndValidateFeatureMatrix(self, results_list, gene_lists, write_diagnostics, feature_files,
                                       static_feature_files):
        """Validate gene lists against the feature files and build the matrix.

        Non-static feature files are first validated against the gene lists
        (optionally writing diagnostics) and then read through
        ``extractFeatureMatrix``. Static feature files are loaded as a
        data frame, formatted, and merged in: values are appended to keys
        that already exist and stored directly for new keys.

        :return: dict with a ``self.FEATURE_NAMES`` entry plus the per-key
            feature values gathered from all files.
        """
        incomplete_features = []
        for file in feature_files:
            if file in static_feature_files:
                continue
            validated_features, num_features = self.validateGeneLists(
                self.input_folder + "/" + file, file, gene_lists)
            incomplete_features.append([file, validated_features, num_features])

        if write_diagnostics:
            self.writeDiagnostics(incomplete_features)

        feature_matrix = {self.FEATURE_NAMES: []}
        for file in feature_files:
            features_path = self.input_folder + "/" + file
            if file not in static_feature_files:
                self.extractFeatureMatrix(feature_matrix, features_path, file, gene_lists, results_list)
                continue

            cell_lines = [result[0] for result in results_list]
            data_frame = self.fetchFullDataframe(cell_lines, file)
            feature_names = SafeCastUtil.safeCast(data_frame.columns, list)
            formatted_matrix = self.formatFullFeatureMatrix(feature_names, data_frame.T.to_dict())

            for key, values in formatted_matrix.items():
                if key in feature_matrix:
                    feature_matrix[key].extend(values)
                else:
                    feature_matrix[key] = values
        return feature_matrix
 def validateAndExtractResults(self, results_file, is_classifier):
     """Read the results CSV into a list of ``[cell_line, result]`` pairs.

     The first line is treated as a header and blank lines are skipped.
     Results are cast to int for classifiers and float otherwise.

     The duplicate check used to test the cell line name against the list
     of ``[name, result]`` pairs, which can never match. Names are now
     tracked in a set so repeated cell lines are actually detected.

     NOTE(review): the ValueError raised for a malformed or repeated line
     is caught and logged here, so the caller receives the rows read up to
     that point rather than an exception -- confirm that is the intended
     meaning of "Aborting".

     :param results_file: file name inside ``self.input_folder``.
     :param is_classifier: selects int (True) or float (False) results.
     :return: list of ``[cell_line, result]`` in file order.
     """
     cast_type = int if is_classifier else float
     results_path = self.input_folder + "/" + results_file
     sample_list = []
     seen_cell_lines = set()
     with open(results_path) as data_file:
         try:
             for line_index, line in enumerate(data_file):
                 if line_index == 0 or not line.strip():  # header or whitespace
                     continue
                 line_trimmed_split = line.strip().split(",")
                 if len(line_trimmed_split) != 2:
                     self.log.error("Each line in %s must be 2 columns. Aborting argument processing.",
                                    results_file)
                     raise ValueError("Each line in results file must be 2 columns.")
                 cell_line = line_trimmed_split[0]
                 if cell_line in seen_cell_lines:
                     self.log.error("Repeated cell line name: %s. Aborting argument processing.", cell_line)
                     raise ValueError("Repeated cell line name.")
                 seen_cell_lines.add(cell_line)
                 cell_result = SafeCastUtil.safeCast(line_trimmed_split[1], cast_type)
                 sample_list.append([cell_line, cell_result])
         except ValueError as value_error:
             self.log.error(value_error)
         finally:
             self.log.debug("Closing file %s", results_file)
     return sample_list
 def fetchDrugScoresByCellLine(self, input_folder):
     """Group the predictions file's (drug, score) pairs by cell line.

     The first line is skipped as a header. Columns are read as drug, cell
     line, score. Lines with fewer than three columns, an empty drug or
     cell line, or a score that cannot be cast to float are logged and
     skipped.

     A score of exactly 0.0 used to be discarded by a truthiness test; only
     an uncastable score (None) is rejected now. Short or blank lines used
     to raise an uncaught IndexError.

     :param input_folder: folder containing the predictions file.
     :return: dict of cell line name to list of (drug, score) tuples.
     """
     predictions_file = input_folder + "/" + RecommendationsService.PREDICTIONS_FILE
     drug_scores_by_cell_line = {}
     with open(predictions_file) as input_file:
         try:
             for line_index, line in enumerate(input_file):
                 if line_index == 0:
                     continue
                 line_split = line.split(",")
                 score = None
                 if len(line_split) >= 3:
                     score = SafeCastUtil.safeCast(line_split[2], float)
                 if score is None or not line_split[0] or not line_split[1]:
                     self.log.warning(
                         "Invalid line detected for %s at line %s.",
                         predictions_file, line_index + 1)
                     continue
                 drug = line_split[0]
                 cell_line = line_split[1]
                 drug_scores_by_cell_line.setdefault(cell_line, []).append(
                     (drug, score))
         except ValueError as error:
             self.log.error("Error parsing predictions file %s. %s",
                            predictions_file, error)
     return drug_scores_by_cell_line
 def fetchBestHyperparams(self, row, indices_of_outer_loops):
     """Pick the hyperparameter set chosen most often across Monte Carlo runs.

     ``self.getMonteCarloResults`` supplies a mapping of hyperparameter set
     to its list of scores. The set with the most scores wins; ties are
     broken by the highest average score.

     With a single winner the method used to return the leftover loop
     variable (the last key iterated) instead of the winner itself.

     NOTE(review): the tie-break starts from a score of 0, so tied sets
     whose averages are all <= 0 yield None. That behaviour is kept as-is;
     confirm whether it is intended.

     :return: the winning hyperparameter key, or None when there are no
         results or no tied candidate has a positive average.
     """
     monte_carlo_results = self.getMonteCarloResults(
         row, indices_of_outer_loops)
     max_num_occurrences = 0
     best_hyps_list = []
     for hyperparam, scores in monte_carlo_results.items():
         num_occurrences = len(scores)
         if num_occurrences > max_num_occurrences:
             max_num_occurrences = num_occurrences
             best_hyps_list = [hyperparam]
         elif num_occurrences == max_num_occurrences:
             best_hyps_list.append(hyperparam)

     if len(best_hyps_list) == 1:
         return best_hyps_list[0]

     best_hyps = None
     top_score = 0
     for hyperparam in best_hyps_list:
         average_score = numpy.average(monte_carlo_results.get(hyperparam))
         if average_score > top_score:
             top_score = average_score
             best_hyps = hyperparam
     return best_hyps
 def fetchOrReturnDefault(self, field, to_type, default):
     """Cast a raw field to ``to_type``, or return ``default`` if it is empty.

     The text "false" (any letter case) is mapped to False when a bool is
     requested; without that, casting a non-empty string to bool would give
     True.

     :param field: raw string value; falsy values select ``default``.
     :param to_type: target type handed to ``SafeCastUtil.safeCast``.
     :param default: value returned for a falsy ``field``.
     """
     if not field:
         return default
     wants_false = field.lower() == 'false' and to_type is bool
     if wants_false:
         return False
     return SafeCastUtil.safeCast(field, to_type)
    def writeToCSVInLock(self, line, input_folder, ml_algorithm, num_combos, outer_perms,
                         _shared_lock=threading.Lock()):
        """Append one result row to ``<ml_algorithm>.csv`` and log progress.

        The file is created with a header when it does not yet exist in
        ``input_folder``. After writing, the file is read back to count its
        data rows, which are passed to ``logPercentDone``.

        The lock used to be created inside the call, so each caller held a
        private lock and writes were never serialized. ``_shared_lock`` is
        evaluated once at definition time and shared by every call in the
        process. It is held through a context manager so that it is
        released if anything raises. It does not synchronize separate
        processes.

        :param line: sequence of cell values forming the row.
        :param input_folder: folder containing (or to contain) the file.
        :param ml_algorithm: algorithm name; also the CSV file stem.
        :param num_combos: total number of combinations, for the progress log.
        :param outer_perms: passed to ``getCSVFileHeader`` for a new file.
        :param _shared_lock: internal; callers should not pass it.
        """
        with _shared_lock:
            self.lockThreadMessage()
            try:
                file_name = ml_algorithm + ".csv"
                file_path = input_folder + "/" + file_name
                write_action = "a" if file_name in os.listdir(input_folder) else "w"
                with open(file_path, write_action, newline='') as csv_file:
                    try:
                        writer = csv.writer(csv_file)
                        if write_action == "w":
                            writer.writerow(self.getCSVFileHeader(self.inputs.is_classifier, ml_algorithm, outer_perms))
                        writer.writerow(line)
                    except ValueError as error:
                        self.log.error("Error writing to file %s. %s", file_name, error)

                total_lines = 0
                with open(file_path, newline='') as csv_file:
                    try:
                        reader = csv.reader(csv_file)
                        # Minus one for the header row.
                        total_lines += (len(SafeCastUtil.safeCast(reader, list)) - 1)
                    except ValueError as error:
                        self.log.error("Error reading lines from file %s. %s", file_name, error)
                    finally:
                        self.logPercentDone(total_lines, num_combos, ml_algorithm)
            finally:
                self.unlockThreadMessage()
    def assertRecsByCellLine(self, num_cell_lines, drug_names, target_dir):
        """Check the predictions-by-cell-line CSV in ``target_dir``.

        The header must start with "Cell Line". In every other row the
        first column must contain "cell_line", odd columns must be known
        drug names and even columns scores above
        ``AbstractModelTrainer.DEFAULT_MIN_SCORE``. Per-row assertion
        failures are logged and stop the scan; the final line-count
        assertion (header plus one row per cell line) always runs.
        """
        file_name = target_dir + "/" + RecommendationsService.PREDICTIONS_BY_CELL_LINE_FILE
        num_lines = 0
        with open(file_name) as csv_file:
            try:
                for line_number, line in enumerate(csv_file, start=1):
                    num_lines = line_number
                    line_split = line.split(",")

                    if line_number == 1:
                        assert line_split[0] == "Cell Line"
                        continue

                    assert "cell_line" in line_split[0]
                    for position, cell in enumerate(line_split[1:], start=1):
                        if position % 2 == 0:
                            assert SafeCastUtil.safeCast(cell, float) > AbstractModelTrainer.DEFAULT_MIN_SCORE
                        else:
                            assert cell in drug_names
            except AssertionError as error:
                self.log.error(error)
            finally:
                self.log.debug("Closing file %s", file_name)
                assert num_lines == num_cell_lines + 1