def evaluate(self, array_data):
    """
    Compute the standard deviation of a numeric array and print it.

    NaN entries are skipped. When StatContainer holds a conditional array
    of the same size, only the entries it selects are used.

    Parameters:
        array_data: object whose ``data`` is the array to analyze and whose
            ``name`` / ``keyword_list`` are used for display and naming.

    Returns:
        ResultObject holding the standard deviation on success, or an
        error ResultObject when the array is not numeric.
    """
    values = array_data.data
    if not numpy.issubdtype(values.dtype, numpy.number):
        Printer.Print("The array is not of numeric type so cannot",
                      "find stdev")
        return ResultObject(None, None, None, CommandStatus.Error)

    # Start from the non-NaN entries and narrow by the active filter, if any
    mask = numpy.logical_not(numpy.isnan(values))
    if (StatContainer.conditional_array is not None and
            StatContainer.conditional_array.data.size == values.size):
        mask = numpy.logical_and(mask, StatContainer.conditional_array.data)
    std_val = numpy.std(values[mask])

    outcome = ResultObject(std_val, [], DataType.array,
                           CommandStatus.Success)
    outcome.createName(array_data.keyword_list,
                       command_name=self.commandTags()[0],
                       set_keyword_list=True)

    # One-row table: feature name next to its standard deviation
    report = pd.DataFrame()
    report['Feature'] = [array_data.name]
    report['Standard Deviation'] = [std_val]
    TablePrinter.printDataFrame(report)
    return outcome
def evaluate(self, data_frame, classifier_algo):
    """
    Train a classifier on the columns of a data frame.

    The ground truth stored in StatContainer supplies the labels. Features
    are standardized before fitting, and the fitted scaler is returned
    together with the fitted model.

    Parameters:
        data_frame: object whose ``data`` is the feature data frame.
        classifier_algo: object whose ``data[0]`` is the model to fit and
            whose ``name`` labels the result.

    Returns:
        ResultObject holding ``{'Scaler': ..., 'Model': ...}`` on success,
        or an error ResultObject when no ground truth has been set.
    """
    if StatContainer.ground_truth is None:
        Printer.Print("Please set a feature vector to ground truth by",
                      "typing set ground truth before using this command")
        return ResultObject(None, None, None, CommandStatus.Error)

    # Separate the ground truth from the features and drop rows with NaNs
    features = DataGuru.removeGT(data_frame.data, StatContainer.ground_truth)
    labels = StatContainer.filterGroundTruth()
    features, labels = DataGuru.removenan(features, labels)

    model = classifier_algo.data[0]

    # Standardize the feature matrix
    raw_matrix = features.values
    scaler = preprocessing.StandardScaler().fit(raw_matrix)
    X = scaler.transform(raw_matrix)

    Printer.Print("Training the classifier")
    feature_table = pd.DataFrame()
    feature_table['Features'] = features.columns
    TablePrinter.printDataFrame(feature_table)
    model.fit(X, labels)

    Printer.Print("The classifier", classifier_algo.name,
                  "has been trained")
    # Accuracy is measured on the same data used for training
    accuracy = metrics.accuracy_score(model.predict(X), labels)
    Printer.Print("Accuracy on training set : %s" %
                  "{0:.3%}".format(accuracy))

    outcome = ResultObject({'Scaler': scaler, 'Model': model}, [],
                           DataType.trained_model, CommandStatus.Success)
    outcome.createName(data_frame.keyword_list,
                       command_name=classifier_algo.name.replace('.', ' '),
                       set_keyword_list=True)
    return outcome
def initializeTable(self):
    """
    Initialize TablePrinter with four columns: name, type, size and range.
    """
    # (header, alignment, width) for each column, in display order
    column_specs = [
        ("Column_name", Align.Right, 30),
        ("Column_type", Align.Center, 15),
        ("Size", Align.Center, 6),
        ("Column_range", Align.Left, 40),
    ]
    headers = [spec[0] for spec in column_specs]
    alignments = [spec[1] for spec in column_specs]
    col_widths = [spec[2] for spec in column_specs]
    TablePrinter.initialize(len(column_specs), col_widths, headers,
                            alignments, tabbed=False)
def evaluate(self, data_frame, target):
    """
    Rank the columns of a data frame by random-forest feature importance.

    The ground truth stored in StatContainer supplies the labels. The
    number of features to keep is read from ``target.data``; when no
    positive number is found, the top 10 are kept.

    Parameters:
        data_frame: object whose ``data`` is the feature data frame.
        target: object whose ``data`` is searched for the requested count.

    Returns:
        ResultObject holding a data frame restricted to the top features,
        or an error ResultObject when no ground truth has been set.
    """
    if StatContainer.ground_truth is None:
        Printer.Print("Please set a feature vector to ground truth by",
                      "typing set ground truth before using this command")
        return ResultObject(None, None, None, CommandStatus.Error)

    # Separate the ground truth from the features and drop rows with NaNs
    features = DataGuru.removeGT(data_frame.data, StatContainer.ground_truth)
    labels = StatContainer.filterGroundTruth()
    features, labels = DataGuru.removenan(features, labels)

    # Default to the top 10 features when no positive count was given
    numbers = findNumbers(target.data, 1)
    num = (int(numbers[0].data)
           if numbers != [] and numbers[0].data > 0 else 10)

    # Standardize the feature matrix before fitting the forest
    raw_matrix = features.values
    scaler = preprocessing.StandardScaler().fit(raw_matrix)
    X = scaler.transform(raw_matrix)

    forest = RandomForestClassifier(n_estimators=100)
    forest.fit(X, labels)
    ranking = pd.Series(forest.feature_importances_,
                        index=features.columns)
    ranking = ranking.sort_values(ascending=False)
    top_names = ranking.index[0:num]

    summary = pd.DataFrame()
    summary['top features'] = top_names
    summary['feature importance'] = ranking.values[0:num]
    TablePrinter.printDataFrame(summary)

    outcome = ResultObject(features[top_names], [], DataType.csv,
                           CommandStatus.Success)
    outcome.createName(data_frame.name, command_name='top.predictors',
                       set_keyword_list=True)
    return outcome
def evaluate(self):
    """
    Clear the row labels stored in StatContainer.

    If row labels are currently set, their table background is cleared
    first.

    Returns:
        A success ResultObject carrying no data.
    """
    current_labels = StatContainer.row_labels
    if current_labels is not None:
        TablePrinter.clearBackGround(current_labels.name)
    StatContainer.row_labels = None
    Printer.Print("clearing row labels")
    return ResultObject(None, None, None, CommandStatus.Success)
def evaluate(self, array_data):
    """
    Store the given array as the ground truth (reference) in StatContainer.

    If a ground truth is already set, its table background is cleared
    before it is replaced.

    Parameters:
        array_data: object to store; its ``keyword_list`` is printed.

    Returns:
        A success ResultObject carrying no data.
    """
    previous = StatContainer.ground_truth
    if previous is not None:
        TablePrinter.clearBackGround(previous.name)
    StatContainer.ground_truth = array_data
    description = " ".join(array_data.keyword_list)
    Printer.Print("Setting reference to ", description)
    return ResultObject(None, None, None, CommandStatus.Success)
def evaluate(self, array_data):
    """
    Store the given array as the row labels in StatContainer.

    If row labels are already set, their table background is cleared
    before they are replaced.

    Parameters:
        array_data: object to store; its ``keyword_list`` is printed.

    Returns:
        A success ResultObject carrying no data.
    """
    previous = StatContainer.row_labels
    if previous is not None:
        TablePrinter.clearBackGround(previous.name)
    StatContainer.row_labels = array_data
    description = " ".join(array_data.keyword_list)
    Printer.Print("Setting row label to ", description)
    return ResultObject(None, None, None, CommandStatus.Success)
def evaluate(self, array_data):
    """
    Find the maximum of an array and, if row labels are set, its label.

    NaN (numeric) or NaT (datetime) entries are skipped. When StatContainer
    holds a conditional array of the same size, only the entries it
    selects are considered.

    Parameters:
        array_data: object whose ``data`` is the array to analyze and whose
            ``name`` / ``keyword_list`` are used for display and naming.

    Returns:
        On success, a list of ResultObjects: the row label of the maximum
        (only when row labels are set) followed by the maximum value.
        A single error ResultObject when the dtype is unsupported or no
        valid entry remains.
    """
    result_objects = []
    result_object = ResultObject(None, None, None, CommandStatus.Error)
    array = array_data.data
    if numpy.issubdtype(array.dtype, numpy.number):
        idx = numpy.logical_not(numpy.isnan(array))
    elif numpy.issubdtype(array.dtype, numpy.datetime64):
        idx = numpy.logical_not(numpy.isnat(array))
    else:
        Printer.Print("The array is not supported type so cannot find max")
        return result_object

    if (StatContainer.conditional_array is not None and
            StatContainer.conditional_array.data.size == array.size):
        idx = numpy.logical_and(idx, StatContainer.conditional_array.data)

    # numpy.max raises on an empty selection (all NaN / everything filtered)
    if not numpy.any(idx):
        Printer.Print("No data")
        return result_object

    selected = array[idx]
    max_val = numpy.max(selected)
    # argmax is relative to the filtered array; translate it back to a
    # position in the unfiltered array so it can index the row labels.
    max_pos = numpy.flatnonzero(idx)[numpy.argmax(selected)]

    has_row_labels = StatContainer.row_labels is not None
    if has_row_labels:
        # assumes row_labels.data is aligned with the unfiltered array
        # -- TODO confirm
        max_rl = StatContainer.row_labels.data[max_pos]
        # Result for max index
        result_object = ResultObject(max_rl, [], DataType.array,
                                     CommandStatus.Success)
        result_object.createName(StatContainer.row_labels.name,
                                 command_name=self.commandTags()[0],
                                 set_keyword_list=True)
        result_objects.append(result_object)

    # Result for max value
    result_object = ResultObject(max_val, [], DataType.array,
                                 CommandStatus.Success)
    result_object.createName(array_data.keyword_list,
                             command_name=self.commandTags()[0],
                             set_keyword_list=True)
    result_objects.append(result_object)

    # One-row table with the results
    df_new = pd.DataFrame()
    df_new['Feature'] = [array_data.name]
    df_new['Maximum'] = [max_val]
    if has_row_labels:
        df_new[StatContainer.row_labels.name] = [max_rl]
    TablePrinter.printDataFrame(df_new)
    return result_objects
def evaluate(self, array_datas, data_frame):
    """
    Summarize a data frame (or a set of arrays) via ``performOperation``.

    The data frame is used when given; otherwise the arrays are converted
    to a data frame first. The summary table is printed and returned both
    as a whole and column by column.

    Parameters:
        array_datas: arrays to analyze when ``data_frame`` is None.
        data_frame: object whose ``data`` is the data frame to analyze.

    Returns:
        On success, a list of ResultObjects: the summary data frame
        followed by one ResultObject per summary column.
        A single error ResultObject when no input is given or the arrays
        cannot be converted.
    """
    if data_frame is not None:
        df = data_frame.data
        cname = data_frame.name
    elif array_datas is not None:
        command_status, df, kl1, cname = DataGuru.transformArray_to_dataFrame(
            array_datas)
        # Check the status before touching cname/kl1: their contents after
        # a failed conversion are not visible here (assumed unreliable).
        if command_status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)
        if len(cname) == 0:
            cname = ".".join(kl1)
    else:
        Printer.Print("Please provide data frame or arrays to analyze")
        return ResultObject(None, None, None, CommandStatus.Error)

    df_new = self.performOperation(df)
    TablePrinter.printDataFrame(df_new)

    result_objects = []
    command_name = "smry"

    # Adding the newly created CSV
    result_object = ResultObject(df_new, [], DataType.csv,
                                 CommandStatus.Success)
    result_object.createName(cname, command_name=command_name,
                             set_keyword_list=True)
    result_objects.append(result_object)

    # Name each column result after its column name with the common part
    # of the names removed
    kl1 = df_new.columns
    truncated_kl1, _ = StatContainer.removeCommonNames(kl1)
    for col, col_name in enumerate(kl1):
        result_object = ResultObject(df_new[col_name], [], DataType.array,
                                     CommandStatus.Success)
        result_object.createName(truncated_kl1[col],
                                 command_name=command_name,
                                 set_keyword_list=True)
        result_objects.append(result_object)
    return result_objects
def evaluate(self, array_data):
    """
    Compute the range (max - min) of an array and print it.

    NaN (numeric) or NaT (datetime) entries are skipped. When StatContainer
    holds a conditional array of the same size, only the entries it
    selects are considered.

    Parameters:
        array_data: object whose ``data`` is the array to analyze and whose
            ``name`` / ``keyword_list`` are used for display and naming.

    Returns:
        ResultObject holding the range on success, or an error ResultObject
        when the dtype is unsupported or no valid entry remains.
    """
    result_object = ResultObject(None, None, None, CommandStatus.Error)
    array = array_data.data
    if numpy.issubdtype(array.dtype, numpy.number):
        idx = numpy.logical_not(numpy.isnan(array))
    elif numpy.issubdtype(array.dtype, numpy.datetime64):
        idx = numpy.logical_not(numpy.isnat(array))
    else:
        # The message previously said "max" (copied from the max command)
        Printer.Print("The array is not supported type so cannot find range")
        return result_object

    if (StatContainer.conditional_array is not None and
            StatContainer.conditional_array.data.size == array.size):
        idx = numpy.logical_and(idx, StatContainer.conditional_array.data)

    # numpy.max / numpy.min raise on an empty selection
    if not numpy.any(idx):
        Printer.Print("No data")
        return result_object

    selected = array[idx]
    max_val = numpy.max(selected)
    min_val = numpy.min(selected)
    range_val = max_val - min_val

    result_object = ResultObject(range_val, [], DataType.array,
                                 CommandStatus.Success)
    result_object.createName(array_data.keyword_list,
                             command_name=self.commandTags()[0],
                             set_keyword_list=True)

    # One-row table with the range and its two end points
    df_new = pd.DataFrame()
    df_new['Feature'] = [array_data.name]
    df_new['Range'] = [range_val]
    df_new['Minimum'] = [min_val]
    df_new['Maximum'] = [max_val]
    TablePrinter.printDataFrame(df_new)
    return result_object
def evaluate(self, array_datas):
    """
    Build a new data frame from the supplied arrays and print it.

    Parameters:
        array_datas: arrays to combine.

    Returns:
        ResultObject holding the data frame on success, or an error
        ResultObject when the arrays cannot be converted.
    """
    conversion = DataGuru.transformArray_to_dataFrame(array_datas)
    command_status, df, kl1, cname = conversion
    if command_status == CommandStatus.Error:
        Printer.Print(
            "Please check whether the arrays are of the same size")
        return ResultObject(None, None, None, CommandStatus.Error)

    outcome = ResultObject(df, [], DataType.csv, CommandStatus.Success)
    outcome.createName(cname, command_name='concatenate.array',
                       set_keyword_list=True)
    TablePrinter.printDataFrame(df)
    return outcome
def evaluate(self, history, user_conv):
    """
    Print a table of the commands stored in the session history.

    When the user input matches one or more command types in
    ``self.commandtype_database``, only commands of those types are
    listed. The table is sorted by command type.

    Parameters:
        history: object whose ``data.command_database`` holds the commands.
        user_conv: object whose ``data`` is the user input to search.

    Returns:
        A success ResultObject carrying no data (also when the history has
        no command database).
    """
    result_object = ResultObject(None, None, None, CommandStatus.Success)
    if not hasattr(history.data, 'command_database'):
        Printer.Print("History does not contain command database")
        return result_object

    command_database = history.data.command_database
    TablePrinter.initialize(
        4, [20, 20, 30, 55],
        ['Command Name', 'Command type', 'Keywords', 'Description'],
        [Align.Right, Align.Center, Align.Center, Align.Left])

    # Command types mentioned by the user; empty means "show everything"
    requested_types = [
        match.data
        for match in self.commandtype_database.search(user_conv.data)
    ]

    for entry in command_database.data_objects:
        command = entry.data
        command_type = command.commandType()
        if requested_types != [] and command_type not in requested_types:
            continue

        # Fall back to the first keyword when the entry has no name
        if entry.name is not None:
            object_name = entry.name
        elif len(entry.keyword_list) == 0:
            object_name = "None"
        else:
            object_name = entry.keyword_list[0]

        TablePrinter.addRow((object_name,
                             command_type.name.lower(),
                             ' '.join(command.commandTags()[:2]),
                             command.briefDescription()))

    TablePrinter.sort(1)  # Sort data by command type
    TablePrinter.show()
    return result_object
def evaluate(self, array_datas, data_frame):
    """
    Run pairwise t-tests on every feature between ground-truth groups.

    For each pair of unique ground-truth values, each feature is compared
    between the two groups with ``scipy.stats.ttest_ind`` using
    ``equal_var=False``, and the p-value is stored in a column named
    ``'pValue: <i> vs <j>'``.

    Parameters:
        array_datas: arrays to analyze when ``data_frame`` is None.
        data_frame: object whose ``data`` is the data frame to analyze.

    Returns:
        On success, a list of ResultObjects: the p-value table followed by
        one ResultObject per table column.
        A single error ResultObject when no input is given, no ground truth
        is set, sizes do not match, or the ground truth is not categorical.
    """
    if data_frame is not None:
        df = data_frame.data
        cname = data_frame.name
    elif array_datas is not None:
        command_status, df, kl1, cname = DataGuru.transformArray_to_dataFrame(
            array_datas)
        if command_status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)
    else:
        Printer.Print("Please provide data frame or arrays to analyze")
        return ResultObject(None, None, None, CommandStatus.Error)

    if StatContainer.ground_truth is None:
        Printer.Print("Could not find the reference variable.")
        Printer.Print("Please set the reference variable")
        return ResultObject(None, None, None, CommandStatus.Error)

    gtVals = StatContainer.filterGroundTruth()
    ground_truth = StatContainer.ground_truth.name
    if len(gtVals) != df.shape[0]:
        Printer.Print(
            "The size of the ground truth does not match with arrays being analyzed"
        )
        Printer.Print(len(gtVals), df.shape[0])
        return ResultObject(None, None, None, CommandStatus.Error)

    uniqVals = StatContainer.isCategorical(gtVals)
    if uniqVals is None:
        # isCategorical returns None for non-categorical data
        Printer.Print("The reference variable is not categorical so cannot",
                      "run the significance test")
        return ResultObject(None, None, None, CommandStatus.Error)

    # NOTE: this writes the ground-truth column into the frame held by the
    # caller (kept as in the original behavior).
    df[ground_truth] = gtVals
    df_new = pd.DataFrame()
    if ground_truth in df.columns:
        df_new['features'] = df.columns.drop(ground_truth).values
    else:
        df_new['features'] = df.columns

    # Every unordered pair (i, j), i < j, of ground-truth groups
    n_groups = len(uniqVals)
    pairs = [(first, second)
             for first in range(n_groups)
             for second in range(first + 1, n_groups)]
    # Pre-create the p-value columns, filled with zeros
    for first, second in pairs:
        df_new['pValue: ' + str(first) + ' vs ' + str(second)] = np.zeros(
            df_new.shape[0])

    for row, feature in enumerate(df_new['features']):
        arr = df[feature]
        for first, second in pairs:
            if uniqVals[first] == uniqVals[second]:
                continue  # identical groups keep the pre-filled 0
            a = arr[gtVals == uniqVals[first]]
            b = arr[gtVals == uniqVals[second]]
            ttest_val = scipy.stats.ttest_ind(a, b, axis=0, equal_var=False)
            # Single-step assignment: chained df_new[col][row] = ... may
            # write to a temporary copy and leave df_new unchanged.
            column = 'pValue: ' + str(first) + ' vs ' + str(second)
            df_new.at[row, column] = ttest_val.pvalue

    TablePrinter.printDataFrame(df_new)

    result_objects = []
    command_name = 'sigtest'

    # Adding the newly created csv
    result_object = ResultObject(df_new, [], DataType.csv,
                                 CommandStatus.Success)
    result_object.createName(cname, command_name=command_name,
                             set_keyword_list=True)
    result_objects.append(result_object)

    # Name each column result after its column name with the common part
    # of the names removed
    kl1 = df_new.columns
    truncated_kl1, _ = StatContainer.removeCommonNames(kl1)
    for col, col_name in enumerate(kl1):
        result_object = ResultObject(df_new[col_name], [], DataType.array,
                                     CommandStatus.Success)
        result_object.createName(truncated_kl1[col],
                                 command_name=command_name,
                                 set_keyword_list=True)
        result_objects.append(result_object)
    return result_objects
def evaluate(self, array_datas, data_frame):
    """
    Compute a per-feature AUC against the ground truth.

    For each feature a LogisticRegression is fitted on that single feature.
    A one-vs-rest ROC curve is computed for every class from the predicted
    probabilities, and the mean AUC over classes is stored in an ``'AUC'``
    column.

    Parameters:
        array_datas: arrays to analyze when ``data_frame`` is None.
        data_frame: object whose ``data`` is the data frame to analyze.

    Returns:
        On success, a list of ResultObjects: the AUC table followed by one
        ResultObject per table column.
        A single error ResultObject when no input is given, no ground truth
        is set, sizes do not match, or the ground truth is not categorical.
    """
    if data_frame is not None:
        df = data_frame.data
        cname = data_frame.name
    elif array_datas is not None:
        command_status, df, kl1, cname = DataGuru.transformArray_to_dataFrame(
            array_datas)
        if command_status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)
    else:
        Printer.Print("Please provide data frame or arrays to analyze")
        return ResultObject(None, None, None, CommandStatus.Error)

    if StatContainer.ground_truth is None:
        Printer.Print("Could not find the reference variable.")
        Printer.Print("Please set the reference variable")
        return ResultObject(None, None, None, CommandStatus.Error)

    gtVals = StatContainer.filterGroundTruth()
    ground_truth = StatContainer.ground_truth.name
    if len(gtVals) != df.shape[0]:
        Printer.Print(
            "The size of the ground truth does not match with arrays being analyzed"
        )
        Printer.Print(len(gtVals), df.shape[0])
        return ResultObject(None, None, None, CommandStatus.Error)

    uniqVals = StatContainer.isCategorical(gtVals)
    if uniqVals is None:
        # isCategorical returns None for non-categorical data
        Printer.Print("The reference variable is not categorical so cannot",
                      "compute the ROC")
        return ResultObject(None, None, None, CommandStatus.Error)

    # NOTE: this writes the ground-truth column into the frame held by the
    # caller (kept as in the original behavior).
    df[ground_truth] = gtVals
    df_new = pd.DataFrame()
    if ground_truth in df.columns:
        df_new['features'] = df.columns.drop(ground_truth).values
    else:
        df_new['features'] = df.columns

    avgAUC = []
    for feature in df_new['features']:
        # Single-feature design matrix of shape (n_samples, 1)
        X1 = df[feature].values.reshape(-1, 1)
        model = LogisticRegression()
        model.fit(X1, gtVals)

        # Column k of predict_proba corresponds to model.classes_[k], so
        # pair each column with its own class as the positive label.
        Y_Pr = model.predict_proba(X1)
        allAUC = []
        for class_col, class_label in enumerate(model.classes_):
            fpr, tpr, _ = metrics.roc_curve(gtVals, Y_Pr[:, class_col],
                                            pos_label=class_label)
            allAUC.append(metrics.auc(fpr, tpr))
        avgAUC.append(np.mean(allAUC))
    df_new['AUC'] = avgAUC

    TablePrinter.printDataFrame(df_new)

    result_objects = []
    command_name = 'rcurve'

    # New data frame
    result_object = ResultObject(df_new, [], DataType.csv,
                                 CommandStatus.Success)
    result_object.createName(cname, command_name=command_name,
                             set_keyword_list=True)
    result_objects.append(result_object)

    # Name each column result after its column name with the common part
    # of the names removed
    kl1 = df_new.columns
    truncated_kl1, _ = StatContainer.removeCommonNames(kl1)
    for col, col_name in enumerate(kl1):
        result_object = ResultObject(df_new[col_name], [], DataType.array,
                                     CommandStatus.Success)
        result_object.createName(truncated_kl1[col],
                                 command_name=command_name,
                                 set_keyword_list=True)
        result_objects.append(result_object)
    return result_objects
def __init__(self, parent=None):
    """
    Build the main GUI widget and wire its sub-components together.

    Creates the chat printer, the tab container, the table printers and
    the user input line, registers them with the global printer / window /
    property-editor configuration, and lays them out.

    Parameters:
        parent: optional parent widget, forwarded to the base class.
    """
    super(QtGUI, self).__init__(parent)
    # Create subcomponents of the GUI
    self.tab_container = QTabWidget()
    self.qt_printer = QtPrinter()
    self.qt_table_printer = QtTablePrinter()
    self.user_input = QCustomLineEdit()
    self.completion_model = QStringListModel()
    self.labels = [QLineEdit("None"), QLineEdit("None"), QLineEdit("None")]
    self.ground_truth = QLabel()
    self.row_label = QLabel()
    self.filter_label = QLabel()
    completer = QCompleter()
    completer.setModel(self.completion_model)
    self.user_input.setCompleter(completer)
    self.variable_history = QtTablePrinter()

    # Select global configs
    QTabManager.setParentWidget(self.tab_container)
    Window.selectWindowType(QtWindow)
    PropertyEditor.property_editor_class = QtPropertyEditor
    Printer.selectPrinter(self.qt_printer)
    TablePrinter.selectPrinter(self.qt_table_printer)

    # Get screen resolution:
    app = QApplication.instance()
    screen_resolution = app.desktop().screenGeometry()
    # Minimum widget height is 2% of the screen height. setMinimumHeight
    # takes an int, so convert explicitly instead of passing a float.
    min_height = int(0.02 * screen_resolution.height())

    # Add ref labels
    self.ref_labels = []
    for ref_label in ['Reference', 'Row Label', 'Filter']:
        caption = QLabel('<span style=" font-size: ' +
                         str(self.label_font) +
                         'pt; font-weight:600;">' + ref_label +
                         ': </span>')
        caption.setMinimumHeight(min_height)
        self.ref_labels.append(caption)

    # Font for user input:
    f = self.user_input.font()
    f.setPointSize(self.user_input_font)
    self.user_input.setFont(f)
    # Reuse the same font object, resized, for the read-only value labels
    f.setPointSize(self.label_font)
    for label in self.labels:
        label.setFont(f)
        label.setMinimumHeight(min_height)
        label.setReadOnly(True)

    # Size
    self.user_input.setMinimumHeight(min_height)
    self.qt_printer.text_box.setSizePolicy(QSizePolicy.Minimum,
                                           QSizePolicy.Expanding)
    self.tab_container.setSizePolicy(QSizePolicy.Expanding,
                                     QSizePolicy.Expanding)
    self.qt_table_printer.table_widget.setSizePolicy(
        QSizePolicy.MinimumExpanding, QSizePolicy.Expanding)
    self.variable_history.table_widget.setSizePolicy(
        QSizePolicy.Minimum, QSizePolicy.Minimum)
    self.ground_truth.setSizePolicy(QSizePolicy.Expanding,
                                    QSizePolicy.Minimum)

    # Layout
    layout = QVBoxLayout()

    # Add gt, rowlabels, filter
    hlayout = QHBoxLayout()
    for caption, value_box in zip(self.ref_labels, self.labels):
        hlayout.addWidget(caption)
        hlayout.addWidget(value_box)

    # Add tabs for table and past history
    self.right_tab_widget = QTabWidget()
    self.right_tab_widget.addTab(self.qt_table_printer.table_widget,
                                 "Data Summary")
    self.right_tab_widget.addTab(self.variable_history.table_widget,
                                 "Past variables")

    # Add separate splitter for table and property editor
    h_splitter = QSplitter(Qt.Vertical)
    h_splitter.addWidget(self.right_tab_widget)
    h_splitter.setStretchFactor(0, 2)
    PropertyEditor.parent_widget = h_splitter

    # Add chat, window, tab
    splitter = QSplitter(Qt.Horizontal)
    splitter.addWidget(self.qt_printer.text_box)
    splitter.addWidget(self.tab_container)
    splitter.addWidget(h_splitter)
    splitter.setStretchFactor(0, 0)
    splitter.setStretchFactor(1, 2)
    splitter.setStretchFactor(2, 0)
    splitter.setSizes([1, 1000, 500])

    # Final
    layout.addLayout(hlayout)
    layout.addWidget(splitter)
    layout.addWidget(self.user_input)
    self.setLayout(layout)

    # Connections
    self.qt_table_printer.table_widget.itemDoubleClicked.connect(
        self.double_click_table_cell)
def evaluate(self, array_data, user_conv):
    """
    List the columns of a data frame with their type, size and range.

    Each column is classified as Categorical, Numeric, Logic, String or
    Unknown after dropping its NaN entries. When the user input matches
    one or more column types in ``self.column_type_db``, only columns of
    those types are listed.

    Parameters:
        array_data: object whose ``data`` is the data frame to inspect and
            whose ``keyword_list`` is printed.
        user_conv: object whose ``data`` is the user input to search.

    Returns:
        A success ResultObject when ``data`` has columns, otherwise an
        error ResultObject. Neither carries data.
    """
    result_object = ResultObject(None, None, None, CommandStatus.Error)
    data = array_data.data
    req_categories = [
        res.data for res in self.column_type_db.search(user_conv.data)
    ]
    # Rows to print, grouped by column type in display order
    column_strings = {
        'Categorical': [],
        'Numeric': [],
        'Logic': [],
        'String': [],
        'Unknown': []
    }
    self.initializeTable()
    if not hasattr(data, 'columns'):
        return result_object

    for column in data.columns:
        # dropna() returns a new Series; an in-place drop would act on a
        # column pulled from the caller's data frame.
        data_column = data[column].dropna()
        unique_vals = StatContainer.isCategorical(data_column)
        if unique_vals is not None:
            column_type = "Categorical"
        elif np.issubdtype(data_column.dtype, np.number):
            column_type = "Numeric"
        elif np.issubdtype(data_column.dtype, np.bool_):
            column_type = "Logic"
        elif (len(data_column) > 0 and
              isinstance(data_column.iloc[0], str)):
            column_type = "String"
        else:
            column_type = "Unknown"

        # Skip column types the user did not ask for
        if req_categories != [] and column_type not in req_categories:
            continue

        n_unique_vals = str(len(data_column))
        if unique_vals is not None:
            n_unique_vals = str(len(unique_vals))
            if len(unique_vals) < 5:
                column_range = str(unique_vals)
            else:
                column_range = ('[' + str(unique_vals[0]) + '...' +
                                str(unique_vals[-1]) + ']')
        elif (np.issubdtype(data_column.dtype, np.number) and
              len(data_column) > 0):
            # np.min / np.max raise on an empty (all-NaN) column
            column_range = "[{:.2f}, {:.2f}]".format(
                np.min(data_column), np.max(data_column))
        else:
            column_range = ""
        column_strings[column_type].append(
            (column, column_type, n_unique_vals, column_range))

    Printer.Print("Showing Statistics for",
                  " ".join(array_data.keyword_list))
    for rows in column_strings.values():
        rows.sort()  # Sort the elements
        for row_data in rows:
            TablePrinter.addRow(row_data)
    TablePrinter.show()
    return ResultObject(None, None, None, CommandStatus.Success)
def evaluate(self, array_data, target):
    """
    Select the top / bottom entries of an array, or show its first entries.

    The behavior is chosen by ``self._condition[0]``:

    * ``"top"`` / ``"bottom"``: for numeric arrays, the ``num`` largest /
      smallest values; for other arrays, the ``num`` most / least frequent
      unique values. The result is a logical array over the unfiltered
      array marking the selected entries.
    * ``"first"``: print the first entries; no data is returned.

    ``num`` is read from ``target.data``. NaN entries are ignored, and
    when StatContainer holds a conditional array of matching length only
    the entries it selects are considered.

    Parameters:
        array_data: object whose ``data`` is the array to analyze and whose
            ``name`` / ``keyword_list`` are used for display and naming.
        target: object whose ``data`` is searched for the requested count.

    Returns:
        ResultObject holding the logical selection array (top / bottom),
        a data-less success ResultObject ("first"), or an error
        ResultObject when there is no data or nothing could be selected.
    """
    result = ResultObject(None, None, None, CommandStatus.Error)
    condition = self._condition[0]
    in_array = array_data.data
    N = in_array.shape[0]
    use_filter = (StatContainer.conditional_array is not None and
                  len(StatContainer.conditional_array.data) == N)

    # positions[i] is the index in the unfiltered array of the i-th entry
    # that survives filtering; used to look up row labels.
    positions = np.arange(N)
    if use_filter:
        in_array = in_array[StatContainer.conditional_array.data]
        positions = positions[StatContainer.conditional_array.data]
    if in_array.size == 0:
        Printer.Print("No data")
        return result

    nan_idx = StatContainer.getNanIdx(in_array)
    non_nan_idx = np.logical_not(nan_idx)
    non_nan_array = in_array[non_nan_idx]
    positions = positions[np.asarray(non_nan_idx)]

    numbers = findNumbers(target.data, 1)
    try:
        unique_arr, inv, counts = np.unique(non_nan_array,
                                            return_inverse=True,
                                            return_counts=True)
    except Exception:
        # Best effort: arrays np.unique cannot process yield an error result
        return result

    if numbers != [] and numbers[0].data > 0:
        num = int(numbers[0].data)
        idx = None
        is_numeric = np.issubdtype(non_nan_array.dtype, np.number)
        if not is_numeric:
            num = min(unique_arr.size, num)

        if condition in ("top", "bottom"):
            if non_nan_array.size == 0:
                Printer.Print("No data")
                return result
            if is_numeric:
                # argpartition requires kth to be within the array bounds
                num = min(non_nan_array.size, num)
            find_top = condition == "top"
            if find_top:
                Printer.Print("Finding top", num)
                header = "Top values:"
            else:
                Printer.Print("Finding bottom", num)
                header = "Worst values:"

            # Values for numeric arrays, frequencies otherwise
            ranked = non_nan_array if is_numeric else counts
            if find_top:
                # The num largest entries end up in the last num slots
                chosen = np.argpartition(ranked, -num)[-num:]
            else:
                # kth = num - 1 puts the num smallest in the first num slots
                chosen = np.argpartition(ranked, num - 1)[:num]

            if is_numeric:
                idx = np.full(non_nan_array.size, False)
                idx[chosen] = True
                if num <= 30:
                    if StatContainer.row_labels is not None:
                        df_new = pd.DataFrame(
                            {array_data.name: non_nan_array[chosen]})
                        # assumes row_labels.data is aligned with the
                        # unfiltered array -- TODO confirm
                        df_new[StatContainer.row_labels.name] = (
                            StatContainer.row_labels.data[positions[chosen]])
                        TablePrinter.printDataFrame(df_new)
                        TablePrinter.sort(0, ascending=not find_top)
                    else:
                        Printer.Print(header)
                        Printer.Print(non_nan_array[chosen])
            else:
                # Mark every entry whose unique value was selected
                idx = np.isin(inv, chosen)
                if num <= 30:
                    Printer.Print(header)
                    Printer.Print(unique_arr[chosen])
        elif condition == "first":
            Printer.Print(array_data.data[:num])
            result = ResultObject(None, None, None, CommandStatus.Success)
        else:
            Printer.Print("Did not find the right condition")

        if idx is not None:
            # Expand the selection back over NaN entries ...
            out1 = np.full(in_array.size, False)
            out1[non_nan_idx] = idx
            # ... and over the entries removed by the conditional array
            if use_filter:
                out = np.full(N, False)
                out[StatContainer.conditional_array.data] = out1
            else:
                out = out1
            result = ResultObject(out, [], DataType.logical_array,
                                  CommandStatus.Success, True)
            result.createName(array_data.keyword_list,
                              command_name=condition,
                              set_keyword_list=True)
    elif condition == "first":
        # No count given: show the unique values if there are few of them,
        # otherwise the first ten entries
        if unique_arr.size < 50:
            Printer.Print(unique_arr)
        else:
            Printer.Print(non_nan_array[:10])
        result = ResultObject(None, None, None, CommandStatus.Success)
    return result
def evaluate(self, array_datas):
    """
    Apply ``performOperation`` to the arrays grouped by the ground truth.

    The arrays are combined into a data frame, the ground truth is added
    as a column and rows with NaNs are dropped. A categorical ground truth
    is used as is; a non-categorical numeric one is first binned into 10
    intervals. When no ground truth is set, all rows form one group.

    Parameters:
        array_datas: arrays to analyze.

    Returns:
        A list with one ResultObject per column of the grouped table, or a
        list holding a single error ResultObject when the ground truth can
        be neither used nor binned. A single error ResultObject (not a
        list) when the input list is empty or the arrays cannot be
        converted.
    """
    result_object = ResultObject(None, None, None, CommandStatus.Error)
    if isinstance(array_datas, list) and len(array_datas) == 0:
        return result_object
    command_status, df, kl1, cname = DataGuru.transformArray_to_dataFrame(
        array_datas)
    if command_status == CommandStatus.Error:
        return ResultObject(None, None, None, CommandStatus.Error)

    if StatContainer.ground_truth is None:
        # No reference set: treat all rows as one group
        gtVals = np.ones(df.shape[0])
        gtName = 'ground_truth'
    else:
        gtVals = StatContainer.filterGroundTruth()
        gtName = StatContainer.ground_truth.name

    # Remove nans:
    df[gtName] = gtVals
    df.dropna(inplace=True)
    gtVals = df[gtName]

    uniqVals = StatContainer.isCategorical(gtVals, uniqueCutoff=1000)
    can_group = uniqVals is not None
    if not can_group and np.issubdtype(gtVals.dtype, np.number):
        # Convert to categorical. Grouping must then proceed on the bins;
        # branching on uniqVals alone would discard this conversion.
        df[gtName] = pd.cut(gtVals, 10)
        can_group = True

    result_objects = []
    if not can_group:
        Printer.Print("The array is not of numeric type so cannot",
                      "calculate groupwise " + self._condition[0])
        result_objects.append(result_object)
        return result_objects

    # Create groupwise arrays
    df_new = self.performOperation(df, gtName)
    df_new = df_new.reset_index()
    command_name = 'labelwise.' + self._condition[0]
    for col in df_new.columns:
        # An unnamed column takes the keywords of the first input array
        if col == '':
            kName = array_datas[0].keyword_list
        else:
            kName = [col]
        result_object = ResultObject(df_new[col], [], DataType.array,
                                     CommandStatus.Success)
        result_object.createName(kName, command_name=command_name,
                                 set_keyword_list=True)
        result_objects.append(result_object)
    TablePrinter.printDataFrame(df_new)
    return result_objects