def show_loaddata_table(gate_names, as_columns=True):
    '''Utility function to create a table that can be read by CP LoadData.

    gate_names -- list of gate names to apply
    as_columns -- use True to output each gate as a column with 0's and 1's
                  use False to output only the rows that fall within all gates.

    Returns the TableViewer that was shown, or None (after informing the
    user with a message box) when the query produced no rows.
    '''
    # Only gates that are defined purely on the per-image table are accepted.
    for g in gate_names:
        for t in p.gates[g].get_tables():
            assert t == p.image_table, 'this function only takes per-image gates'

    columns = list(dbconnect.image_key_columns() + dbconnect.well_key_columns()) + p.image_file_cols + p.image_path_cols
    if as_columns:
        # Each gate becomes an extra boolean-valued column in the result.
        query_columns = columns + ['(%s) AS %s' % (str(p.gates[g]), g) for g in gate_names]
        columns += gate_names
        query = 'SELECT %s FROM %s' % (','.join(query_columns), p.image_table)
    else:
        # display only values within the given gates
        query = 'SELECT %s FROM %s' % (','.join(columns), p.image_table)
        where_clause = ' AND '.join([str(p.gates[g]) for g in gate_names])
        # With no gates there is nothing to filter on; a bare "WHERE" would
        # be a SQL syntax error, so the clause is only added when non-empty.
        if where_clause:
            query += ' WHERE %s' % (where_clause)
    data = db.execute(query)

    # len() rather than "== []" so any empty sequence is recognised.
    if len(data) == 0:
        wx.MessageBox('Sorry, no data points fall within the combined selected gates.', 'No data to show')
        return None

    grid = TableViewer(None, title="Gated Data")
    grid.table_from_array(np.array(data, dtype='object'), columns,
                          grouping='image',
                          key_indices=list(range(len(dbconnect.image_key_columns()))))
    grid.Show()
    return grid
def get_image_keys_at_row(self, row):
    '''Return a one-element list containing the image key for the given
    row, or None when no key can be determined for that row.
    '''
    # XXX: needs to be updated to work for per_well data
    current_table = self.table_name

    if current_table == p.image_table:
        row_key = self.get_row_key(row)
        return None if row_key is None else [row_key]

    if current_table == p.object_table:
        row_key = self.get_row_key(row)
        # The last element of the row key is dropped (presumably the object
        # number -- confirm against get_row_key).
        return None if row_key is None else [row_key[:-1]]

    # BAD: assumes that columns with the same name as the image key
    #      columns ARE image key columns (not true if looking at unrelated
    #      image table)
    key_values = []
    for key_col in dbconnect.image_key_columns():
        if key_col not in self.col_labels:
            return None
        position = self.col_labels.tolist().index(key_col)
        key_values.append(self.GetValue(row, position))
    return [tuple(key_values)]
def do_by_steps(tables, filter_name, area_score=False):
    '''Classify objects one where-clause at a time and tally the results.

    tables      -- text placed in the FROM part of each query
    filter_name -- key into p._filters, or None for no filter
    area_score  -- when true, p.area_scoring_column is fetched as an extra
                   trailing column and accumulated next to the counts

    Returns a dict mapping tuple(image key + predicted class) to a numpy
    array holding [count], or [count, summed area] when area_score is set.

    Uses cb, classifier and dm from the enclosing scope; cb (if truthy) is
    called with the progress fraction after every where-clause.
    '''
    filter_clause = '1 = 1'
    join_clause = ''
    if filter_name is not None:
        filter_obj = p._filters[filter_name]
        if isinstance(filter_obj, cpa.sqltools.OldFilter):
            join_table = '(%s) as filter' % str(filter_obj)
        else:
            if p.object_table in tables:
                join_table = None
            else:
                join_table = p.object_table
            filter_clause = str(filter_obj)
        if join_table:
            join_clause = 'JOIN %s USING (%s)' % (join_table, ','.join(image_key_columns()))

    wheres = _where_clauses(p, dm, filter_name)
    num_clauses = len(wheres)
    counts = {}

    # iterate over where clauses to go through whole set
    for idx, where_clause in enumerate(wheres):
        where_clause += ' AND ' + filter_clause

        select_parts = [UniqueImageClause(p.object_table),
                        ",".join(db.GetColnamesForClassifier())]
        if area_score:
            select_parts.append(_objectify(p, p.area_scoring_column))
        data = db.execute('SELECT %s FROM %s %s WHERE %s'
                          % (', '.join(select_parts), tables, join_clause, where_clause),
                          silent=(idx > 10))

        areas = None
        if area_score:
            # The area is the last COLUMN of every row.  The previous code
            # took data[-1] / data[:-1], i.e. the last ROW, which dropped one
            # object and left the area column mixed into the feature data.
            # A separate name is used so the area_score flag stays intact
            # for the next where-clause.
            areas = [row[-1] for row in data]
            data = [row[:-1] for row in data]

        cell_data, image_keys = processData(data)
        predicted_classes = classifier.Predict(cell_data)
        for i, predicted in enumerate(predicted_classes):
            row_cls = tuple(np.append(image_keys[i], predicted))
            one_count = np.array([1])
            if areas is not None:
                one_count = np.append(one_count, areas[i])
            if row_cls in counts:
                counts[row_cls] += one_count
            else:
                counts[row_cls] = one_count

        if cb:
            cb(min(1, idx / float(num_clauses)))  # progress
    return counts
def FilterObjectsFromClassN(classNum, classifier, filterKeys, uncertain):
    '''
    uncertain: allows to search for uncertain (regarding the probs assigned by the classifier) cell images
    classNum: 1-based index of the class to retrieve obKeys from
    classifier: trained classifier object
    filterKeys: (optional) A list of specific imKeys OR obKeys (NOT BOTH)
        to classify. A string is used verbatim as the SQL where clause.
        * WARNING: If this list is too long, you may exceed the size limit to
          MySQL queries.
        * Useful when fetching N objects from a particular class. Use the
          DataModel to get batches of random objects, and sift through them
          here until N objects of the desired class have been accumulated.
        * Also useful for classifying a specific image or group of images.
    RETURNS: A list of object keys (as tuples) that fall in the specified
        class, or, when uncertain is true, the keys whose two highest class
        probabilities differ by less than the threshold.

    NOTE: Properties.area_scoring_column is not queried here. The old
    area-scoring query had 4 placeholders for 5 arguments (a TypeError) and
    its result was never part of the return value.
    '''
    whereclause = ""
    if filterKeys is not None and len(filterKeys) > 0:
        if isinstance(filterKeys, str):
            whereclause = filterKeys
        elif len(filterKeys[0]) == len(image_key_columns()):
            # keys as long as the image key are treated as image keys
            whereclause = GetWhereClauseForImages(filterKeys)
        else:
            whereclause = GetWhereClauseForObjects(filterKeys)

    query = 'SELECT %s, %s FROM %s' % (UniqueObjectClause(p.object_table),
                                       ",".join(db.GetColnamesForClassifier()),
                                       p.object_table)
    # Without keys every object is fetched; a bare "WHERE" would be a SQL
    # syntax error, so the clause is only appended when there is one.
    if whereclause:
        query += ' WHERE %s' % (whereclause)
    data = db.execute(query)

    cell_data, object_keys = processData(data)

    if uncertain:
        # Our requirement: the two largest scores are closer than threshold
        probabilities = classifier.PredictProba(cell_data)
        threshold = 0.1  # TODO: This threshold should be adjustable
        sorted_p = np.sort(probabilities)[:, -2:]  # two largest per row
        diff = sorted_p[:, 1] - sorted_p[:, 0]
        indices = np.where(diff < threshold)[0]
        res = [object_keys[i] for i in indices]
    else:
        predicted_classes = classifier.Predict(cell_data)
        # broadcasting compares every prediction against classNum
        res = object_keys[predicted_classes == classNum].tolist()

    # A real list on both Python 2 and 3 (map() is lazy on Python 3).
    return [tuple(key) for key in res]
def FilterObjectsFromClassN(classNum, classifier, filterKeys, uncertain):
    '''
    uncertain: allows to search for uncertain (regarding the probs assigned by the classifier) cell images
    classNum: 1-based index of the class to retrieve obKeys from
    classifier: trained classifier object
    filterKeys: (optional) A list of specific imKeys OR obKeys (NOT BOTH)
        to classify. A string is used verbatim as the SQL where clause.
        * WARNING: If this list is too long, you may exceed the size limit to
          MySQL queries.
        * Useful when fetching N objects from a particular class. Use the
          DataModel to get batches of random objects, and sift through them
          here until N objects of the desired class have been accumulated.
        * Also useful for classifying a specific image or group of images.
    RETURNS: A list of object keys (as tuples) that fall in the specified
        class, or, when uncertain is true, the keys whose two highest class
        probabilities differ by less than the threshold.

    NOTE: Properties.area_scoring_column is not queried here. The old
    area-scoring query had 4 placeholders for 5 arguments (a TypeError) and
    its result was never part of the return value.
    '''
    # Build the optional where clause from the supplied keys.
    whereclause = ""
    has_keys = filterKeys is not None and len(filterKeys) > 0
    if has_keys and isinstance(filterKeys, str):
        whereclause = filterKeys
    elif has_keys:
        # keys as long as the image key are treated as image keys
        if len(filterKeys[0]) == len(image_key_columns()):
            whereclause = GetWhereClauseForImages(filterKeys)
        else:
            whereclause = GetWhereClauseForObjects(filterKeys)

    query = 'SELECT %s, %s FROM %s' % (UniqueObjectClause(p.object_table),
                                       ",".join(db.GetColnamesForClassifier()),
                                       p.object_table)
    # Only add WHERE when there is a condition; "WHERE " followed by nothing
    # is rejected by the database.
    if whereclause:
        query += ' WHERE %s' % (whereclause)
    data = db.execute(query)

    cell_data, object_keys = processData(data)

    if uncertain:
        # Our requirement: the two largest scores are closer than threshold
        probabilities = classifier.PredictProba(cell_data)
        threshold = 0.1  # TODO: This threshold should be adjustable
        sorted_p = np.sort(probabilities)[:, -2:]  # two largest per row
        diff = sorted_p[:, 1] - sorted_p[:, 0]
        indices = np.where(diff < threshold)[0]
        res = [object_keys[i] for i in indices]
    else:
        predicted_classes = classifier.Predict(cell_data)
        # broadcasting compares every prediction against classNum
        res = object_keys[predicted_classes == classNum].tolist()

    # A real list on both Python 2 and 3 (map() is lazy on Python 3).
    return [tuple(key) for key in res]
def show_loaddata_table(gate_names, as_columns=True):
    '''Utility function to create a table that can be read by CP LoadData.

    gate_names -- list of gate names to apply
    as_columns -- use True to output each gate as a column with 0's and 1's
                  use False to output only the rows that fall within all gates.

    Returns the TableViewer that was shown, or None (after informing the
    user with a message box) when the query produced no rows.
    '''
    # Only gates that are defined purely on the per-image table are accepted.
    for g in gate_names:
        for t in p.gates[g].get_tables():
            assert t == p.image_table, 'this function only takes per-image gates'

    columns = list(
        dbconnect.image_key_columns() +
        dbconnect.well_key_columns()) + p.image_file_cols + p.image_path_cols

    if as_columns:
        # Each gate becomes an extra boolean-valued column in the result.
        query_columns = columns + [
            '(%s) AS %s' % (str(p.gates[g]), g) for g in gate_names
        ]
        columns += gate_names
        query = 'SELECT %s FROM %s' % (','.join(query_columns), p.image_table)
    else:
        # display only values within the given gates
        query = 'SELECT %s FROM %s' % (','.join(columns), p.image_table)
        where_clause = ' AND '.join([str(p.gates[g]) for g in gate_names])
        # An empty gate list must not produce a dangling "WHERE", which the
        # database would reject as a syntax error.
        if where_clause:
            query += ' WHERE %s' % (where_clause)
    data = db.execute(query)

    # len() rather than "== []" so any empty sequence is recognised.
    if len(data) == 0:
        wx.MessageBox(
            'Sorry, no data points fall within the combined selected gates.',
            'No data to show')
        return None

    grid = TableViewer(None, title="Gated Data")
    grid.table_from_array(
        np.array(data, dtype='object'),
        columns,
        grouping='image',
        key_indices=list(range(len(dbconnect.image_key_columns()))))
    grid.Show()
    return grid
def FilterObjectsFromClassN(classNum, classifier, filterKeys):
    '''
    classNum: 1-based index of the class to retrieve obKeys from
    classifier: trained classifier object
    filterKeys: (optional) A list of specific imKeys OR obKeys (NOT BOTH)
        to classify. A string is used verbatim as the SQL where clause.
        * WARNING: If this list is too long, you may exceed the size limit to
          MySQL queries.
        * Useful when fetching N objects from a particular class. Use the
          DataModel to get batches of random objects, and sift through them
          here until N objects of the desired class have been accumulated.
        * Also useful for classifying a specific image or group of images.
    RETURNS: A list of object keys (as tuples) that fall in the specified
        class.

    NOTE: Properties.area_scoring_column is not queried here. The old
    area-scoring query had 4 placeholders for 5 arguments (a TypeError) and
    its result was never part of the return value.
    '''
    whereclause = ""
    if filterKeys is not None and len(filterKeys) > 0:
        if isinstance(filterKeys, str):
            whereclause = filterKeys
        elif len(filterKeys[0]) == len(image_key_columns()):
            # keys as long as the image key are treated as image keys
            whereclause = GetWhereClauseForImages(filterKeys)
        else:
            whereclause = GetWhereClauseForObjects(filterKeys)

    feature_columns = db.GetColnamesForClassifier()
    query = 'SELECT %s, %s FROM %s' % (UniqueObjectClause(p.object_table),
                                       ",".join(feature_columns),
                                       p.object_table)
    # Without keys every object is fetched; a bare "WHERE" would be a SQL
    # syntax error, so the clause is only appended when there is one.
    if whereclause:
        query += ' WHERE %s' % (whereclause)
    data = db.execute(query)

    number_of_features = len(feature_columns)
    # last number_of_features columns of each row are the measurements ...
    cell_data = np.array([row[-number_of_features:] for row in data])
    # ... and everything before them is the object key
    object_keys = np.array([row[:-number_of_features] for row in data])

    predicted_classes = classifier.Predict(cell_data)
    # broadcasting compares every prediction against classNum
    res = object_keys[predicted_classes == classNum].tolist()

    # A real list on both Python 2 and 3 (map() is lazy on Python 3).
    return [tuple(key) for key in res]
def set_table(self, table_name):
    '''Point this object at the given database table and reset its grouping,
    cache, column labels, shown columns, sort order and key column indices.
    '''
    if table_name == p.image_table:
        grouping = 'Image'
    elif table_name == p.object_table:
        grouping = 'Object'
    else:
        grouping = None
    self.grouping = grouping

    self.table_name = table_name
    self.cache = odict()
    self.col_labels = np.array(db.GetColumnNames(self.table_name))
    self.shown_columns = np.arange(len(self.col_labels))
    # Default ordering: first column, ascending.
    self.order_by = [self.col_labels[0]]
    self.order_direction = 'ASC'

    # Key indices stay None unless the table is the image or object table.
    self.key_indices = None
    if self.table_name == p.image_table:
        label_list = self.col_labels.tolist()
        self.key_indices = [label_list.index(key_col)
                            for key_col in dbconnect.image_key_columns()]
    if self.table_name == p.object_table:
        label_list = self.col_labels.tolist()
        self.key_indices = [label_list.index(key_col)
                            for key_col in dbconnect.object_key_columns()]
def FilterObjectsFromClassN(self, classN = None, keys = None):
    '''
    Filter the input objects to output the keys of those in classN,
    using a defined SVM model classifier.

    classN -- class number to return the object keys for; None returns the
              whole {class number: [object keys]} dictionary
    keys   -- a string (handed to db.GetCellDataForClassifier as is), a
              sequence of object keys, or a sequence whose length equals the
              number of image key columns (then the objects of the image
              keys[0] are used). None or an empty sequence classifies nothing.

    Raises KeyError when classN is not one of the known class numbers.
    '''
    # One (initially empty) bucket per class bin, numbered from 1.
    classObjects = {}
    for index in range(1, len(self.classBins)+1):
        classObjects[float(index)] = []

    # None (the default) used to crash on len(None), and an empty key list
    # used to push an empty array through the scaler and the model; both now
    # simply yield empty buckets.
    has_input = keys is not None and (isinstance(keys, str) or len(keys) > 0)
    if has_input:
        # Retrieve instance of the database connection
        db = dbconnect.DBConnect.getInstance()
        object_data = {}
        if isinstance(keys, str):
            object_data[0] = db.GetCellDataForClassifier(keys)
        else:
            # NOTE(review): this compares the NUMBER of keys with the number
            # of image key columns and then only uses keys[0] -- confirm
            # against the callers that this is the intended image-key test.
            if len(keys) == len(dbconnect.image_key_columns()):
                # Retrieve instance of the data model and retrieve objects in the requested image
                dm = DataModel.getInstance()
                obKeys = dm.GetObjectsFromImage(keys[0])
            else:
                obKeys = keys
            for key in obKeys:
                object_data[key] = db.GetCellDataForClassifier(key)

        sorted_keys = sorted(object_data.keys())
        values_array = np.array([object_data[key] for key in sorted_keys])
        scaled_values = self.ScaleData(values_array)
        pred_labels = self.model.predict(scaled_values)

        # Group the object keys per class; labels are shifted by one to get
        # the 1-based class number. int() replaces np.int, which was only an
        # alias of the builtin and no longer exists in current NumPy.
        for index, label in enumerate(pred_labels):
            classObjects[int(label)+1].append(sorted_keys[index])

    # Return either a summary of all classes and their corresponding objects
    # or just the objects for a specific class
    if classN is None:
        return classObjects
    else:
        return classObjects[classN]
def OnSavePerImageCountsToCSV(self, evt):
    '''Ask the user for a file name and write the parent's per-image counts
    to it as CSV.

    Each output row holds the image key, then the plate and well (only when
    p.plate_id / p.well_id are set), the total count, and one count per
    class bin of the parent.
    '''
    defaultFileName = 'Per_Image_Counts.csv'
    saveDialog = wx.FileDialog(self, message="Save as:",
                               defaultDir=os.getcwd(),
                               defaultFile=defaultFileName,
                               wildcard='csv|*',
                               style=(wx.SAVE | wx.FD_OVERWRITE_PROMPT |
                                      wx.FD_CHANGE_DIR))
    # try/finally so the dialog is destroyed even when a query fails.
    try:
        if saveDialog.ShowModal()==wx.ID_OK:
            colHeaders = list(dbconnect.image_key_columns())
            pos = len(colHeaders)
            if p.plate_id:
                colHeaders += [p.plate_id]
            if p.well_id:
                colHeaders += [p.well_id]
            colHeaders += ['total_count']
            colHeaders += ['count_'+class_bin.label for class_bin in self.GetParent().classBins]

            # Copy every row: the inserts below would otherwise modify the
            # parent's keysAndCounts rows in place, so each further save
            # would add the plate/well/total columns again.
            data = [list(row) for row in self.GetParent().keysAndCounts]
            for row in data:
                if p.table_id:
                    where = '%s=%s AND %s=%s'%(p.table_id, row[0], p.image_id, row[1])
                    total = sum(row[2:])
                else:
                    where = '%s=%s'%(p.image_id, row[0])
                    total = sum(row[1:])
                # Everything is inserted at the same position, so the value
                # inserted last (plate) ends up first: plate, well, total.
                row.insert(pos, total)
                # Plate and Well are written separately IF they are found in the props file
                # TODO: ANY column could be reported by this mechanism
                if p.well_id:
                    res = db.execute('SELECT %s FROM %s WHERE %s'%(p.well_id, p.image_table, where), silent=True)
                    well = res[0][0]
                    row.insert(pos, well)
                if p.plate_id:
                    res = db.execute('SELECT %s FROM %s WHERE %s'%(p.plate_id, p.image_table, where), silent=True)
                    plate = res[0][0]
                    row.insert(pos, plate)
            self.SaveCSV(saveDialog.GetPath(), data, colHeaders)
    finally:
        saveDialog.Destroy()
def FilterObjectsFromClassN(self, classN=None, keys=None):
    '''
    Filter the input objects to output the keys of those in classN,
    using a defined SVM model classifier.

    classN -- class number to return the object keys for; None returns the
              whole {class number: [object keys]} dictionary
    keys   -- a string (handed to db.GetCellDataForClassifier as is), a
              sequence of object keys, or a sequence whose length equals the
              number of image key columns (then the objects of the image
              keys[0] are used). None or an empty sequence classifies nothing.

    Raises KeyError when classN is not one of the known class numbers.
    '''
    # One (initially empty) bucket per class bin, numbered from 1.
    classObjects = {}
    for index in range(1, len(self.classBins) + 1):
        classObjects[float(index)] = []

    # None (the default) used to crash on len(None), and an empty key list
    # used to push an empty array through the scaler and the model; both now
    # simply yield empty buckets.
    nothing_to_classify = keys is None or (
        not isinstance(keys, str) and len(keys) == 0)

    if not nothing_to_classify:
        # Retrieve instance of the database connection
        db = dbconnect.DBConnect.getInstance()
        object_data = {}
        if isinstance(keys, str):
            object_data[0] = db.GetCellDataForClassifier(keys)
        else:
            # NOTE(review): this compares the NUMBER of keys with the number
            # of image key columns and then only uses keys[0] -- confirm
            # against the callers that this is the intended image-key test.
            if len(keys) == len(dbconnect.image_key_columns()):
                # Retrieve instance of the data model and retrieve objects in the requested image
                dm = DataModel.getInstance()
                obKeys = dm.GetObjectsFromImage(keys[0])
            else:
                obKeys = keys
            for key in obKeys:
                object_data[key] = db.GetCellDataForClassifier(key)

        sorted_keys = sorted(object_data.keys())
        values_array = np.array([object_data[key] for key in sorted_keys])
        scaled_values = self.ScaleData(values_array)
        pred_labels = self.model.predict(scaled_values)

        # Group the object keys per class; labels are shifted by one to get
        # the 1-based class number. int() replaces np.int, which was only an
        # alias of the builtin and no longer exists in current NumPy.
        for index, label in enumerate(pred_labels):
            classObjects[int(label) + 1].append(sorted_keys[index])

    # Return either a summary of all classes and their corresponding objects
    # or just the objects for a specific class
    if classN is None:
        return classObjects
    else:
        return classObjects[classN]
def set_table(self, table_name):
    '''Switch to the given database table: sets the grouping, clears the
    cache, reloads the column labels, shows every column, sorts by the first
    column ascending and looks up the key column indices.
    '''
    if table_name == p.image_table:
        self.grouping = 'Image'
    elif table_name == p.object_table:
        self.grouping = 'Object'
    else:
        self.grouping = None

    self.table_name = table_name
    self.cache = odict()

    column_names = db.GetColumnNames(self.table_name)
    self.col_labels = np.array(column_names)
    self.shown_columns = np.arange(len(self.col_labels))
    self.order_by = [self.col_labels[0]]
    self.order_direction = 'ASC'

    def positions_of(key_columns):
        # index of every key column within the column labels
        labels = self.col_labels.tolist()
        return [labels.index(name) for name in key_columns]

    # None unless the table is the image table or the object table; the
    # object table check comes last and therefore wins if both match.
    self.key_indices = None
    if self.table_name == p.image_table:
        self.key_indices = positions_of(dbconnect.image_key_columns())
    if self.table_name == p.object_table:
        self.key_indices = positions_of(dbconnect.object_key_columns())
def LoadCSV(self, csvfile, group='Image'):
    '''Load a CSV file (first row = column labels) into a new grid.

    csvfile -- path of the CSV file to read
    group   -- 'Image', DO_NOT_LINK_TO_IMAGES, or a group name known to the
               data model; decides which leading columns are key columns
    '''
    # Best effort clean-up of a previously loaded grid; there may be none.
    # "except Exception" keeps that behaviour without also swallowing
    # KeyboardInterrupt/SystemExit like the former bare except did.
    try:
        self.grid.Destroy()
    except Exception:
        pass
    try:
        # Remove the previous column show/hide menu (should be the third menu)
        self.GetMenuBar().Remove(2)
        self.colmenu.Destroy()
    except Exception:
        pass

    # First pass: read the labels and infer one type per column. The file is
    # opened in a with-block so the handle is closed (it used to leak).
    with open(csvfile) as csv_handle:
        r = csv.reader(csv_handle)
        labels = next(r)
        dtable = dbconnect.get_data_table_from_csv_reader(r)
        inferred = db.InferColTypesFromData(dtable, len(labels))
    converters = {'INT': int, 'FLOAT': float}
    # anything that is neither INT nor FLOAT is kept as a string
    coltypes = [converters.get(coltype, str) for coltype in inferred]

    # Second pass: convert every cell with the converter of its column.
    with open(csvfile) as csv_handle:
        r = csv.reader(csv_handle)
        next(r)  # skip col-headers
        data = [[coltypes[i](v) for i, v in enumerate(row)] for row in r]
    data = np.array(data, dtype=object)

    if group == DO_NOT_LINK_TO_IMAGES:
        keycols = []
    elif group == 'Image':
        keycols = range(len(dbconnect.image_key_columns()))
    else:
        keycols = range(len(dm.GetGroupColumnNames(group)))

    self.grid = HugeTableGrid(self, data, labels, key_col_indices=keycols,
                              grouping=group, chMap=p.image_channel_colors)
    self.Title = '%s (%s)'%(csvfile, group)
    self.file = csvfile
    self.CreateColumnMenu()
    self.RescaleGrid()
def do_normalization(self): if not self.validate(): # Should be unreachable wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.') long_cols = [col for col in self.col_choices.GetCheckedStrings() if len(col) + 4 > 64] if long_cols: dlg = wx.MessageDialog(self, 'The following columns contain more ' 'than 64 characters when a normalization suffix (4 ' 'characters) is appended. This may cause a problem when ' 'writing to the database.\n %s'%('\n'.join(long_cols)), 'Warning', wx.OK|wx.CANCEL|wx.ICON_EXCLAMATION) if dlg.ShowModal() == wx.ID_CANCEL: return dlg.Destroy() imkey_cols = dbconnect.image_key_columns() obkey_cols = dbconnect.object_key_columns() wellkey_cols = dbconnect.well_key_columns() im_clause = dbconnect.UniqueImageClause well_clause = dbconnect.UniqueWellClause input_table = self.table_choice.GetStringSelection() meas_cols = self.col_choices.GetCheckedStrings() wants_norm_meas = self.norm_meas_checkbox.IsChecked() wants_norm_factor = self.norm_factor_checkbox.IsChecked() output_table = self.output_table.Value FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple())) if p.db_type == 'mysql': BATCH_SIZE = 100 else: BATCH_SIZE = 1 if input_table == p.object_table: FIRST_MEAS_INDEX += 1 # Original if wellkey_cols: if input_table == p.image_table: WELL_KEY_INDEX = len(imkey_cols) else: WELL_KEY_INDEX = len(imkey_cols) + 1 if db.table_exists(output_table): dlg = wx.MessageDialog(self, 'Are you sure you want to overwrite the table "%s"?'%(output_table), "Overwrite table?", wx.YES_NO|wx.NO_DEFAULT|wx.ICON_EXCLAMATION) if dlg.ShowModal() == wx.ID_NO: dlg.Destroy() return dlg.Destroy() # # First Get the data from the db. # if input_table == p.image_table: if wellkey_cols: # If there are well columns, fetch them. 
query = "SELECT %s, %s, %s FROM %s"%( im_clause(), well_clause(), ', '.join(meas_cols), input_table) else: query = "SELECT %s, %s FROM %s"%( im_clause(), ', '.join(meas_cols), input_table) elif input_table == p.object_table: if p.image_table and wellkey_cols: # If we have x and y from cells, we can use that for classifier if p.cell_x_loc and p.cell_y_loc: FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier # If there are well columns, fetch them from the per-image table. query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s"%( dbconnect.UniqueObjectClause(p.object_table), well_clause(p.image_table), p.cell_x_loc, p.cell_y_loc, ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]), p.image_table, p.object_table, ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) for c in imkey_cols]) ) else: # If there are well columns, fetch them from the per-image table. query = "SELECT %s, %s, %s FROM %s, %s WHERE %s"%( dbconnect.UniqueObjectClause(p.object_table), well_clause(p.image_table), ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]), p.image_table, p.object_table, ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) for c in imkey_cols]) ) else: if p.cell_x_loc and p.cell_y_loc: FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier query = "SELECT %s, %s, %s, %s FROM %s"%( im_clause(), p.cell_x_loc, p.cell_y_loc, ', '.join(meas_cols), input_table) else: query = "SELECT %s, %s FROM %s"%( im_clause(), ', '.join(meas_cols), input_table) if p.negative_control: # if the user defined negative control, we can use that to fetch the wellkeys neg_query = query + ' AND ' + p.negative_control # fetch all the negative control elements if wellkey_cols: query += " ORDER BY %s"%(well_clause(p.image_table)) dlg = wx.ProgressDialog('Computing normalized values', 'Querying database for raw data.', parent=self, style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL) dlg.Pulse() # # MAKE THE QUERY # input_data = 
np.array(db.execute(query), dtype=object) if p.negative_control: import pandas as pd negative_control = pd.DataFrame(db.execute(neg_query), dtype=float) logging.info("# of objects in negative control: " + str(negative_control.shape[0])) logging.info("# of objects queried: " + str(input_data.shape[0])) neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean() neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std() output_columns = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan output_factors = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan for colnum, col in enumerate(input_data[:,FIRST_MEAS_INDEX:].T): keep_going, skip = dlg.Pulse("Normalizing column %d of %d"%(colnum+1, len(meas_cols))) if not keep_going: dlg.Destroy() return norm_data = col.copy() for step_num, step_panel in enumerate(self.norm_steps): d = step_panel.get_configuration_dict() if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS): # Reshape data if normalization step is plate sensitive. 
assert p.plate_id and p.well_id well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2) ] wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T)) new_norm_data = [] for plate, plate_grp in groupby(wellkeys_and_vals, lambda(row): row[0]): keys_and_vals = np.array(list(plate_grp)) plate_data, wks, ind = FormatPlateMapData(keys_and_vals) pnorm_data = norm.do_normalization_step(plate_data, **d) new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist() norm_data = new_norm_data elif d[norm.P_GROUPING] == norm.G_PLATE: assert p.plate_id and p.well_id if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL: mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX] std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX] print mean_plate_col print std_plate_col well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)] wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T)) new_norm_data = [] # print wellkeys_and_vals for plate, plate_grp in groupby(wellkeys_and_vals, lambda(row): row[0]): plate_data = np.array(list(plate_grp))[:,-1].flatten() pnorm_data = norm.do_normalization_step(plate_data, **d) if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL: try: plate_mean = mean_plate_col[plate] plate_std = std_plate_col[plate] except: plate_mean = mean_plate_col[int(plate)] plate_std = std_plate_col[int(plate)] try: pnorm_data = (pnorm_data - plate_mean) / plate_std print pnorm_data except: logging.error("Plate std is zero, division by zero!") new_norm_data += pnorm_data.tolist() norm_data = new_norm_data
else: norm_data = norm.do_normalization_step(norm_data, **d) output_columns[:, colnum] = np.array(norm_data) output_factors[:, colnum] = col.astype(float) / np.array(norm_data, dtype=float) dlg.Destroy() return # Abort here for coding norm_table_cols = [] # Write new table db.execute('DROP TABLE IF EXISTS %s' % (output_table)) if input_table == p.image_table: norm_table_cols += dbconnect.image_key_columns() col_defs = ', '.join([ '%s %s' % (col, db.GetColumnTypeString(p.image_table, col)) for col in dbconnect.image_key_columns() ]) elif input_table == p.object_table: norm_table_cols += obkey_cols col_defs = ', '.join([ '%s %s' % (col, db.GetColumnTypeString(p.object_table, col)) for col in obkey_cols ]) if wellkey_cols: norm_table_cols += wellkey_cols col_defs += ', ' + ', '.join([ '%s %s' % (col, db.GetColumnTypeString(p.image_table, col)) for col in wellkey_cols
def prompt_user_to_link_table(parent, table):
    '''Prompts the user for information about the given table so it may be
    linked into the tables that CPA already accesses.

    parent -- parent window for the dialogs
    table  -- name of the table to link

    returns the given table name or None if the user cancels (or the
    requested kind of table cannot be linked)
    raises ValueError if table is already the per-object table
    '''
    def ask_yes_no(message, caption):
        # Modal yes/no question; the dialog is destroyed before returning.
        question = wx.MessageDialog(parent, message, caption, wx.YES_NO)
        answer = question.ShowModal()
        question.Destroy()
        return answer == wx.ID_YES

    dlg = wx.SingleChoiceDialog(parent, 'What kind of data is in this table (%s)?'%(table),
                                'Select table type',
                                ['per-well', 'per-image', 'per-object', 'other'],
                                wx.CHOICEDLG_STYLE)
    # Add a "Show table" button so the user can inspect the table first.
    show_table_button = wx.Button(dlg, -1, 'Show table')
    dlg.Sizer.Children[2].GetSizer().Insert(0, show_table_button, 0, wx.ALL, 10)
    dlg.Sizer.Children[2].GetSizer().InsertStretchSpacer(1, 1)
    def on_show_table(evt):
        from tableviewer import TableViewer
        tableview = TableViewer(get_main_frame_or_none())
        tableview.Show()
        tableview.load_db_table(table)
    show_table_button.Bind(wx.EVT_BUTTON, on_show_table)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    new_table_type = dlg.GetStringSelection()
    dlg.Destroy()  # was leaked on the success path

    if new_table_type == 'per-well':
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.well_key_columns()
    elif new_table_type == 'per-image':
        if ask_yes_no('Does this per-image table represent a '
                      'new set of images in your experiment?',
                      'New per-image table'):
            # The explanation used to be a wx.MessageDialog built without a
            # parent and never shown, so the user never saw it.
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-image tables unless they are referring to the\n'
                          'same images.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-image tables not supported')
            return None
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.image_key_columns()
    elif new_table_type == 'per-object':
        if ask_yes_no('Does this per-object table represent a '
                      'new set of objects in your experiment?',
                      'New per-object table'):
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-object tables unless they are referring to the\n'
                          'same objects.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-object tables not supported')
            # NOTE(review): aborts like the per-image branch; the old code
            # carried on linking after (not) showing this message.
            return None
        if p.object_table:
            if table == p.object_table:
                # was a bare "raise" with no active exception
                raise ValueError('Table "%s" is already the per-object table.' % (table))
            link_table_to_try = p.object_table
            link_cols_to_try = dbconnect.object_key_columns()
        else:
            # There should never be an object table without another object
            # table existing first. Connecting this table to the image_table is
            # asking for trouble.
            return None
    else:
        dlg = wx.SingleChoiceDialog(parent, 'Which of your tables is "%s" linked '
                                    'to?'%(table), 'Select linking table',
                                    db.get_linkable_tables(), wx.CHOICEDLG_STYLE)
        if dlg.ShowModal() != wx.ID_OK:
            dlg.Destroy()
            return None
        link_table_to_try = dlg.GetStringSelection()
        dlg.Destroy()
        link_cols_to_try = []

    # Let the user confirm/adjust which columns pair up between the tables.
    dlg = LinkTablesDialog(parent, table, link_table_to_try,
                           link_cols_to_try, link_cols_to_try)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    col_pairs = dlg.get_column_pairs()
    dlg.Destroy()

    src_cols = [col_pair[0][1] for col_pair in col_pairs]
    dest_cols = [col_pair[1][1] for col_pair in col_pairs]
    db.do_link_tables(table, link_table_to_try, src_cols, dest_cols)
    # return the newly linked table
    return table
# Map the inferred column type names to Python converters; anything that is
# neither INT nor FLOAT is kept as a string.
type_converters = {'INT': int, 'FLOAT': float}
coltypes = [type_converters.get(coltype, str)
            for coltype in db.InferColTypesFromData(dtable, len(labels))]

# Re-read the file and convert every cell with the converter of its column.
# The with-block closes the handle, which previously leaked.
with open(csvfile) as csv_handle:
    r = csv.reader(csv_handle)
    next(r)  # skip col-headers
    data = [[coltypes[i](v) for i, v in enumerate(row)] for row in r]
data = np.array(data, dtype=object)

# An optional third command line argument selects the grouping.
group = 'Image'
if len(sys.argv)==4:
    group = sys.argv[3]

if group == 'Image':
    keycols = range(len(dbconnect.image_key_columns()))
else:
    keycols = range(len(dm.GetGroupColumnNames(group)))

grid = DataGrid(data, labels, grouping=group,
                key_col_indices=keycols,
                chMap=p.image_channel_colors,
                title=csvfile,
                autosave=False)
grid.Show()
app.MainLoop()
def do_normalization(self):
    '''Query the raw measurements and apply the configured normalization
    steps to every checked measurement column.

    The input table, measurement columns and output options are read from
    the controls on self; the steps come from self.norm_steps.

    Returns None. Returns early if the settings are invalid or the user
    cancels one of the dialogs.
    '''
    if not self.validate():
        # Should be unreachable
        wx.MessageBox(
            'Your normalization settings are invalid. Can\'t perform normalization.'
        )
        # Don't carry on with settings that are known to be invalid.
        return

    long_cols = [
        col for col in self.col_choices.GetCheckedStrings()
        if len(col) + 4 > 64
    ]
    if long_cols:
        dlg = wx.MessageDialog(
            self, 'The following columns contain more '
            'than 64 characters when a normalization suffix (4 '
            'characters) is appended. This may cause a problem when '
            'writing to the database.\n %s' % ('\n'.join(long_cols)),
            'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
        cancelled = dlg.ShowModal() == wx.ID_CANCEL
        dlg.Destroy()  # release the dialog on the cancel path too
        if cancelled:
            return

    imkey_cols = dbconnect.image_key_columns()
    obkey_cols = dbconnect.object_key_columns()
    wellkey_cols = dbconnect.well_key_columns()
    im_clause = dbconnect.UniqueImageClause
    well_clause = dbconnect.UniqueWellClause
    input_table = self.table_choice.GetStringSelection()
    meas_cols = self.col_choices.GetCheckedStrings()
    wants_norm_meas = self.norm_meas_checkbox.IsChecked()
    wants_norm_factor = self.norm_factor_checkbox.IsChecked()
    output_table = self.output_table.Value
    # Index of the first measurement column in each fetched row.
    FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
    if p.db_type == 'mysql':
        BATCH_SIZE = 100
    else:
        BATCH_SIZE = 1
    if input_table == p.object_table:
        FIRST_MEAS_INDEX += 1  # Original
    if wellkey_cols:
        if input_table == p.image_table:
            WELL_KEY_INDEX = len(imkey_cols)
        else:
            WELL_KEY_INDEX = len(imkey_cols) + 1
        # The well key columns directly follow the image/object key columns.
        # (FIRST_MEAS_INDEX - 2 is only the right end when the two cell
        # location columns were selected as well.)
        WELL_KEY_END = WELL_KEY_INDEX + len(wellkey_cols)

    if db.table_exists(output_table):
        dlg = wx.MessageDialog(
            self, 'Are you sure you want to overwrite the table "%s"?' %
            (output_table), "Overwrite table?",
            wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
        declined = dlg.ShowModal() == wx.ID_NO
        dlg.Destroy()
        if declined:
            return

    #
    # First Get the data from the db.
    #
    has_where = False  # whether the query already carries a WHERE clause
    if input_table == p.image_table:
        if wellkey_cols:
            # If there are well columns, fetch them.
            query = "SELECT %s, %s, %s FROM %s" % (
                im_clause(), well_clause(), ', '.join(meas_cols), input_table)
        else:
            query = "SELECT %s, %s FROM %s" % (
                im_clause(), ', '.join(meas_cols), input_table)
    elif input_table == p.object_table:
        qualified_meas = ', '.join(
            ['%s.%s' % (p.object_table, col) for col in meas_cols])
        join_condition = ' AND '.join([
            '%s.%s=%s.%s' % (p.image_table, c, p.object_table, c)
            for c in imkey_cols
        ])
        if p.image_table and wellkey_cols:
            has_where = True
            # If we have x and y from cells, we can use that for classifier
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table), p.cell_x_loc, p.cell_y_loc,
                    qualified_meas, p.image_table, p.object_table,
                    join_condition)
            else:
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table), qualified_meas,
                    p.image_table, p.object_table, join_condition)
        else:
            # NOTE(review): FIRST_MEAS_INDEX was bumped for the object table
            # above, but these queries select im_clause() only -- confirm the
            # column offsets for this path.
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                query = "SELECT %s, %s, %s, %s FROM %s" % (
                    im_clause(), p.cell_x_loc, p.cell_y_loc,
                    ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)

    if p.negative_control:  # if the user defined negative control, we can use that to fetch the wellkeys
        # fetch all the negative control elements; only use AND when the
        # query already has a WHERE clause to extend.
        neg_query = query + (' AND ' if has_where else ' WHERE ') + p.negative_control

    if wellkey_cols:
        query += " ORDER BY %s" % (well_clause(p.image_table))

    dlg = wx.ProgressDialog('Computing normalized values',
                            'Querying database for raw data.',
                            parent=self,
                            style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
    dlg.Pulse()
    #
    # MAKE THE QUERY
    #
    input_data = np.array(db.execute(query), dtype=object)
    if p.negative_control:
        import pandas as pd
        negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
        logging.info("# of objects in negative control: " +
                     str(negative_control.shape[0]))
        logging.info("# of objects queried: " + str(input_data.shape[0]))
        neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
        neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

    output_columns = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    output_factors = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
        keep_going, skip = dlg.Pulse("Normalizing column %d of %d" %
                                     (colnum + 1, len(meas_cols)))
        if not keep_going:
            dlg.Destroy()
            return
        norm_data = col.copy()
        for step_num, step_panel in enumerate(self.norm_steps):
            d = step_panel.get_configuration_dict()
            if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                # Reshape data if normalization step is plate sensitive.
                assert p.plate_id and p.well_id
                well_keys = input_data[:, WELL_KEY_INDEX:WELL_KEY_END]
                wellkeys_and_vals = np.hstack(
                    (well_keys, np.array([norm_data]).T))
                new_norm_data = []
                # groupby only merges adjacent rows; the query is ordered by
                # the well clause above.
                for plate, plate_grp in groupby(wellkeys_and_vals,
                                                lambda row: row[0]):
                    keys_and_vals = np.array(list(plate_grp))
                    plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    new_norm_data += pnorm_data.flatten()[
                        ind.flatten().tolist()].tolist()
                norm_data = new_norm_data
            elif d[norm.P_GROUPING] == norm.G_PLATE:
                assert p.plate_id and p.well_id
                uses_negctrl = d[norm.P_AGG_TYPE] == norm.M_NEGCTRL
                if uses_negctrl:
                    mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                    std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]
                    logging.debug(mean_plate_col)
                    logging.debug(std_plate_col)

                well_keys = input_data[:, WELL_KEY_INDEX:WELL_KEY_END]
                wellkeys_and_vals = np.hstack(
                    (well_keys, np.array([norm_data]).T))
                new_norm_data = []
                for plate, plate_grp in groupby(wellkeys_and_vals,
                                                lambda row: row[0]):
                    plate_data = np.array(list(plate_grp))[:, -1].flatten()
                    pnorm_data = norm.do_normalization_step(plate_data, **d)

                    if uses_negctrl:
                        try:
                            plate_mean = mean_plate_col[plate]
                            plate_std = std_plate_col[plate]
                        except Exception:
                            # Fall back to the integer form of the plate key.
                            plate_mean = mean_plate_col[int(plate)]
                            plate_std = std_plate_col[int(plate)]

                        try:
                            pnorm_data = (pnorm_data - plate_mean) / plate_std
                            logging.debug(pnorm_data)
                        except Exception:
                            logging.error(
                                "Plate std is zero, division by zero!")

                    new_norm_data += pnorm_data.tolist()
                norm_data = new_norm_data
    # NOTE(review): this block ends here without storing norm_data in
    # output_columns/output_factors and without destroying the progress
    # dialog -- confirm whether the remainder of the routine is missing.
def do_by_steps(tables, filter_name, area_score=False):
    '''Classify the objects one where-clause at a time and tally the
    predicted classes.

    tables -- table expression used in the FROM clause
    filter_name -- key into p._filters, or None to apply no filter
    area_score -- if True, p.area_scoring_column is fetched too and summed
                  alongside the counts

    Returns a dict keyed by tuple(image_keys[i][0] + predicted class) whose
    values are numpy arrays: [count], or [count, summed area] when
    area_score is set.
    '''
    filter_clause = '1 = 1'
    join_clause = ''
    if filter_name is not None:
        selected_filter = p._filters[filter_name]
        if isinstance(selected_filter, cpa.sqltools.OldFilter):
            join_table = '(%s) as filter' % str(selected_filter)
        else:
            if p.object_table in tables:
                join_table = None
            else:
                join_table = p.object_table
            filter_clause = str(selected_filter)
        if join_table:
            join_clause = 'JOIN %s USING (%s)' % (
                join_table, ','.join(image_key_columns()))

    wheres = _where_clauses(p, dm, filter_name)
    num_clauses = len(wheres)
    counts = {}

    # iterate over where clauses to go through whole set
    for idx, where_clause in enumerate(wheres):
        where_clause += ' AND ' + filter_clause
        if area_score:
            # NOTE(review): this query selects UniqueImageClause while the
            # branch below selects UniqueObjectClause -- confirm which key
            # layout processData expects.
            data = db.execute('SELECT %s, %s, %s FROM %s '
                              '%s WHERE %s' %
                              (UniqueImageClause(p.object_table), ",".join(
                                  db.GetColnamesForClassifier()),
                               _objectify(p, p.area_scoring_column), tables,
                               join_clause, where_clause),
                              silent=(idx > 10))
            # separate area from data: the area is the last selected column
            # of every row. It is kept in its own name so the area_score
            # flag is not overwritten.
            areas = [row[-1] for row in data]
            data = [row[:-1] for row in data]
        else:
            data = db.execute('SELECT %s, %s FROM %s '
                              '%s WHERE %s' %
                              (UniqueObjectClause(p.object_table),
                               ",".join(db.GetColnamesForClassifier()), tables,
                               join_clause, where_clause),
                              silent=(idx > 10))

        cell_data, image_keys = processData(data)

        # Report any cell that can't be read as a number.
        for i in range(cell_data.shape[0]):
            for j in range(cell_data.shape[1]):
                try:
                    float(cell_data[i, j])
                except (TypeError, ValueError):
                    print(i, j, cell_data[i, j], type(cell_data[i, j]))

        predicted_classes = classifier.Predict(cell_data)
        for i, predicted in enumerate(predicted_classes):
            row_cls = tuple(np.append(image_keys[i][0], predicted))
            oneCount = np.array([1])
            if area_score:
                oneCount = np.append(oneCount, areas[i])
            if row_cls in counts:
                counts[row_cls] += oneCount
            else:
                counts[row_cls] = oneCount

        if cb:
            cb(min(1, idx / float(num_clauses)))  # progress
    return counts
def prompt_user_to_link_table(parent, table):
    '''Prompts the user for information about the given table so it may be
    linked into the tables that CPA already accesses.

    parent -- parent window for the dialogs shown here
    table -- name of the table to link

    returns the given table name, or None if the user cancels or the table
    can't be linked.
    raises ValueError if the table is already the configured per-object table.
    '''
    def ask_yes_no(question, caption):
        # Show a modal yes/no question and always release the dialog.
        question_dlg = wx.MessageDialog(parent, question, caption, wx.YES_NO)
        try:
            return question_dlg.ShowModal() == wx.ID_YES
        finally:
            question_dlg.Destroy()

    dlg = wx.SingleChoiceDialog(parent,
                                'What kind of data is in this table (%s)?'%(table),
                                'Select table type',
                                ['per-well', 'per-image', 'per-object', 'other'],
                                wx.CHOICEDLG_STYLE)
    # Add a "Show table" button in front of the dialog's standard buttons.
    show_table_button = wx.Button(dlg, -1, 'Show table')
    dlg.Sizer.Children[2].GetSizer().Insert(0, show_table_button, 0, wx.ALL, 10)
    dlg.Sizer.Children[2].GetSizer().InsertStretchSpacer(1, 1)

    def on_show_table(evt):
        from tableviewer import TableViewer
        tableview = TableViewer(get_main_frame_or_none())
        tableview.Show()
        tableview.load_db_table(table)
    show_table_button.Bind(wx.EVT_BUTTON, on_show_table)

    try:
        if dlg.ShowModal() != wx.ID_OK:
            return None
        # Read the selection before the dialog is destroyed.
        new_table_type = dlg.GetStringSelection()
    finally:
        dlg.Destroy()

    if new_table_type == 'per-well':
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.well_key_columns()
    elif new_table_type == 'per-image':
        if ask_yes_no('Does this per-image table represent a '
                      'new set of images in your experiment?',
                      'New per-image table'):
            # wx.MessageBox actually displays the message; a bare
            # wx.MessageDialog(...) is only constructed, never shown.
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-image tables unless they are referring to the\n'
                          'same images.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-image tables not supported',
                          parent=parent)
            return None
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.image_key_columns()
    elif new_table_type == 'per-object':
        if ask_yes_no('Does this per-object table represent a '
                      'new set of objects in your experiment?',
                      'New per-object table'):
            wx.MessageBox('Sorry, CPA does not currently support multiple\n'
                          'per-object tables unless they are referring to the\n'
                          'same objects.\n\n'
                          'Please see the manual for more information',
                          'Multiple per-object tables not supported',
                          parent=parent)
            # NOTE(review): unlike the per-image case this path has always
            # carried on with the linking below -- confirm whether it should
            # return None here as well.
        if p.object_table:
            if table == p.object_table:
                # A bare "raise" has no active exception to re-raise here.
                raise ValueError('Table "%s" is already the per-object table '
                                 'and can not be linked to itself.'%(table))
            link_table_to_try = p.object_table
            link_cols_to_try = dbconnect.object_key_columns()
        else:
            # There should never be an object table without another object
            # table existing first. Connecting this table to the image_table is
            # asking for trouble.
            return None
    else:
        dlg = wx.SingleChoiceDialog(parent,
                                    'Which of your tables is "%s" linked '
                                    'to?'%(table), 'Select linking table',
                                    db.get_linkable_tables(), wx.CHOICEDLG_STYLE)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return None
            link_table_to_try = dlg.GetStringSelection()
        finally:
            dlg.Destroy()
        link_cols_to_try = []

    dlg = LinkTablesDialog(parent, table, link_table_to_try,
                           link_cols_to_try, link_cols_to_try)
    try:
        if dlg.ShowModal() != wx.ID_OK:
            return None
        col_pairs = dlg.get_column_pairs()
    finally:
        dlg.Destroy()

    src_cols = [col_pair[0][1] for col_pair in col_pairs]
    dest_cols = [col_pair[1][1] for col_pair in col_pairs]
    db.do_link_tables(table, link_table_to_try, src_cols, dest_cols)
    # return the newly linked table
    return table
def do_normalization(self):
    '''Query the raw measurements and apply the configured normalization
    steps to every checked measurement column.

    The input table, measurement columns and output options are read from
    the controls on self; the steps come from self.norm_steps.

    Returns None. Returns early if the settings are invalid or the user
    cancels one of the dialogs. The code that writes the output table is
    currently unreachable because of a debugging return left in after the
    normalization loop.
    '''
    if not self.validate():
        # Should be unreachable
        wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.')
        # Don't carry on with settings that are known to be invalid.
        return

    long_cols = [col for col in self.col_choices.GetCheckedStrings()
                 if len(col) + 4 > 64]
    if long_cols:
        dlg = wx.MessageDialog(self, 'The following columns contain more '
                               'than 64 characters when a normalization suffix (4 '
                               'characters) is appended. This may cause a problem when '
                               'writing to the database.\n %s'%('\n'.join(long_cols)),
                               'Warning', wx.OK|wx.CANCEL|wx.ICON_EXCLAMATION)
        cancelled = dlg.ShowModal() == wx.ID_CANCEL
        dlg.Destroy()  # release the dialog on the cancel path too
        if cancelled:
            return

    imkey_cols = dbconnect.image_key_columns()
    obkey_cols = dbconnect.object_key_columns()
    wellkey_cols = dbconnect.well_key_columns()
    im_clause = dbconnect.UniqueImageClause
    well_clause = dbconnect.UniqueWellClause
    input_table = self.table_choice.GetStringSelection()
    meas_cols = self.col_choices.GetCheckedStrings()
    wants_norm_meas = self.norm_meas_checkbox.IsChecked()
    wants_norm_factor = self.norm_factor_checkbox.IsChecked()
    output_table = self.output_table.Value
    # Index of the first measurement column in each fetched row.
    FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
    if p.db_type == 'mysql':
        BATCH_SIZE = 100
    else:
        BATCH_SIZE = 1
    if input_table == p.object_table:
        FIRST_MEAS_INDEX += 1 # Original
    if wellkey_cols:
        if input_table == p.image_table:
            WELL_KEY_INDEX = len(imkey_cols)
        else:
            WELL_KEY_INDEX = len(imkey_cols) + 1
        # The well key columns directly follow the image/object key columns.
        # (FIRST_MEAS_INDEX - 2 is only the right end when the two cell
        # location columns were selected as well.)
        WELL_KEY_END = WELL_KEY_INDEX + len(wellkey_cols)

    if db.table_exists(output_table):
        dlg = wx.MessageDialog(self,
                               'Are you sure you want to overwrite the table "%s"?'%(output_table),
                               "Overwrite table?",
                               wx.YES_NO|wx.NO_DEFAULT|wx.ICON_EXCLAMATION)
        declined = dlg.ShowModal() == wx.ID_NO
        dlg.Destroy()
        if declined:
            return

    #
    # First Get the data from the db.
    #
    has_where = False  # whether the query already carries a WHERE clause
    if input_table == p.image_table:
        if wellkey_cols:
            # If there are well columns, fetch them.
            query = "SELECT %s, %s, %s FROM %s"%(
                im_clause(), well_clause(), ', '.join(meas_cols), input_table)
        else:
            query = "SELECT %s, %s FROM %s"%(
                im_clause(), ', '.join(meas_cols), input_table)
    elif input_table == p.object_table:
        qualified_meas = ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols])
        join_condition = ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c)
                                       for c in imkey_cols])
        if p.image_table and wellkey_cols:
            has_where = True
            # If we have x and y from cells, we can use that for classifier
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s"%(
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table),
                    p.cell_x_loc,
                    p.cell_y_loc,
                    qualified_meas,
                    p.image_table, p.object_table,
                    join_condition)
            else:
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s FROM %s, %s WHERE %s"%(
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table),
                    qualified_meas,
                    p.image_table, p.object_table,
                    join_condition)
        else:
            # NOTE(review): FIRST_MEAS_INDEX was bumped for the object table
            # above, but these queries select im_clause() only -- confirm the
            # column offsets for this path.
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2 # Cell X and Y Location are fixed to for classifier
                query = "SELECT %s, %s, %s, %s FROM %s"%(
                    im_clause(), p.cell_x_loc, p.cell_y_loc,
                    ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s"%(
                    im_clause(), ', '.join(meas_cols), input_table)

    if p.negative_control: # if the user defined negative control, we can use that to fetch the wellkeys
        # fetch all the negative control elements; only use AND when the
        # query already has a WHERE clause to extend.
        neg_query = query + (' AND ' if has_where else ' WHERE ') + p.negative_control

    if wellkey_cols:
        query += " ORDER BY %s"%(well_clause(p.image_table))

    dlg = wx.ProgressDialog('Computing normalized values',
                            'Querying database for raw data.',
                            parent=self,
                            style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL)
    dlg.Pulse()
    #
    # MAKE THE QUERY
    #
    input_data = np.array(db.execute(query), dtype=object)
    if p.negative_control:
        import pandas as pd
        negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
        logging.info("# of objects in negative control: " + str(negative_control.shape[0]))
        logging.info("# of objects queried: " + str(input_data.shape[0]))
        neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
        neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

    output_columns = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
    output_factors = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
    for colnum, col in enumerate(input_data[:,FIRST_MEAS_INDEX:].T):
        keep_going, skip = dlg.Pulse("Normalizing column %d of %d"%(colnum+1, len(meas_cols)))
        if not keep_going:
            dlg.Destroy()
            return
        norm_data = col.copy()
        for step_num, step_panel in enumerate(self.norm_steps):
            d = step_panel.get_configuration_dict()
            if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                # Reshape data if normalization step is plate sensitive.
                assert p.plate_id and p.well_id
                well_keys = input_data[:, WELL_KEY_INDEX:WELL_KEY_END]
                wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                new_norm_data = []
                # groupby only merges adjacent rows; the query is ordered by
                # the well clause above.
                for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                    keys_and_vals = np.array(list(plate_grp))
                    plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist()
                norm_data = new_norm_data
            elif d[norm.P_GROUPING] == norm.G_PLATE:
                assert p.plate_id and p.well_id
                uses_negctrl = d[norm.P_AGG_TYPE] == norm.M_NEGCTRL
                if uses_negctrl:
                    mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                    std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]
                    logging.debug(mean_plate_col)
                    logging.debug(std_plate_col)

                well_keys = input_data[:, WELL_KEY_INDEX:WELL_KEY_END]
                wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                new_norm_data = []
                for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                    plate_data = np.array(list(plate_grp))[:,-1].flatten()
                    pnorm_data = norm.do_normalization_step(plate_data, **d)

                    if uses_negctrl:
                        try:
                            plate_mean = mean_plate_col[plate]
                            plate_std = std_plate_col[plate]
                        except Exception:
                            # Fall back to the integer form of the plate key.
                            plate_mean = mean_plate_col[int(plate)]
                            plate_std = std_plate_col[int(plate)]

                        try:
                            pnorm_data = (pnorm_data - plate_mean) / plate_std
                            logging.debug(pnorm_data)
                        except Exception:
                            logging.error("Plate std is zero, division by zero!")

                    new_norm_data += pnorm_data.tolist()
                norm_data = new_norm_data
            else:
                norm_data = norm.do_normalization_step(norm_data, **d)

        output_columns[:,colnum] = np.array(norm_data)
        output_factors[:,colnum] = col.astype(float) / np.array(norm_data,dtype=float)

    dlg.Destroy()
    # NOTE(review): this debugging return makes everything below unreachable,
    # so no output table is ever written. It is kept because removing it
    # would start dropping and creating tables -- confirm before enabling.
    return # Abort here for coding

    norm_table_cols = []
    # Write new table
    db.execute('DROP TABLE IF EXISTS %s'%(output_table))
    if input_table == p.image_table:
        norm_table_cols += dbconnect.image_key_columns()
        col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                              for col in dbconnect.image_key_columns()])
    elif input_table == p.object_table:
        norm_table_cols += obkey_cols
        col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.object_table, col))
                              for col in obkey_cols])
    if wellkey_cols:
        norm_table_cols += wellkey_cols
        col_defs += ', '+ ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                                     for col in wellkey_cols])

    if input_table == p.object_table:
        if p.cell_x_loc and p.cell_y_loc:
            norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
            col_defs += ', %s %s'%(p.cell_x_loc, db.GetColumnTypeString(p.object_table, p.cell_x_loc)) + ', ' + '%s %s'%(p.cell_y_loc, db.GetColumnTypeString(p.object_table, p.cell_y_loc))

    if wants_norm_meas:
        col_defs += ', '+ ', '.join(['%s_NmM %s'%(col, db.GetColumnTypeString(input_table, col))
                                     for col in meas_cols])
    if wants_norm_factor:
        col_defs += ', '+ ', '.join(['%s_NmF %s'%(col, db.GetColumnTypeString(input_table, col))
                                     for col in meas_cols])

    for col in meas_cols:
        if wants_norm_meas:
            norm_table_cols += ['%s_NmM'%(col)]
        if wants_norm_factor:
            norm_table_cols += ['%s_NmF'%(col)]
    db.execute('CREATE TABLE %s (%s)'%(output_table, col_defs))

    dlg = wx.ProgressDialog('Writing to "%s"'%(output_table),
                            "Writing normalized values to database",
                            maximum = output_columns.shape[0],
                            parent=self,
                            style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL|wx.PD_ELAPSED_TIME|wx.PD_ESTIMATED_TIME|wx.PD_REMAINING_TIME)

    def sql_value(x):
        # NaN and infinite values are written as the text NULL.
        return 'NULL' if (np.isnan(x) or np.isinf(x)) else x

    cmd = 'INSERT INTO %s VALUES '%(output_table)
    cmdi = cmd
    for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
        cmdi += '(' + ','.join(['"%s"']*len(norm_table_cols)) + ')'
        key_values = list(input_data[i, :FIRST_MEAS_INDEX])
        if wants_norm_meas and wants_norm_factor:
            cmdi = cmdi%tuple(key_values +
                              [sql_value(x) for x in val] +
                              [sql_value(x) for x in factor])
        elif wants_norm_meas:
            cmdi = cmdi%tuple(key_values + [sql_value(x) for x in val])
        elif wants_norm_factor:
            cmdi = cmdi%tuple(key_values + [sql_value(x) for x in factor])
        # Send the statement once a batch is full or the last row is reached.
        if (i+1) % BATCH_SIZE == 0 or i==len(output_columns)-1:
            db.execute(str(cmdi))
            cmdi = cmd
            # update status dialog
            (keep_going, skip) = dlg.Update(i)
            if not keep_going:
                break
        else:
            cmdi += ',\n'
    dlg.Destroy()
    db.Commit()

    #
    # Update table linkage
    #
    if db.get_linking_tables(input_table, output_table) is not None:
        db.do_unlink_table(output_table)

    if input_table == p.image_table:
        db.do_link_tables(output_table, input_table, imkey_cols, imkey_cols)
    elif input_table == p.object_table:
        db.do_link_tables(output_table, input_table, obkey_cols, obkey_cols)

    #
    # Show the resultant table
    #
    import tableviewer
    tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
    tv.Show()
    tv.load_db_table(output_table)
def do_normalization(self):
    '''Query the raw measurements and apply the configured normalization
    steps to every checked measurement column.

    The input table, measurement columns and output options are read from
    the controls on self; the steps come from self.norm_steps.

    Returns None. Returns early if the settings are invalid or the user
    cancels one of the dialogs. The code that writes the output table is
    currently unreachable because of a debugging return left in after the
    normalization loop.
    '''
    if not self.validate():
        # Should be unreachable
        wx.MessageBox(
            'Your normalization settings are invalid. Can\'t perform normalization.'
        )
        # Don't carry on with settings that are known to be invalid.
        return

    long_cols = [
        col for col in self.col_choices.GetCheckedStrings()
        if len(col) + 4 > 64
    ]
    if long_cols:
        dlg = wx.MessageDialog(
            self, 'The following columns contain more '
            'than 64 characters when a normalization suffix (4 '
            'characters) is appended. This may cause a problem when '
            'writing to the database.\n %s' % ('\n'.join(long_cols)),
            'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
        cancelled = dlg.ShowModal() == wx.ID_CANCEL
        dlg.Destroy()  # release the dialog on the cancel path too
        if cancelled:
            return

    imkey_cols = dbconnect.image_key_columns()
    obkey_cols = dbconnect.object_key_columns()
    wellkey_cols = dbconnect.well_key_columns()
    im_clause = dbconnect.UniqueImageClause
    well_clause = dbconnect.UniqueWellClause
    input_table = self.table_choice.GetStringSelection()
    meas_cols = self.col_choices.GetCheckedStrings()
    wants_norm_meas = self.norm_meas_checkbox.IsChecked()
    wants_norm_factor = self.norm_factor_checkbox.IsChecked()
    output_table = self.output_table.Value
    # Index of the first measurement column in each fetched row.
    FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
    if p.db_type == 'mysql':
        BATCH_SIZE = 100
    else:
        BATCH_SIZE = 1
    if input_table == p.object_table:
        FIRST_MEAS_INDEX += 1  # Original
    if wellkey_cols:
        if input_table == p.image_table:
            WELL_KEY_INDEX = len(imkey_cols)
        else:
            WELL_KEY_INDEX = len(imkey_cols) + 1
        # The well key columns directly follow the image/object key columns.
        # (FIRST_MEAS_INDEX - 2 is only the right end when the two cell
        # location columns were selected as well.)
        WELL_KEY_END = WELL_KEY_INDEX + len(wellkey_cols)

    if db.table_exists(output_table):
        dlg = wx.MessageDialog(
            self, 'Are you sure you want to overwrite the table "%s"?' %
            (output_table), "Overwrite table?",
            wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
        declined = dlg.ShowModal() == wx.ID_NO
        dlg.Destroy()
        if declined:
            return

    #
    # First Get the data from the db.
    #
    has_where = False  # whether the query already carries a WHERE clause
    if input_table == p.image_table:
        if wellkey_cols:
            # If there are well columns, fetch them.
            query = "SELECT %s, %s, %s FROM %s" % (
                im_clause(), well_clause(), ', '.join(meas_cols), input_table)
        else:
            query = "SELECT %s, %s FROM %s" % (
                im_clause(), ', '.join(meas_cols), input_table)
    elif input_table == p.object_table:
        qualified_meas = ', '.join(
            ['%s.%s' % (p.object_table, col) for col in meas_cols])
        join_condition = ' AND '.join([
            '%s.%s=%s.%s' % (p.image_table, c, p.object_table, c)
            for c in imkey_cols
        ])
        if p.image_table and wellkey_cols:
            has_where = True
            # If we have x and y from cells, we can use that for classifier
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table), p.cell_x_loc, p.cell_y_loc,
                    qualified_meas, p.image_table, p.object_table,
                    join_condition)
            else:
                # If there are well columns, fetch them from the per-image table.
                query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                    dbconnect.UniqueObjectClause(p.object_table),
                    well_clause(p.image_table), qualified_meas,
                    p.image_table, p.object_table, join_condition)
        else:
            # NOTE(review): FIRST_MEAS_INDEX was bumped for the object table
            # above, but these queries select im_clause() only -- confirm the
            # column offsets for this path.
            if p.cell_x_loc and p.cell_y_loc:
                FIRST_MEAS_INDEX += 2  # Cell X and Y Location are fixed to for classifier
                query = "SELECT %s, %s, %s, %s FROM %s" % (
                    im_clause(), p.cell_x_loc, p.cell_y_loc,
                    ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)

    if p.negative_control:  # if the user defined negative control, we can use that to fetch the wellkeys
        # fetch all the negative control elements; only use AND when the
        # query already has a WHERE clause to extend.
        neg_query = query + (' AND ' if has_where else ' WHERE ') + p.negative_control

    if wellkey_cols:
        query += " ORDER BY %s" % (well_clause(p.image_table))

    dlg = wx.ProgressDialog('Computing normalized values',
                            'Querying database for raw data.',
                            parent=self,
                            style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
    dlg.Pulse()
    #
    # MAKE THE QUERY
    #
    input_data = np.array(db.execute(query), dtype=object)
    if p.negative_control:
        import pandas as pd
        negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
        logging.info("# of objects in negative control: " +
                     str(negative_control.shape[0]))
        logging.info("# of objects queried: " + str(input_data.shape[0]))
        neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
        neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

    output_columns = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    output_factors = np.ones(input_data[:, FIRST_MEAS_INDEX:].shape) * np.nan
    for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
        keep_going, skip = dlg.Pulse("Normalizing column %d of %d" %
                                     (colnum + 1, len(meas_cols)))
        if not keep_going:
            dlg.Destroy()
            return
        norm_data = col.copy()
        for step_num, step_panel in enumerate(self.norm_steps):
            d = step_panel.get_configuration_dict()
            if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                # Reshape data if normalization step is plate sensitive.
                assert p.plate_id and p.well_id
                well_keys = input_data[:, WELL_KEY_INDEX:WELL_KEY_END]
                wellkeys_and_vals = np.hstack(
                    (well_keys, np.array([norm_data]).T))
                new_norm_data = []
                # groupby only merges adjacent rows; the query is ordered by
                # the well clause above.
                for plate, plate_grp in groupby(wellkeys_and_vals,
                                                lambda row: row[0]):
                    keys_and_vals = np.array(list(plate_grp))
                    plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                    pnorm_data = norm.do_normalization_step(plate_data, **d)
                    new_norm_data += pnorm_data.flatten()[
                        ind.flatten().tolist()].tolist()
                norm_data = new_norm_data
            elif d[norm.P_GROUPING] == norm.G_PLATE:
                assert p.plate_id and p.well_id
                uses_negctrl = d[norm.P_AGG_TYPE] == norm.M_NEGCTRL
                if uses_negctrl:
                    mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                    std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]
                    logging.debug(mean_plate_col)
                    logging.debug(std_plate_col)

                well_keys = input_data[:, WELL_KEY_INDEX:WELL_KEY_END]
                wellkeys_and_vals = np.hstack(
                    (well_keys, np.array([norm_data]).T))
                new_norm_data = []
                for plate, plate_grp in groupby(wellkeys_and_vals,
                                                lambda row: row[0]):
                    plate_data = np.array(list(plate_grp))[:, -1].flatten()
                    pnorm_data = norm.do_normalization_step(plate_data, **d)

                    if uses_negctrl:
                        try:
                            plate_mean = mean_plate_col[plate]
                            plate_std = std_plate_col[plate]
                        except Exception:
                            # Fall back to the integer form of the plate key.
                            plate_mean = mean_plate_col[int(plate)]
                            plate_std = std_plate_col[int(plate)]

                        try:
                            pnorm_data = (pnorm_data - plate_mean) / plate_std
                            logging.debug(pnorm_data)
                        except Exception:
                            logging.error(
                                "Plate std is zero, division by zero!")

                    new_norm_data += pnorm_data.tolist()
                norm_data = new_norm_data
            else:
                norm_data = norm.do_normalization_step(norm_data, **d)

        output_columns[:, colnum] = np.array(norm_data)
        output_factors[:, colnum] = col.astype(float) / np.array(
            norm_data, dtype=float)

    dlg.Destroy()
    # NOTE(review): this debugging return makes everything below unreachable,
    # so no output table is ever written. It is kept because removing it
    # would start dropping and creating tables -- confirm before enabling.
    return  # Abort here for coding

    norm_table_cols = []
    # Write new table
    db.execute('DROP TABLE IF EXISTS %s' % (output_table))
    if input_table == p.image_table:
        norm_table_cols += dbconnect.image_key_columns()
        col_defs = ', '.join([
            '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
            for col in dbconnect.image_key_columns()
        ])
    elif input_table == p.object_table:
        norm_table_cols += obkey_cols
        col_defs = ', '.join([
            '%s %s' % (col, db.GetColumnTypeString(p.object_table, col))
            for col in obkey_cols
        ])
    if wellkey_cols:
        norm_table_cols += wellkey_cols
        col_defs += ', ' + ', '.join([
            '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
            for col in wellkey_cols
        ])

    if input_table == p.object_table:
        if p.cell_x_loc and p.cell_y_loc:
            norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
            col_defs += ', %s %s' % (
                p.cell_x_loc,
                db.GetColumnTypeString(p.object_table, p.cell_x_loc)
            ) + ', ' + '%s %s' % (p.cell_y_loc,
                                  db.GetColumnTypeString(
                                      p.object_table, p.cell_y_loc))

    if wants_norm_meas:
        col_defs += ', ' + ', '.join([
            '%s_NmM %s' % (col, db.GetColumnTypeString(input_table, col))
            for col in meas_cols
        ])
    if wants_norm_factor:
        col_defs += ', ' + ', '.join([
            '%s_NmF %s' % (col, db.GetColumnTypeString(input_table, col))
            for col in meas_cols
        ])

    for col in meas_cols:
        if wants_norm_meas:
            norm_table_cols += ['%s_NmM' % (col)]
        if wants_norm_factor:
            norm_table_cols += ['%s_NmF' % (col)]
    db.execute('CREATE TABLE %s (%s)' % (output_table, col_defs))

    dlg = wx.ProgressDialog('Writing to "%s"' % (output_table),
                            "Writing normalized values to database",
                            maximum=output_columns.shape[0],
                            parent=self,
                            style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL
                            | wx.PD_ELAPSED_TIME | wx.PD_ESTIMATED_TIME
                            | wx.PD_REMAINING_TIME)

    def sql_value(x):
        # NaN and infinite values are written as the text NULL.
        return 'NULL' if (np.isnan(x) or np.isinf(x)) else x

    cmd = 'INSERT INTO %s VALUES ' % (output_table)
    cmdi = cmd
    for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
        cmdi += '(' + ','.join(['"%s"'] * len(norm_table_cols)) + ')'
        key_values = list(input_data[i, :FIRST_MEAS_INDEX])
        if wants_norm_meas and wants_norm_factor:
            cmdi = cmdi % tuple(key_values +
                                [sql_value(x) for x in val] +
                                [sql_value(x) for x in factor])
        elif wants_norm_meas:
            cmdi = cmdi % tuple(key_values + [sql_value(x) for x in val])
        elif wants_norm_factor:
            cmdi = cmdi % tuple(key_values + [sql_value(x) for x in factor])
        # Send the statement once a batch is full or the last row is reached.
        if (i + 1) % BATCH_SIZE == 0 or i == len(output_columns) - 1:
            db.execute(str(cmdi))
            cmdi = cmd
            # update status dialog
            (keep_going, skip) = dlg.Update(i)
            if not keep_going:
                break
        else:
            cmdi += ',\n'
    dlg.Destroy()
    db.Commit()

    #
    # Update table linkage
    #
    if db.get_linking_tables(input_table, output_table) is not None:
        db.do_unlink_table(output_table)

    if input_table == p.image_table:
        db.do_link_tables(output_table, input_table, imkey_cols, imkey_cols)
    elif input_table == p.object_table:
        db.do_link_tables(output_table, input_table, obkey_cols, obkey_cols)

    #
    # Show the resultant table
    #
    import tableviewer
    tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
    tv.Show()
    tv.load_db_table(output_table)
new_norm_data += pnorm_data.tolist() norm_data = new_norm_data else: norm_data = norm.do_normalization_step(norm_data, **d) output_columns[:,colnum] = np.array(norm_data) output_factors[:,colnum] = col.astype(float) / np.array(norm_data,dtype=float) dlg.Destroy() return # Abort here for coding norm_table_cols = [] # Write new table db.execute('DROP TABLE IF EXISTS %s'%(output_table)) if input_table == p.image_table: norm_table_cols += dbconnect.image_key_columns() col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col)) for col in dbconnect.image_key_columns()]) elif input_table == p.object_table: norm_table_cols += obkey_cols col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.object_table, col)) for col in obkey_cols]) if wellkey_cols: norm_table_cols += wellkey_cols col_defs += ', '+ ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col)) for col in wellkey_cols]) if input_table == p.object_table: if p.cell_x_loc and p.cell_y_loc: norm_table_cols += [p.cell_x_loc, p.cell_y_loc] col_defs += ', %s %s'%(p.cell_x_loc, db.GetColumnTypeString(p.object_table, p.cell_x_loc)) + ', ' + '%s %s'%(p.cell_y_loc, db.GetColumnTypeString(p.object_table, p.cell_y_loc))