def show_loaddata_table(gate_names, as_columns=True):
    '''Utility function to create a table that can be read by CP LoadData.
    gate_names -- list of gate names to apply
    as_columns -- use True to output each gate as a column with 0's and 1's
                  use False to output only the rows that fall within all gates.
    '''
    for g in gate_names:
        for t in p.gates[g].get_tables():
            assert t == p.image_table, 'this function only takes per-image gates'
    columns = list(dbconnect.image_key_columns() + dbconnect.well_key_columns()) + p.image_file_cols + p.image_path_cols
    if as_columns:
        query_columns = columns + ['(%s) AS %s'%(str(p.gates[g]), g) for g in gate_names]
        columns += gate_names
        data = db.execute('SELECT %s FROM %s'
                          %(','.join(query_columns), p.image_table))
    else:
        # display only values within the given gates
        where_clause = ' AND '.join([str(p.gates[g]) for g in gate_names])
        data = db.execute('SELECT %s FROM %s WHERE %s'
                          %(','.join(columns), p.image_table, where_clause))
    if data == []:
        wx.MessageBox('Sorry, no data points fall within the combined selected gates.', 'No data to show')
        return None
    grid = TableViewer(None, title="Gated Data")
    grid.table_from_array(np.array(data, dtype='object'), columns, grouping='image', 
                          key_indices=range(len(dbconnect.image_key_columns())))
    grid.Show()
    return grid
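
def _example_show_gated_loaddata():
    # A minimal usage sketch for show_loaddata_table above, assuming a running
    # CPA session with a loaded project and per-image gates already defined in
    # p.gates; the gate names below are hypothetical.
    grid = show_loaddata_table(['positive_control', 'in_focus'],
                               as_columns=True)
    # With as_columns=True each gate becomes an extra 0/1 column; with
    # as_columns=False only rows falling inside every gate are returned.
    return grid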
Example No. 2
def show_loaddata_table(gate_names, as_columns=True):
    '''Utility function to create a table that can be read by CP LoadData.
    gate_names -- list of gate names to apply
    as_columns -- use True to output each gate as a column with 0's and 1's
                  use False to output only the rows that fall within all gates.
    '''
    for g in gate_names:
        for t in p.gates[g].get_tables():
            assert t == p.image_table, 'this function only takes per-image gates'
    columns = list(
        dbconnect.image_key_columns() +
        dbconnect.well_key_columns()) + p.image_file_cols + p.image_path_cols
    if as_columns:
        query_columns = columns + [
            '(%s) AS %s' % (str(p.gates[g]), g) for g in gate_names
        ]
        columns += gate_names
        data = db.execute('SELECT %s FROM %s' %
                          (','.join(query_columns), p.image_table))
    else:
        # display only values within the given gates
        where_clause = ' AND '.join([str(p.gates[g]) for g in gate_names])
        data = db.execute('SELECT %s FROM %s WHERE %s' %
                          (','.join(columns), p.image_table, where_clause))
    if data == []:
        wx.MessageBox(
            'Sorry, no data points fall within the combined selected gates.',
            'No data to show')
        return None
    grid = TableViewer(None, title="Gated Data")
    grid.table_from_array(np.array(data, dtype='object'),
                          columns,
                          grouping='image',
                          key_indices=range(len(
                              dbconnect.image_key_columns())))
    grid.Show()
    return grid
Example No. 3
def FormatPlateMapData(keys_and_vals, categorical=False):
    '''
    keys_and_vals -- a list of lists of well-keys and values
                     eg: [['p1', 'A01', 0.2], 
                          ['p1', 'A02', 0.9], ...]
    returns a 3-tuple containing:
       -an array in the shape of the plate containing the given values with
        NaNs filling empty slots. If multiple sites per-well are given, then
        the array will be shaped (rows, cols, sites)
       -an array in the shape of the plate containing the given keys with
        (UnknownPlate, UnknownWell) filling empty slots
       -an array of sort indices mapping each plate position back to the
        corresponding row(s) of keys_and_vals
    '''
    from itertools import groupby
    keys_and_vals = np.array(keys_and_vals)
    nkeycols = len(dbconnect.well_key_columns())
    shape = list(p.plate_shape)
    if p.plate_type == '5600': 
        well_keys = keys_and_vals[:,:-1] # first column(s) are keys
        data = keys_and_vals[:,-1]       # last column is data
        assert data.ndim == 1
        if len(data) < 5600: raise Exception(
            '''The measurement you chose to plot was missing for some spots. 
            Because CPA doesn't know the well labelling convention used by this
            microarray, we can't be sure how to plot the data. If you are 
            plotting an object measurement, you may have some spots with 0 
            objects and therefore no entry in the table.''')
        assert len(data) == 5600
        data = np.array(list(meander(data.reshape(shape)))).reshape(shape)
        sort_indices = np.array(list(meander(np.arange(np.prod(shape)).reshape(shape)))).reshape(shape)
        well_keys = np.array(list(meander(well_keys.reshape(shape + [nkeycols] )))).reshape(shape + [nkeycols])
        return data, well_keys, sort_indices

    # compute the number of sites-per-well as the max number of rows with the same well-key
    nsites = max([len(list(grp))
                  for k, grp in groupby(keys_and_vals, 
                                        lambda row: tuple(row[:nkeycols]))
                  ])
    if nsites > 1:
        # add a sites dimension to the array shape if there's >1 site per well
        shape += [nsites]
    data = np.ones(shape) * np.nan
    if categorical:
        data = data.astype('object')
    if p.plate_id:
        dummy_key = ('UnknownPlate', 'UnknownWell')
    else:
        dummy_key = ('UnknownWell',)
    well_keys = np.array([dummy_key] * np.prod(shape), 
                         dtype=object).reshape(shape + [nkeycols])
    sort_indices = np.ones(data.shape)*np.nan
    
    dm = DataModel.getInstance()
    ind = keys_and_vals.argsort(axis=0)
    for i, (k, well_grp) in enumerate(groupby(keys_and_vals[ind[:,len(dummy_key)-1],:], 
                                              lambda row: tuple(row[:len(dummy_key)]))):
        (row, col) = dm.get_well_position_from_name(k[-1])
        well_data = np.array(list(well_grp))[:,-1]
        if len(well_data) == 1:
            data[row, col] = well_data[0]
            sort_indices[row,col] = ind[:,len(dummy_key)-1][i]
        else:
            data[row, col] = well_data
            sort_indices[row,col] = ind[:,len(dummy_key)-1][i*nsites + np.array(range(nsites))] 
        well_keys[row, col] = k
        
    return data, well_keys, sort_indices
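
def _example_format_platemap():
    # A minimal sketch of the input and output of FormatPlateMapData above,
    # assuming a loaded project with plate and well columns; the plate/well
    # names and values below are hypothetical.
    keys_and_vals = [['plate1', 'A01', 0.2],
                     ['plate1', 'A02', 0.9],
                     ['plate1', 'B01', 0.4]]
    data, well_keys, sort_indices = FormatPlateMapData(keys_and_vals)
    # data has the shape of p.plate_shape (plus a trailing sites dimension if
    # any well key repeats), with np.nan in wells that received no value;
    # well_keys holds the key tuple for each position, with
    # ('UnknownPlate', 'UnknownWell') filling empty slots.
    return data, well_keys, sort_indices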
Example No. 4
    def UpdatePlateMaps(self):
        self.measurement = self.measurementsChoice.Value
        measurement = self.measurement
        table       = self.sourceChoice.Value
        self.aggMethod   = self.aggregationMethodsChoice.Value
        categorical = measurement not in get_numeric_columns_from_table(table)
        fltr        = self.filterChoice.Value
        self.colorBar.ClearNotifyWindows()

        q = sql.QueryBuilder()
        well_key_cols = [sql.Column(p.image_table, col) for col in well_key_columns()]
        select = list(well_key_cols)
        if not categorical:
            if self.aggMethod=='mean':
                select += [sql.Column(table, measurement, 'AVG')]
            elif self.aggMethod=='stdev':
                select += [sql.Column(table, measurement, 'STDDEV')]
            elif self.aggMethod=='cv%':
                # stddev(col) / avg(col) * 100
                select += [sql.Expression(
                              sql.Column(table, measurement, 'STDDEV'), ' / ',
                              sql.Column(table, measurement, 'AVG'), ' * 100')]
            elif self.aggMethod=='sum':
                select += [sql.Column(table, measurement, 'SUM')]
            elif self.aggMethod=='min':
                select += [sql.Column(table, measurement, 'MIN')]
            elif self.aggMethod=='max':
                select += [sql.Column(table, measurement, 'MAX')]
            elif self.aggMethod=='median':
                select += [sql.Column(table, measurement, 'MEDIAN')]
            elif self.aggMethod=='none':
                select += [sql.Column(table, measurement)]
        else:
            select += [sql.Column(table, measurement)]
        
        q.set_select_clause(select)
        q.set_group_columns(well_key_cols)
        if fltr not in (FilterComboBox.NO_FILTER, FilterComboBox.NEW_FILTER, ''):
            if fltr in p._filters:
                q.add_filter(p._filters[fltr])
            elif fltr in p.gates:
                q.add_filter(p.gates[fltr].as_filter())
            else:
                raise Exception('Could not find filter "%s" in gates or filters'%(fltr))
        wellkeys_and_values = db.execute(str(q))
        wellkeys_and_values = np.array(wellkeys_and_values, dtype=object)

        # Replace measurement None's with nan
        for row in wellkeys_and_values:
            if row[-1] is None:
                row[-1] = np.nan

        data = []
        key_lists = []
        dmax = -np.inf
        dmin = np.inf
        if p.plate_id:
            for plateChoice, plateMap in zip(self.plateMapChoices, self.plateMaps):
                plate = plateChoice.Value
                plateMap.SetPlate(plate)
                self.colorBar.AddNotifyWindow(plateMap)
                self.keys_and_vals = [v for v in wellkeys_and_values if str(v[0])==plate]
                platedata, wellkeys, ignore = FormatPlateMapData(self.keys_and_vals, categorical)
                data += [platedata]
                key_lists += [wellkeys]
                if not categorical:
                    dmin = np.nanmin([float(kv[-1]) for kv in self.keys_and_vals]+[dmin])
                    dmax = np.nanmax([float(kv[-1]) for kv in self.keys_and_vals]+[dmax])
        else:
            self.colorBar.AddNotifyWindow(self.plateMaps[0])
            platedata, wellkeys, ignore = FormatPlateMapData(wellkeys_and_values, categorical)
            data += [platedata]
            key_lists += [wellkeys]
            if not categorical:
                dmin = np.nanmin([float(kv[-1]) for kv in wellkeys_and_values])
                dmax = np.nanmax([float(kv[-1]) for kv in wellkeys_and_values])
            
        if not categorical:
            if len(wellkeys_and_values) > 0:
                # Compute the global extents if there is any data whatsoever
                gmin = np.nanmin([float(vals[-1]) for vals in wellkeys_and_values])
                gmax = np.nanmax([float(vals[-1]) for vals in wellkeys_and_values])
                if np.isinf(dmin) or np.isinf(dmax):
                    gmin = gmax = dmin = dmax = 1.
                    # Warn if there was no data for this plate (and no filter was used)
                    if fltr == FilterComboBox.NO_FILTER:
                        wx.MessageBox('No numeric data was found in "%s.%s" for plate "%s"'
                                      %(table, measurement, plate), 'Warning')
            else:
                gmin = gmax = 1.
                if fltr == FilterComboBox.NO_FILTER:
                    wx.MessageBox('No numeric data was found in %s.%s'
                                  %(table, measurement), 'Warning')

        if categorical:
            self.colorBar.Hide()
        else:
            self.colorBar.Show()
            self.colorBar.SetLocalExtents([dmin,dmax])
            self.colorBar.SetGlobalExtents([gmin,gmax])
        self.rightSizer.Layout()

        for keys, d, plateMap in zip(key_lists, data, self.plateMaps):
            plateMap.SetWellKeys(keys)
            if categorical:
                plateMap.SetData(np.ones(d.shape) * np.nan)
                plateMap.SetTextData(d)
            else:
                plateMap.SetData(d, data_range=self.colorBar.GetLocalExtents(), 
                                 clip_interval=self.colorBar.GetLocalInterval(), 
                                 clip_mode=self.colorBar.GetClipMode())
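
def _example_none_to_nan():
    # A self-contained sketch of the None -> NaN fix-up used in UpdatePlateMaps
    # above, so the later nanmin/nanmax calls can skip wells with no
    # measurement; the keys and values below are example data only.
    import numpy as np
    wellkeys_and_values = np.array([['p1', 'A01', 0.3],
                                    ['p1', 'A02', None]], dtype=object)
    for row in wellkeys_and_values:
        if row[-1] is None:
            row[-1] = np.nan
    return np.nanmax([float(v[-1]) for v in wellkeys_and_values])  # -> 0.3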

def prompt_user_to_link_table(parent, table):
    '''Prompts the user for information about the given table so it may be
    linked into the tables that CPA already accesses.
    returns the given table name or None if the user cancels
    '''
    dlg = wx.SingleChoiceDialog(parent, 'What kind of data is in this table (%s)?'%(table),
                                'Select table type', ['per-well', 'per-image', 'per-object', 'other'], 
                                wx.CHOICEDLG_STYLE)
    show_table_button = wx.Button(dlg, -1, 'Show table')
    dlg.Sizer.Children[2].GetSizer().Insert(0, show_table_button, 0, wx.ALL, 10)
    dlg.Sizer.Children[2].GetSizer().InsertStretchSpacer(1, 1)
    def on_show_table(evt):
        from tableviewer import TableViewer
        tableview = TableViewer(get_main_frame_or_none())
        tableview.Show()
        tableview.load_db_table(table)
    show_table_button.Bind(wx.EVT_BUTTON, on_show_table)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    new_table_type = dlg.GetStringSelection()

    if new_table_type == 'per-well':
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.well_key_columns()
    elif new_table_type == 'per-image':
        dlg = wx.MessageDialog(parent, 'Does this per-image table represent a '
                               'new set of images in your experiment?', 
                               'New per-image table', wx.YES_NO)
        if dlg.ShowModal() == wx.ID_YES:
            wx.MessageDialog(parent, 'Sorry, CPA does not currently support multiple\n'
                             'per-image tables unless they are referring to the\n'
                             'same images.\n\n'
                             'Please see the manual for more information',
                             'Multiple per-image tables not supported')
            dlg.Destroy()
            return None
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.image_key_columns()
    elif new_table_type == 'per-object':
        dlg = wx.MessageDialog(parent, 'Does this per-object table represent a '
                               'new set of objects in your experiment?', 
                               'New per-object table', wx.YES_NO)
        if dlg.ShowModal() == wx.ID_YES:
            wx.MessageDialog(parent, 'Sorry, CPA does not currently support multiple\n'
                             'per-object tables unless they are referring to the\n'
                             'same objects.\n\n'
                             'Please see the manual for more information',
                             'Multiple per-object tables not supported')
        if p.object_table:
            if table == p.object_table:
                raise ValueError('"%s" is already the per-object table.'%(table))
            link_table_to_try = p.object_table
            link_cols_to_try = dbconnect.object_key_columns()
        else:
            # A new per-object table should not be linked when the project has
            # no per-object table of its own; connecting it directly to the
            # image_table is asking for trouble.
            return None

    else:
        dlg = wx.SingleChoiceDialog(parent, 'Which of your tables is "%s" linked '
                                    'to?'%(table), 'Select linking table', 
                                    db.get_linkable_tables(), wx.CHOICEDLG_STYLE)
        if dlg.ShowModal() != wx.ID_OK:
            dlg.Destroy()
            return None
        link_table_to_try = dlg.GetStringSelection()
        link_cols_to_try = []

    dlg = LinkTablesDialog(parent, table, link_table_to_try, 
                           link_cols_to_try, link_cols_to_try)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    col_pairs = dlg.get_column_pairs()
    
    src_cols = [col_pair[0][1] for col_pair in col_pairs]
    dest_cols = [col_pair[1][1] for col_pair in col_pairs]

    db.do_link_tables(table, link_table_to_try, src_cols, dest_cols)
    # return the newly linked table
    return table
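
def _example_link_new_table(parent_frame):
    # A minimal usage sketch for prompt_user_to_link_table above, assuming a
    # running wx app and a loaded project; the table name below is
    # hypothetical.
    linked = prompt_user_to_link_table(parent_frame, 'per_well_treatments')
    if linked is None:
        return None  # the user cancelled one of the dialogs
    # at this point db.do_link_tables has recorded how the new table joins the
    # existing image/object tables
    return linked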
Example No. 6
def prompt_user_to_link_table(parent, table):
    '''Prompts the user for information about the given table so it may be
    linked into the tables that CPA already accesses.
    returns the given table name or None if the user cancels
    '''
    dlg = wx.SingleChoiceDialog(parent, 'What kind of data is in this table (%s)?'%(table),
                                'Select table type', ['per-well', 'per-image', 'per-object', 'other'], 
                                wx.CHOICEDLG_STYLE)
    show_table_button = wx.Button(dlg, -1, 'Show table')
    dlg.Sizer.Children[2].GetSizer().Insert(0, show_table_button, 0, wx.ALL, 10)
    dlg.Sizer.Children[2].GetSizer().InsertStretchSpacer(1, 1)
    def on_show_table(evt):
        from tableviewer import TableViewer
        tableview = TableViewer(get_main_frame_or_none())
        tableview.Show()
        tableview.load_db_table(table)
    show_table_button.Bind(wx.EVT_BUTTON, on_show_table)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    new_table_type = dlg.GetStringSelection()

    if new_table_type == 'per-well':
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.well_key_columns()
    elif new_table_type == 'per-image':
        dlg = wx.MessageDialog(parent, 'Does this per-image table represent a '
                               'new set of images in your experiment?', 
                               'New per-image table', wx.YES_NO)
        if dlg.ShowModal() == wx.ID_YES:
            wx.MessageDialog(parent,'Sorry, CPA does not currently support multiple\n'
                             'per-image tables unless they are referring to the\n'
                             'same images.\n\n'
                             'Please see the manual for more information',
                             'Multiple per-image tables not supported')
            dlg.Destroy()
            return None
        link_table_to_try = p.image_table
        link_cols_to_try = dbconnect.image_key_columns()
    elif new_table_type == 'per-object':
        dlg = wx.MessageDialog(parent, 'Does this per-object table represent a '
                               'new set of objects in your experiment?', 
                               'New per-object table', wx.YES_NO)
        if dlg.ShowModal() == wx.ID_YES:
            wx.MessageDialog(parent,'Sorry, CPA does not currently support multiple\n'
                             'per-object tables unless they are referring to the\n'
                             'same objects.\n\n'
                             'Please see the manual for more information',
                             'Multiple per-object tables not supported')
        if p.object_table:
            if table == p.object_table:
                raise ValueError('"%s" is already the per-object table.'%(table))
            link_table_to_try = p.object_table
            link_cols_to_try = dbconnect.object_key_columns()
        else:
            # A new per-object table should not be linked when the project has
            # no per-object table of its own; connecting it directly to the
            # image_table is asking for trouble.
            return None

    else:
        dlg = wx.SingleChoiceDialog(parent, 'Which of your tables is "%s" linked '
                                    'to?'%(table), 'Select linking table', 
                                    db.get_linkable_tables(), wx.CHOICEDLG_STYLE)
        if dlg.ShowModal() != wx.ID_OK:
            dlg.Destroy()
            return None
        link_table_to_try = dlg.GetStringSelection()
        link_cols_to_try = []

    dlg = LinkTablesDialog(parent, table, link_table_to_try, 
                           link_cols_to_try, link_cols_to_try)
    if dlg.ShowModal() != wx.ID_OK:
        dlg.Destroy()
        return None
    col_pairs = dlg.get_column_pairs()
    
    src_cols = [col_pair[0][1] for col_pair in col_pairs]
    dest_cols = [col_pair[1][1] for col_pair in col_pairs]

    db.do_link_tables(table, link_table_to_try, src_cols, dest_cols)
    # return the newly linked table
    return table
    def do_normalization(self):
        if not self.validate():
            # Should be unreachable
            wx.MessageBox(
                'Your normalization settings are invalid. Can\'t perform normalization.'
            )

        long_cols = [
            col for col in self.col_choices.GetCheckedStrings()
            if len(col) + 4 > 64
        ]
        if long_cols:
            dlg = wx.MessageDialog(
                self, 'The following columns contain more '
                'than 64 characters when a normalization suffix (4 '
                'characters) is appended. This may cause a problem when '
                'writing to the database.\n %s' % ('\n'.join(long_cols)),
                'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_CANCEL:
                return
            dlg.Destroy()

        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
        if p.db_type == 'mysql':
            BATCH_SIZE = 100
        else:
            BATCH_SIZE = 1
        if input_table == p.object_table:
            FIRST_MEAS_INDEX += 1  # the object key adds one extra column (the object number)
        if wellkey_cols:
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1

        if db.table_exists(output_table):
            dlg = wx.MessageDialog(
                self, 'Are you sure you want to overwrite the table "%s"?' %
                (output_table), "Overwrite table?",
                wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_NO:
                dlg.Destroy()
                return
            dlg.Destroy()

        #
        # First Get the data from the db.
        #
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s" % (im_clause(
                ), well_clause(), ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:

                # If we have x and y from cells, we can use that for classifier
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # cell X and Y location columns are also fetched (used by Classifier)
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(
                            p.object_table), well_clause(p.image_table),
                        p.cell_x_loc, p.cell_y_loc, ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

                else:
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(p.object_table),
                        well_clause(p.image_table), ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

            else:

                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # cell X and Y location columns are also fetched (used by Classifier)

                    query = "SELECT %s, %s, %s, %s FROM %s" % (
                        im_clause(), p.cell_x_loc, p.cell_y_loc,
                        ', '.join(meas_cols), input_table)

                else:
                    query = "SELECT %s, %s FROM %s" % (
                        im_clause(), ', '.join(meas_cols), input_table)

        if p.negative_control:  # if the user defined negative control, we can use that to fetch the wellkeys
            neg_query = query + ' AND ' + p.negative_control  # fetch all the negative control elements

        if wellkey_cols:
            query += " ORDER BY %s" % (well_clause(p.image_table))

        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        #

        input_data = np.array(db.execute(query), dtype=object)
        if p.negative_control:
            import pandas as pd
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " +
                         str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
            neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

        output_columns = np.ones(input_data[:,
                                            FIRST_MEAS_INDEX:].shape) * np.nan
        output_factors = np.ones(input_data[:,
                                            FIRST_MEAS_INDEX:].shape) * np.nan
        for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d" %
                                         (colnum + 1, len(meas_cols)))
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            for step_num, step_panel in enumerate(self.norm_steps):
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT,
                                          norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
                    well_keys = input_data[:,
                                           range(WELL_KEY_INDEX,
                                                 FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    for plate, plate_grp in groupby(wellkeys_and_vals,
                                                    lambda row: row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(
                            keys_and_vals)
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)
                        new_norm_data += pnorm_data.flatten()[
                            ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id

                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        mean_plate_col = neg_mean_plate[colnum +
                                                        FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum +
                                                      FIRST_MEAS_INDEX]
                        print(mean_plate_col)
                        print(std_plate_col)

                    well_keys = input_data[:,
                                           range(WELL_KEY_INDEX,
                                                 FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    # print wellkeys_and_vals
                    for plate, plate_grp in groupby(wellkeys_and_vals,
                                                    lambda row: row[0]):
                        plate_data = np.array(list(plate_grp))[:, -1].flatten()
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)

                        if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except Exception:
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            try:
                                pnorm_data = (pnorm_data -
                                              plate_mean) / plate_std
                                print(pnorm_data)
                            except Exception:
                                logging.error(
                                    "Plate std is zero, division by zero!")

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data
                else:
                    norm_data = norm.do_normalization_step(norm_data, **d)

            output_columns[:, colnum] = np.array(norm_data)
            output_factors[:,
                           colnum] = col.astype(float) / np.array(norm_data,
                                                                  dtype=float)

        dlg.Destroy()
        return  # early exit left in place: the table-writing code below is currently skipped

        norm_table_cols = []
        # Write new table
        db.execute('DROP TABLE IF EXISTS %s' % (output_table))
        if input_table == p.image_table:
            norm_table_cols += dbconnect.image_key_columns()
            col_defs = ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
                for col in dbconnect.image_key_columns()
            ])
        elif input_table == p.object_table:
            norm_table_cols += obkey_cols
            col_defs = ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.object_table, col))
                for col in obkey_cols
            ])
        if wellkey_cols:
            norm_table_cols += wellkey_cols
            col_defs += ', ' + ', '.join([
                '%s %s' % (col, db.GetColumnTypeString(p.image_table, col))
                for col in wellkey_cols
            ])

        if input_table == p.object_table:
            if p.cell_x_loc and p.cell_y_loc:
                norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
                col_defs += ', %s %s' % (
                    p.cell_x_loc,
                    db.GetColumnTypeString(p.object_table, p.cell_x_loc)
                ) + ', ' + '%s %s' % (p.cell_y_loc,
                                      db.GetColumnTypeString(
                                          p.object_table, p.cell_y_loc))

        if wants_norm_meas:
            col_defs += ', ' + ', '.join([
                '%s_NmM %s' % (col, db.GetColumnTypeString(input_table, col))
                for col in meas_cols
            ])
        if wants_norm_factor:
            col_defs += ', ' + ', '.join([
                '%s_NmF %s' % (col, db.GetColumnTypeString(input_table, col))
                for col in meas_cols
            ])

        for col in meas_cols:
            if wants_norm_meas:
                norm_table_cols += ['%s_NmM' % (col)]
            if wants_norm_factor:
                norm_table_cols += ['%s_NmF' % (col)]
        db.execute('CREATE TABLE %s (%s)' % (output_table, col_defs))

        dlg = wx.ProgressDialog('Writing to "%s"' % (output_table),
                                "Writing normalized values to database",
                                maximum=output_columns.shape[0],
                                parent=self,
                                style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL
                                | wx.PD_ELAPSED_TIME | wx.PD_ESTIMATED_TIME
                                | wx.PD_REMAINING_TIME)

        cmd = 'INSERT INTO %s VALUES ' % (output_table)
        cmdi = cmd
        for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
            cmdi += '(' + ','.join(['"%s"'] * len(norm_table_cols)) + ')'
            if wants_norm_meas and wants_norm_factor:
                cmdi = cmdi % tuple(
                    list(input_data[i, :FIRST_MEAS_INDEX]) + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in val
                    ] + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in factor
                    ])
            elif wants_norm_meas:
                cmdi = cmdi % tuple(
                    list(input_data[i, :FIRST_MEAS_INDEX]) + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in val
                    ])
            elif wants_norm_factor:
                cmdi = cmdi % tuple(
                    list(input_data[i, :FIRST_MEAS_INDEX]) + [
                        'NULL' if (np.isnan(x) or np.isinf(x)) else x
                        for x in factor
                    ])
            if (i + 1) % BATCH_SIZE == 0 or i == len(output_columns) - 1:
                db.execute(str(cmdi))
                cmdi = cmd
                # update status dialog
                (keep_going, skip) = dlg.Update(i)
                if not keep_going:
                    break
            else:
                cmdi += ',\n'
        dlg.Destroy()
        db.Commit()

        #
        # Update table linkage
        #
        if db.get_linking_tables(input_table, output_table) is not None:
            db.do_unlink_table(output_table)

        if input_table == p.image_table:
            db.do_link_tables(output_table, input_table, imkey_cols,
                              imkey_cols)
        elif input_table == p.object_table:
            db.do_link_tables(output_table, input_table, obkey_cols,
                              obkey_cols)

        #
        # Show the resultant table
        #
        import tableviewer
        tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
        tv.Show()
        tv.load_db_table(output_table)
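
def _example_sql_value(x):
    # A small sketch of the NaN/Inf handling used when the batched INSERT
    # statements are built in do_normalization above: non-finite floats have
    # no SQL literal, so they are written out as NULL (this variant leaves
    # NULL unquoted).
    import numpy as np
    if isinstance(x, float) and (np.isnan(x) or np.isinf(x)):
        return 'NULL'
    return '"%s"' % x

# e.g. ','.join(_example_sql_value(v) for v in [1.5, float('nan')]) -> '"1.5",NULL'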
Example No. 8
    def do_normalization(self):
        if not self.validate():
            # Should be unreachable
            wx.MessageBox(
                'Your normalization settings are invalid. Can\'t perform normalization.'
            )

        long_cols = [
            col for col in self.col_choices.GetCheckedStrings()
            if len(col) + 4 > 64
        ]
        if long_cols:
            dlg = wx.MessageDialog(
                self, 'The following columns contain more '
                'than 64 characters when a normalization suffix (4 '
                'characters) is appended. This may cause a problem when '
                'writing to the database.\n %s' % ('\n'.join(long_cols)),
                'Warning', wx.OK | wx.CANCEL | wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_CANCEL:
                return
            dlg.Destroy()

        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
        if p.db_type == 'mysql':
            BATCH_SIZE = 100
        else:
            BATCH_SIZE = 1
        if input_table == p.object_table:
            FIRST_MEAS_INDEX += 1  # the object key adds one extra column (the object number)
        if wellkey_cols:
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1

        if db.table_exists(output_table):
            dlg = wx.MessageDialog(
                self, 'Are you sure you want to overwrite the table "%s"?' %
                (output_table), "Overwrite table?",
                wx.YES_NO | wx.NO_DEFAULT | wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_NO:
                dlg.Destroy()
                return
            dlg.Destroy()

        #
        # First Get the data from the db.
        #
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s" % (im_clause(
                ), well_clause(), ', '.join(meas_cols), input_table)
            else:
                query = "SELECT %s, %s FROM %s" % (
                    im_clause(), ', '.join(meas_cols), input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:

                # If we have x and y from cells, we can use that for classifier
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # cell X and Y location columns are also fetched (used by Classifier)
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(
                            p.object_table), well_clause(p.image_table),
                        p.cell_x_loc, p.cell_y_loc, ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

                else:
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s" % (
                        dbconnect.UniqueObjectClause(p.object_table),
                        well_clause(p.image_table), ', '.join([
                            '%s.%s' % (p.object_table, col)
                            for col in meas_cols
                        ]), p.image_table, p.object_table, ' AND '.join([
                            '%s.%s=%s.%s' %
                            (p.image_table, c, p.object_table, c)
                            for c in imkey_cols
                        ]))

            else:

                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # cell X and Y location columns are also fetched (used by Classifier)

                    query = "SELECT %s, %s, %s, %s FROM %s" % (
                        im_clause(), p.cell_x_loc, p.cell_y_loc,
                        ', '.join(meas_cols), input_table)

                else:
                    query = "SELECT %s, %s FROM %s" % (
                        im_clause(), ', '.join(meas_cols), input_table)

        if p.negative_control:  # if the user defined negative control, we can use that to fetch the wellkeys
            neg_query = query + ' AND ' + p.negative_control  # fetch all the negative control elements

        if wellkey_cols:
            query += " ORDER BY %s" % (well_clause(p.image_table))

        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        #

        input_data = np.array(db.execute(query), dtype=object)
        if p.negative_control:
            import pandas as pd
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " +
                         str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
            neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

        output_columns = np.ones(input_data[:,
                                            FIRST_MEAS_INDEX:].shape) * np.nan
        output_factors = np.ones(input_data[:,
                                            FIRST_MEAS_INDEX:].shape) * np.nan
        for colnum, col in enumerate(input_data[:, FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d" %
                                         (colnum + 1, len(meas_cols)))
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            for step_num, step_panel in enumerate(self.norm_steps):
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT,
                                          norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
                    well_keys = input_data[:,
                                           range(WELL_KEY_INDEX,
                                                 FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    for plate, plate_grp in groupby(wellkeys_and_vals,
                                                    lambda row: row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(
                            keys_and_vals)
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)
                        new_norm_data += pnorm_data.flatten()[
                            ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id

                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        mean_plate_col = neg_mean_plate[colnum +
                                                        FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum +
                                                      FIRST_MEAS_INDEX]
                        print(mean_plate_col)
                        print(std_plate_col)

                    well_keys = input_data[:,
                                           range(WELL_KEY_INDEX,
                                                 FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack(
                        (well_keys, np.array([norm_data]).T))
                    new_norm_data = []
                    # print wellkeys_and_vals
                    for plate, plate_grp in groupby(wellkeys_and_vals,
                                                    lambda row: row[0]):
                        plate_data = np.array(list(plate_grp))[:, -1].flatten()
                        pnorm_data = norm.do_normalization_step(
                            plate_data, **d)

                        if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except Exception:
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            try:
                                pnorm_data = (pnorm_data -
                                              plate_mean) / plate_std
                                print(pnorm_data)
                            except Exception:
                                logging.error(
                                    "Plate std is zero, division by zero!")

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data
    def do_normalization(self):
        if not self.validate():
            # Should be unreachable
            wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.')
            
        long_cols = [col for col in self.col_choices.GetCheckedStrings() 
                     if len(col) + 4 > 64]
        if long_cols:
            dlg = wx.MessageDialog(self, 'The following columns contain more '
                    'than 64 characters when a normalization suffix (4 '
                    'characters) is appended. This may cause a problem when '
                    'writing to the database.\n %s'%('\n'.join(long_cols)), 
                    'Warning', wx.OK|wx.CANCEL|wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_CANCEL:
                return
            dlg.Destroy()

        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
        if p.db_type == 'mysql':
            BATCH_SIZE = 100
        else:
            BATCH_SIZE = 1
        if input_table == p.object_table: 
            FIRST_MEAS_INDEX += 1 # the object key adds one extra column (the object number)
        if wellkey_cols:
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1
                
        if db.table_exists(output_table):
            dlg = wx.MessageDialog(self, 'Are you sure you want to overwrite the table "%s"?'%(output_table), 
                                   "Overwrite table?", wx.YES_NO|wx.NO_DEFAULT|wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_NO:
                dlg.Destroy()
                return 
            dlg.Destroy()

        #
        # First Get the data from the db.
        #
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s"%(
                            im_clause(), well_clause(), ', '.join(meas_cols), 
                            input_table)
            else:
                query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:

                # If we have x and y from cells, we can use that for classifier
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2 # cell X and Y location columns are also fetched (used by Classifier)
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table),
                                p.cell_x_loc,
                                p.cell_y_loc,
                                ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                                p.image_table, p.object_table,
                                ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) 
                                              for c in imkey_cols]) )

                else:
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table), 
                                ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                                p.image_table, p.object_table,
                                ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) 
                                              for c in imkey_cols]) )

            else:

                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2 # cell X and Y location columns are also fetched (used by Classifier)
                    
                    query = "SELECT %s, %s, %s, %s FROM %s"%(
                            im_clause(), p.cell_x_loc, p.cell_y_loc, ', '.join(meas_cols),
                            input_table)

                else:
                    query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)

        if p.negative_control: # if the user defined negative control, we can use that to fetch the wellkeys
            neg_query = query + ' AND ' + p.negative_control # fetch all the negative control elements

        if wellkey_cols:
            query += " ORDER BY %s"%(well_clause(p.image_table))
            
            
        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        # 

        input_data = np.array(db.execute(query), dtype=object)  
        if p.negative_control:
            import pandas as pd
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " + str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
            neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()

        output_columns = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
        output_factors = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
        for colnum, col in enumerate(input_data[:,FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d"%(colnum+1, len(meas_cols))) 
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            for step_num, step_panel in enumerate(self.norm_steps):
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
                    well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2) ] 
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data    = []
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                        pnorm_data = norm.do_normalization_step(plate_data, **d)
                        new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id

                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]  
                        print(mean_plate_col)
                        print(std_plate_col)            

                    well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data    = []
                    # print wellkeys_and_vals
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                        plate_data = np.array(list(plate_grp))[:,-1].flatten()
                        pnorm_data = norm.do_normalization_step(plate_data, **d)

                        if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except Exception:
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            try:
                                pnorm_data = (pnorm_data - plate_mean) / plate_std
                                print(pnorm_data)
                            except Exception:
                                logging.error("Plate std is zero, division by zero!")

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data
                else:
                    norm_data = norm.do_normalization_step(norm_data, **d)
                    
            output_columns[:,colnum] = np.array(norm_data)
            output_factors[:,colnum] = col.astype(float) / np.array(norm_data,dtype=float)

        dlg.Destroy()
        return # early exit left in place: the table-writing code below is currently skipped
                
        norm_table_cols = []
        # Write new table
        db.execute('DROP TABLE IF EXISTS %s'%(output_table))
        if input_table == p.image_table:
            norm_table_cols += dbconnect.image_key_columns()
            col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                              for col in dbconnect.image_key_columns()])
        elif input_table == p.object_table:
            norm_table_cols += obkey_cols
            col_defs = ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.object_table, col))
                              for col in obkey_cols])
        if wellkey_cols:
            norm_table_cols += wellkey_cols
            col_defs +=  ', '+ ', '.join(['%s %s'%(col, db.GetColumnTypeString(p.image_table, col))
                                        for col in wellkey_cols])

        if input_table == p.object_table:
            if p.cell_x_loc and p.cell_y_loc:
                norm_table_cols += [p.cell_x_loc, p.cell_y_loc]
                col_defs += ', %s %s'%(p.cell_x_loc, db.GetColumnTypeString(p.object_table, p.cell_x_loc)) + ', ' + '%s %s'%(p.cell_y_loc, db.GetColumnTypeString(p.object_table, p.cell_y_loc))

        if wants_norm_meas:
            col_defs += ', '+ ', '.join(['%s_NmM %s'%(col, db.GetColumnTypeString(input_table, col))
                                         for col in meas_cols]) 
        if wants_norm_factor:
            col_defs += ', '+ ', '.join(['%s_NmF %s'%(col, db.GetColumnTypeString(input_table, col))
                                         for col in meas_cols]) 

        for col in meas_cols:
            if wants_norm_meas:
                norm_table_cols += ['%s_NmM'%(col)]
            if wants_norm_factor:
                norm_table_cols += ['%s_NmF'%(col)]
        db.execute('CREATE TABLE %s (%s)'%(output_table, col_defs))
        
        dlg = wx.ProgressDialog('Writing to "%s"'%(output_table),
                               "Writing normalized values to database",
                               maximum = output_columns.shape[0],
                               parent=self,
                               style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL|wx.PD_ELAPSED_TIME|wx.PD_ESTIMATED_TIME|wx.PD_REMAINING_TIME)
            
        cmd = 'INSERT INTO %s VALUES '%(output_table)
        cmdi = cmd
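        # Build multi-row INSERT statements: each row is rendered as a quoted
        # value tuple with NaN/inf replaced by NULL, and the accumulated
        # statement is executed every BATCH_SIZE rows (and once at the end).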
        for i, (val, factor) in enumerate(zip(output_columns, output_factors)):
            cmdi += '(' + ','.join(['"%s"']*len(norm_table_cols)) + ')'
            if wants_norm_meas and wants_norm_factor:
                cmdi = cmdi%tuple(list(input_data[i, :FIRST_MEAS_INDEX]) + 
                                  ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in val] + 
                                  ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in factor])
            elif wants_norm_meas:
                cmdi = cmdi%tuple(list(input_data[i, :FIRST_MEAS_INDEX]) + 
                                  ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in val])
            elif wants_norm_factor:
                cmdi = cmdi%tuple(list(input_data[i, :FIRST_MEAS_INDEX]) + 
                                  ['NULL' if (np.isnan(x) or np.isinf(x)) else x for x in factor])
            if (i+1) % BATCH_SIZE == 0 or i==len(output_columns)-1:
                db.execute(str(cmdi))
                cmdi = cmd
                # update status dialog
                (keep_going, skip) = dlg.Update(i)
                if not keep_going:
                    break
            else:
                cmdi += ',\n'
        dlg.Destroy()
        db.Commit()
        
        #
        # Update table linkage
        #
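        # Remove any stale linkage for the output table, then link it back to
        # its source table on the shared key columns so other tools can join
        # against the normalized values.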
        if db.get_linking_tables(input_table, output_table) is not None:
            db.do_unlink_table(output_table)
            
        if input_table == p.image_table:
            db.do_link_tables(output_table, input_table, imkey_cols, imkey_cols)
        elif input_table == p.object_table:
            db.do_link_tables(output_table, input_table, obkey_cols, obkey_cols)            
        
        #
        # Show the resultant table        
        #
        import tableviewer
        tv = tableviewer.TableViewer(ui.get_main_frame_or_none())
        tv.Show()
        tv.load_db_table(output_table)
    def do_normalization(self):
        if not self.validate():
            # Should be unreachable
            wx.MessageBox('Your normalization settings are invalid. Can\'t perform normalization.')
            return

        long_cols = [col for col in self.col_choices.GetCheckedStrings() 
                     if len(col) + 4 > 64]
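        # MySQL limits identifiers to 64 characters, so appending the
        # 4-character _NmM/_NmF suffix can push a long column name over the limit.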
        if long_cols:
            dlg = wx.MessageDialog(self, 'The following columns contain more '
                    'than 64 characters when a normalization suffix (4 '
                    'characters) is appended. This may cause a problem when '
                    'writing to the database.\n %s'%('\n'.join(long_cols)), 
                    'Warning', wx.OK|wx.CANCEL|wx.ICON_EXCLAMATION)
            res = dlg.ShowModal()
            dlg.Destroy()
            if res == wx.ID_CANCEL:
                return

        imkey_cols = dbconnect.image_key_columns()
        obkey_cols = dbconnect.object_key_columns()
        wellkey_cols = dbconnect.well_key_columns()
        im_clause = dbconnect.UniqueImageClause
        well_clause = dbconnect.UniqueWellClause
        input_table = self.table_choice.GetStringSelection()
        meas_cols = self.col_choices.GetCheckedStrings()
        wants_norm_meas = self.norm_meas_checkbox.IsChecked()
        wants_norm_factor = self.norm_factor_checkbox.IsChecked()
        output_table = self.output_table.Value
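        # Column layout of the data fetched below: key columns (image key, plus
        # an object number for per-object tables), then the well key columns
        # when present, then optional cell x/y locations, then the selected
        # measurements. WELL_KEY_INDEX and FIRST_MEAS_INDEX index into this layout.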
        FIRST_MEAS_INDEX = len(imkey_cols + (wellkey_cols or tuple()))
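        # MySQL accepts multi-row INSERT statements, so rows are written in
        # batches; other back-ends fall back to one row per statement.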
        if p.db_type == 'mysql':
            BATCH_SIZE = 100
        else:
            BATCH_SIZE = 1
        if input_table == p.object_table:
            FIRST_MEAS_INDEX += 1  # the object key adds an object-number column
        if wellkey_cols:
            if input_table == p.image_table:
                WELL_KEY_INDEX = len(imkey_cols)
            else:
                WELL_KEY_INDEX = len(imkey_cols) + 1
                
        if db.table_exists(output_table):
            dlg = wx.MessageDialog(self, 'Are you sure you want to overwrite the table "%s"?'%(output_table), 
                                   "Overwrite table?", wx.YES_NO|wx.NO_DEFAULT|wx.ICON_EXCLAMATION)
            if dlg.ShowModal() == wx.ID_NO:
                dlg.Destroy()
                return 
            dlg.Destroy()

        #
        # First Get the data from the db.
        #
        if input_table == p.image_table:
            if wellkey_cols:
                # If there are well columns, fetch them.
                query = "SELECT %s, %s, %s FROM %s"%(
                            im_clause(), well_clause(), ', '.join(meas_cols), 
                            input_table)
            else:
                query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)
        elif input_table == p.object_table:
            if p.image_table and wellkey_cols:

                # If the per-object table has cell x/y locations, fetch them so
                # the Classifier can use the results.
                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # cell X and Y location columns are included for the Classifier
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table),
                                p.cell_x_loc,
                                p.cell_y_loc,
                                ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                                p.image_table, p.object_table,
                                ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) 
                                              for c in imkey_cols]) )

                else:
                    # If there are well columns, fetch them from the per-image table.
                    query = "SELECT %s, %s, %s FROM %s, %s WHERE %s"%(
                                dbconnect.UniqueObjectClause(p.object_table),
                                well_clause(p.image_table), 
                                ', '.join(['%s.%s'%(p.object_table, col) for col in meas_cols]),
                                p.image_table, p.object_table,
                                ' AND '.join(['%s.%s=%s.%s'%(p.image_table, c, p.object_table, c) 
                                              for c in imkey_cols]) )

            else:

                if p.cell_x_loc and p.cell_y_loc:
                    FIRST_MEAS_INDEX += 2  # cell X and Y location columns are included for the Classifier
                    
                    query = "SELECT %s, %s, %s, %s FROM %s"%(
                            im_clause(), p.cell_x_loc, p.cell_y_loc, ', '.join(meas_cols),
                            input_table)

                else:
                    query = "SELECT %s, %s FROM %s"%(
                            im_clause(), ', '.join(meas_cols),
                            input_table)

        if p.negative_control: # if the user defined a negative control, use it to fetch the control rows
            neg_query = query + ' AND ' + p.negative_control # fetch only the negative-control rows
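            # Note: appending with ' AND ' assumes the base query already
            # contains a WHERE clause (the per-object join path above).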

        if wellkey_cols:
            query += " ORDER BY %s"%(well_clause(p.image_table))
            
            
        dlg = wx.ProgressDialog('Computing normalized values',
                                'Querying database for raw data.',
                                parent=self,
                                style = wx.PD_CAN_ABORT|wx.PD_APP_MODAL)
        dlg.Pulse()
        #
        # MAKE THE QUERY
        # 

        input_data = np.array(db.execute(query), dtype=object)  
        if p.negative_control:
            import pandas as pd
            negative_control = pd.DataFrame(db.execute(neg_query), dtype=float)
            logging.info("# of objects in negative control: " + str(negative_control.shape[0]))
            logging.info("# of objects queried: " + str(input_data.shape[0]))
            neg_mean_plate = negative_control.groupby([WELL_KEY_INDEX]).mean()
            neg_std_plate = negative_control.groupby([WELL_KEY_INDEX]).std()
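            # Per-plate mean and std of the negative-control rows, grouped on
            # the plate column; used below to z-score each plate against its
            # negative control.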

        output_columns = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
        output_factors = np.ones(input_data[:,FIRST_MEAS_INDEX:].shape) * np.nan
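        # Normalize each measurement column independently; the normalization
        # factor recorded for each value is raw / normalized.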
        for colnum, col in enumerate(input_data[:,FIRST_MEAS_INDEX:].T):
            keep_going, skip = dlg.Pulse("Normalizing column %d of %d"%(colnum+1, len(meas_cols))) 
            if not keep_going:
                dlg.Destroy()
                return
            norm_data = col.copy()
            for step_num, step_panel in enumerate(self.norm_steps):
                d = step_panel.get_configuration_dict()
                if d[norm.P_GROUPING] in (norm.G_QUADRANT, norm.G_WELL_NEIGHBORS):
                    # Reshape data if normalization step is plate sensitive.
                    assert p.plate_id and p.well_id
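                    # Reshape each plate's values into the physical plate
                    # layout, run the normalization step on that 2-D map, then
                    # map the results back to row order using the index array
                    # returned by FormatPlateMapData.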
                    well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2) ] 
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data    = []
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                        keys_and_vals = np.array(list(plate_grp))
                        plate_data, wks, ind = FormatPlateMapData(keys_and_vals)
                        pnorm_data = norm.do_normalization_step(plate_data, **d)
                        new_norm_data += pnorm_data.flatten()[ind.flatten().tolist()].tolist()
                    norm_data = new_norm_data
                elif d[norm.P_GROUPING] == norm.G_PLATE:
                    assert p.plate_id and p.well_id
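                    # Normalize each plate's values as a group; with the
                    # negative-control aggregation type, each plate is also
                    # centered and scaled by that plate's negative-control mean
                    # and standard deviation.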

                    if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                        mean_plate_col = neg_mean_plate[colnum + FIRST_MEAS_INDEX]
                        std_plate_col = neg_std_plate[colnum + FIRST_MEAS_INDEX]  
                        print(mean_plate_col)
                        print(std_plate_col)

                    well_keys = input_data[:, range(WELL_KEY_INDEX, FIRST_MEAS_INDEX - 2)]
                    wellkeys_and_vals = np.hstack((well_keys, np.array([norm_data]).T))
                    new_norm_data    = []
                    # print wellkeys_and_vals
                    for plate, plate_grp in groupby(wellkeys_and_vals, lambda row: row[0]):
                        plate_data = np.array(list(plate_grp))[:,-1].flatten()
                        pnorm_data = norm.do_normalization_step(plate_data, **d)

                        if d[norm.P_AGG_TYPE] == norm.M_NEGCTRL:
                            try:
                                plate_mean = mean_plate_col[plate]
                                plate_std = std_plate_col[plate]
                            except KeyError:  # the plate label may be stored as a number in the index
                                plate_mean = mean_plate_col[int(plate)]
                                plate_std = std_plate_col[int(plate)]

                            try:
                                pnorm_data = (pnorm_data - plate_mean) / plate_std
                                print(pnorm_data)
                            except:
                                logging.error("Plate std is zero, division by zero!")

                        new_norm_data += pnorm_data.tolist()
                    norm_data = new_norm_data