Пример #1
0
 def test_rep_html_table_struct_complex(self):
     # Expected tag-id grid for the complex fixture: every row is 10 columns wide.
     # Rows that repeat a single id across all columns and the 'td-3' id that
     # appears in two consecutive rows presumably come from colspan/rowspan
     # cells in the fixture -- confirm against the fixture HTML.
     trailing_zeros = [0, 0, 0]

     expected_grid = [
         ['td-1'] + [0] * 9,
         ['td-2'] * 10,
         ['td-3', 'td-4', 'td-4', 'td-5', 'td-5', 'td-6', 'td-6'] + trailing_zeros,
         ['td-3'] + ['td-%d' % n for n in range(7, 13)] + trailing_zeros,
         ['td-13'] * 10,
     ]

     # Remaining ten rows: ids td-14 .. td-83, seven per row, then three zeros.
     for first_id in range(14, 84, 7):
         row_ids = ['td-%d' % n for n in range(first_id, first_id + 7)]
         expected_grid.append(row_ids + trailing_zeros)

     table_html = self.load_html_table_complex
     # Only the third returned value (the id grid) is checked here.
     _, _, actual_grid = rep_html_table_struct(table_html)
     self.assertEqual(actual_grid, expected_grid)
Пример #2
0
 def test_rep_html_table_struct_simple(self):
     # Expected tag-id grid for the simple fixture: 6 columns per row.
     columns = 6

     # Row 0 holds the header ids th-1 .. th-6.
     expected_grid = [['th-%d' % n for n in range(1, columns + 1)]]
     # Row 1 repeats 'td-1' in every column.
     expected_grid.append(['td-1'] * columns)
     # Remaining 18 rows: ids td-2 .. td-109, six consecutive ids per row.
     for first_id in range(2, 110, columns):
         expected_grid.append(['td-%d' % n for n in range(first_id, first_id + columns)])

     table_html = self.load_html_table_simple
     # Only the third returned value (the id grid) is checked here.
     _, _, actual_grid = rep_html_table_struct(table_html)
     self.assertEqual(actual_grid, expected_grid)
def _collect_efcm_pks(efcm_obs, html_tag_id_table, row_ind, col_ind, data_dict):
    """Gather pks of experimental factor concept maps aligned with one data cell.

    An experimental factor concept map counts as aligned when any of its table
    cells lies in the same row or the same column as the data cell.

    Args:
        efcm_obs: iterable of ExpFactConceptMap objects (or None).
        html_tag_id_table: 2D grid of html tag ids as returned by rep_html_table_struct.
        row_ind: row index of the data cell in html_tag_id_table.
        col_ind: column index of the data cell in html_tag_id_table.
        data_dict: parsed data value dict for the cell; its 'num_obs' entry is
                   filled in place from a 'NumObs' concept map when it is None.
    Returns:
        (list) pks of the aligned concept maps, excluding a 'NumObs' map whose
               value was stored into data_dict instead.
    """
    # always start from a fresh list so the caller never sees an unbound or stale value
    efcm_pk_list = []
    if efcm_obs is None:
        return efcm_pk_list

    for efcm in efcm_obs:
        # get table cells for this efcm
        matching_efcm_cells = get_matching_inds(efcm.dt_id, html_tag_id_table)
        matching_rows = [cell[0] for cell in matching_efcm_cells]
        matching_cols = [cell[1] for cell in matching_efcm_cells]
        if row_ind not in matching_rows and col_ind not in matching_cols:
            continue

        # if efcm is num obs, store value in appropriate place and don't add to metadata list
        if efcm.metadata.name == 'NumObs' and data_dict['num_obs'] is None:
            data_dict['num_obs'] = efcm.metadata.cont_value.mean
        else:
            efcm_pk_list.append(efcm.pk)

    return efcm_pk_list


def find_data_vals_in_table(data_table_object):
    """Parses neuroelectro.models DataTable object for assigned NeuronConceptMaps and EphysConceptMaps
        and returns a dictionary of data values at the row and column intersection of these

    Args:
        data_table_object: neuroelectro.models DataTable object with associated NeuronConceptMap and EphysConceptMap
                            objects.
    Returns:
        (dict) return_dict: a dictionary with keys corresponding to html tag elements from the entered data table
                            and fields: 'ncm_pk', 'ecm_pk', 'ref_text', 'data_value_dict', and 'efcm_pk_list'.
                            An empty dict is returned when the table has no neuron or ephys concept maps,
                            when the table could not be parsed, or when a TypeError is raised while processing.

    Example:
        >>> find_data_vals_in_table(data_table_ob)
            {'td-69':
                {'ncm_pk': 1L, 'ecm_pk': 1L, 'ref_text': u'463\xa0\xb1\xa089\xa0 (15)', 'data_value_dict':
                    {'num_obs': 15, 'max_range': None, 'min_range': None, 'value': 463.0, 'error': 89.0},
                 'efcm_pk_list': []
                 }
            }
    """
    return_dict = dict()
    try:
        table_soup = BeautifulSoup(data_table_object.table_html, 'lxml')
        ds = m.DataSource.objects.get(data_table=data_table_object)
        ecm_obs = ds.ephysconceptmap_set.all()
        ncm_obs = ds.neuronconceptmap_set.all()
        efcm_obs = ds.expfactconceptmap_set.all()

        # nothing to intersect unless both ephys and neuron concept maps are assigned to the table
        if ecm_obs.count() == 0 or ncm_obs.count() == 0:
            return return_dict

        # returns a flattened, parsed form of table where data table cells can be easily checked
        # for associated concept maps
        dataTable, numHeaderRows, html_tag_id_table = rep_html_table_struct(data_table_object.table_html)

        # if dataTable or idTable is none, parsing table failed, so return
        if dataTable is None or html_tag_id_table is None:
            return dict()

        for ncm in ncm_obs:
            # the same ncm may be linked to multiple data table value cells due to rowspan/colspan issues
            matching_neuron_cells = get_matching_inds(ncm.dt_id, html_tag_id_table)

            for ecm in ecm_obs:
                ecm_html_tag_id = ecm.dt_id
                if ecm_html_tag_id == '-1' or len(html_tag_id_table) == 0:
                    continue

                # the same ecm may be linked to multiple data table cells
                matching_ephys_cells = get_matching_inds(ecm_html_tag_id, html_tag_id_table)

                # iterate through all matched cells, finding corresponding data value cells at their intersection
                for neuron_cell in matching_neuron_cells:
                    for ephys_cell in matching_ephys_cells:
                        # the max below is saying data cells are usually to the right and bottom of header cells
                        table_cell_row_ind = max(neuron_cell[0], ephys_cell[0])
                        table_cell_col_ind = max(neuron_cell[1], ephys_cell[1])
                        table_cell_html_tag_id = html_tag_id_table[table_cell_row_ind][table_cell_col_ind]

                        data_tag = table_soup.find(id=table_cell_html_tag_id)
                        if data_tag is None:
                            continue
                        ref_text = data_tag.get_text()

                        # regex out the floating point values of data value string
                        data_dict = resolve_data_float(ref_text, True)
                        if not data_dict['value']:
                            # cells without a (truthy) parsed value are not reported
                            continue

                        # check for experimental factor concept maps; may also fill data_dict['num_obs']
                        efcm_pk_list = _collect_efcm_pks(efcm_obs, html_tag_id_table,
                                                         table_cell_row_ind, table_cell_col_ind, data_dict)

                        # a later ncm/ecm pair hitting the same cell overwrites the earlier entry
                        return_dict[table_cell_html_tag_id] = {
                            'ncm_pk': ncm.pk,
                            'ecm_pk': ecm.pk,
                            'ref_text': ref_text,
                            'data_value_dict': data_dict,
                            'efcm_pk_list': efcm_pk_list,
                        }

        return return_dict
    except TypeError:
        # best-effort: any TypeError while parsing or matching yields an empty result
        return dict()