def assocDataTableEphysVal(dataTableOb):
    """Associate a data table object with ephys concept map objects.

    Parses the table's HTML, scans every header (``th``) cell followed by
    every data (``td``) cell, and for each cell whose text matches an ephys
    property synonym gets or creates an ``EphysConceptMap`` row linking that
    cell to the matched property.

    Args:
        dataTableOb: data table object. Its ``table_text`` and ``table_html``
            attributes are read, and it is used to look up the corresponding
            ``DataSource``.

    Returns:
        None. Results are written via ``EphysConceptMap.objects.get_or_create``.
    """
    dt = dataTableOb
    ds = m.DataSource.objects.get(data_table=dt)
    robot_user = m.get_robot_user()

    # table_html is what actually gets parsed below, so guard it as well as
    # table_text; a None value would otherwise fail inside ''.join().
    if dt.table_text is None or not dt.table_html:
        return

    synonym_terms = [syn.term.lower() for syn in m.EphysPropSyn.objects.all()]

    soup = BeautifulSoup(''.join(dt.table_html), 'lxml')
    # Header cells are visited before data cells (not document order).
    cells = soup.find_all('th') + soup.find_all('td')

    for cell in cells:
        raw_text = cell.get_text()
        cell_text = raw_text.strip()
        # Cells without an id attribute are recorded with the sentinel -1.
        cell_id = str(cell['id']) if 'id' in cell.attrs else -1

        if not cell_text:
            continue
        if has_ascii_letters(cell_text) is not True:
            continue

        # SJT Note - Currently doesn't mine terms in synapse stop words list
        matched_ephys_ob = match_ephys_header(cell_text, synonym_terms)
        if not matched_ephys_ob:
            continue

        # Units are only needed once a property has matched.
        identified_unit = get_units_from_table_header(cell_text)
        # Stored reference text is the unstripped cell text, capped at 199 chars.
        save_ref_text = raw_text[:199]

        # NOTE(review): every field below is part of the get_or_create lookup,
        # so a row whose times_validated has since changed will not be found
        # again — confirm whether `defaults=` is wanted here.
        m.EphysConceptMap.objects.get_or_create(
            ref_text=save_ref_text,
            ephys_prop=matched_ephys_ob,
            source=ds,
            dt_id=cell_id,
            # match_quality=matchVal,  (not currently recorded)
            changed_by=robot_user,
            times_validated=0,
            identified_unit=identified_unit,
        )
def find_ephys_headers_in_table(table_html, early_stopping=False, early_stop_num=2):
    """Find table cells whose text matches an ephys property synonym.

    Header (``th``) cells are scanned before data (``td``) cells. Cells that
    are empty after stripping, or that contain no ASCII letters, are skipped.

    Args:
        table_html: HTML of the table; passed through ``''.join`` before
            parsing.
        early_stopping: when true, return as soon as ``early_stop_num``
            distinct matching cell texts have been collected.
        early_stop_num: number of matches that triggers the early return.

    Returns:
        A dict mapping stripped cell text to the object returned by
        ``match_ephys_header``, or ``None`` when the input is empty or no
        cell matched.
    """
    if not table_html:
        return None

    soup = BeautifulSoup(''.join(table_html), 'lxml')
    cells = soup.find_all('th') + soup.find_all('td')
    if not cells:
        # Nothing to match against, so skip loading the synonym list.
        return None

    synonym_terms = [syn.term.lower() for syn in m.EphysPropSyn.objects.all()]

    found = {}
    for cell in cells:
        cell_text = cell.get_text().strip()
        if not cell_text:
            continue
        if has_ascii_letters(cell_text) is not True:
            continue

        # SJT Note - Currently doesn't mine terms in synapse stop words list
        matched_ephys_ob = match_ephys_header(cell_text, synonym_terms)
        if not matched_ephys_ob:
            continue

        # Keyed by cell text, so repeated texts keep only the latest match.
        found[cell_text] = matched_ephys_ob
        if early_stopping and len(found) >= early_stop_num:
            return found

    return found or None