st.markdown("---") text_citation = st.text_input(label="Enter a citation string", value=citation_selected) parse_citation_clicked = st.button("Parse Citation") if parse_citation_clicked: citation_selected = text_citation response = requests.get(f"http://localhost:8000/parscit/{citation_selected}") json = response.json() text = json["text_tokens"] tags = json["tags"].split() # tokenize the text using white space tokenizer = WordTokenizer(tokenizer="spacy-whitespace") doc = tokenizer.nlp(" ".join(text)) # start index of every token token_indices = [token.idx for token in doc] # get start end index of every word start_end_indices = itertools.zip_longest( token_indices, token_indices[1:], fillvalue=len(" ".join(text)) ) start_end_indices = list(start_end_indices) HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>""" ents = [] for tag, (start_idx, end_idx) in zip(tags, start_end_indices): ents.append({"start": start_idx, "end": end_idx, "label": tag})
f"{header} ({normalized})" for header, normalized in zip( section_headers, normalized_section_headers) ] st.write("### Sections (Normalized Sections)") st.write( HTML_WRAPPER.format("<br />".join(header_normalized_header)), unsafe_allow_html=True, ) st.write("### Parsed References. ") for reference, tags in zip(references, parsed_reference_strings): HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>""" tokenizer = WordTokenizer(tokenizer="spacy-whitespace") doc = tokenizer.nlp(reference) # start index of every token token_indices = [token.idx for token in doc] # get start end index of every word start_end_indices = itertools.zip_longest(token_indices, token_indices[1:], fillvalue=len(reference)) start_end_indices = list(start_end_indices) ents = [] for tag, (start_idx, end_idx) in zip(tags.split(), start_end_indices): ents.append({"start": start_idx, "end": end_idx, "label": tag})