def serve_plain_abstract(empty=False):
    """Build the layout for a plain (un-tokenised) abstract.

    Args:
        empty: When true, nothing is fetched and the DOI, title and abstract
            text are all rendered as empty strings.

    Returns:
        A list of Dash components: the DOI link row, the title, the abstract
        text and a container holding the result of ``serve_buttons()``.
    """
    random_abstract = {"abstract": "", "title": ""}
    doi = ""
    if not empty:
        builder = AnnotationBuilder(local=True)
        # fetch an abstract to display (presumably a random one -- confirm
        # against AnnotationBuilder.get_abstract)
        random_abstract = builder.get_abstract(good_ones=False)
        doi = random_abstract['doi']
    return [
        html.Div(
            [
                html.Span("doi: "),
                html.A(
                    doi,
                    # str() keeps this consistent with the serve_abstract
                    # layouts and avoids a TypeError on a non-string DOI.
                    href="https://doi.org/" + str(doi),
                    target="_blank",
                    id="doi_container",
                ),
            ],
            className="row",
            style={"paddingBottom": "10px"},
        ),
        html.Div(
            random_abstract["title"],
            style={'fontSize': 'large', "padding": "8px 0px", "borderTop": "1px solid black"},
        ),
        html.Div(
            random_abstract["abstract"],
            # Dash style dictionaries use camelCase keys, matching the other
            # style dictionaries in this layout.
            style={"borderBottom": "1px solid black", "paddingBottom": "10px"},
        ),
        html.Div(serve_buttons()),
    ]
def serve_abstract(db, user_key, empty=False, show_labels=None, doi=None, past_tokens=None):
    """Build the token-annotation layout for one abstract.

    Args:
        db: Passed through unchanged to ``serve_macro_annotation``.
        user_key: Key identifying the annotating user; forwarded to the
            ``AnnotationBuilder`` lookups.
        empty: When true, no abstract is fetched and the container is rendered
            with no tokens.
        show_labels: Optional collection of label values to offer for
            annotation. Labels already present on the abstract but not in
            this collection are passed as passive labels.
        doi: Optional DOI forwarded to ``get_abstract``; replaced by the DOI
            of the abstract that was actually returned.
        past_tokens: Optional tokens that replace the freshly built ones.

    Returns:
        A list of Dash components: DOI row, annotation container, macro
        annotation container, instructions row and buttons row.
    """
    if empty:
        tokens = []
        existing_labels = []
    else:
        builder = AnnotationBuilder(local=True)
        random_abstract = builder.get_abstract(
            good_ones=False, doi=doi, user_key=user_key, only_relevant=True)
        doi = random_abstract['doi']
        # Tokenize and get the initial annotation. The third argument (cems)
        # is always off here; it used to be switched on when "MAT" was among
        # show_labels.
        cems = False
        tokens, existing_labels = builder.get_tokens(random_abstract, user_key, cems)
        if past_tokens is not None:
            tokens = past_tokens

    # labels for token-by-token annotation
    labels = AnnotationBuilder.LABELS
    macro_display = "none"
    passive_labels = []
    if show_labels is not None:
        passive_labels = [
            pl for pl in labels
            if pl["value"] in existing_labels and pl["value"] not in show_labels
        ]
        labels = [label for label in labels if label["value"] in show_labels]
        if "application" in show_labels:
            macro_display = "block"

    # show_labels may filter out every label; fall back to no selection
    # instead of raising IndexError on labels[0].
    selected_value = labels[0]['value'] if labels else None

    return [
        html.Div(
            [
                html.Span("doi: "),
                html.A(
                    doi,
                    href="https://doi.org/" + str(doi),
                    target="_blank",
                    id="doi_container",
                ),
            ],
            className="row",
            style={"paddingBottom": "10px"},
        ),
        dmi.AnnotationContainer(
            doi=doi,
            tokens=tokens,
            labels=labels,
            passiveLabels=passive_labels,
            className="annotation-container",
            selectedValue=selected_value,
            id="annotation_container",
        ),
        html.Div(serve_macro_annotation(db, macro_display), id="macro_annotation_container"),
        html.Div("", className="row instructions", id="annotation_instructions"),
        html.Div(serve_buttons(), id="buttons_container", className="row"),
    ]
def serve_layout(_, user_key, __):
    """List the annotations stored for ``user_key``.

    The first and third positional arguments are accepted but unused.

    Returns:
        A ``html.Div`` with a heading and an ordered list holding one entry
        per annotation: the text of the tokens in ``annotation.tokens[0]``
        in bold, the DOI, the annotation options and the labels.
    """
    def _as_list_item(annotation):
        # Render a single annotation as one <li> entry.
        leading_words = [token["text"] + " " for token in annotation.tokens[0]]
        return html.Li([
            html.Span(leading_words, style={"fontWeight": "bold"}),
            html.Br(),
            html.Span(annotation.doi),
            html.Span(" "),
            # the DOI is fully percent-encoded (safe="") before being handed on
            serve_ann_options(quote(annotation.doi, safe="")),
            html.Br(),
            html.Span(str(annotation.labels)),
        ])

    stored = AnnotationBuilder(local=True).get_annotations(user=user_key)
    entries = [_as_list_item(annotation) for annotation in stored]
    return html.Div([html.H5("My Annotated Abstracts"), html.Ol(entries)])
def load_next_abstract(
        skip_clicks, confirm_clicks, flag_clicks,
        tokens, doi, abstract_tags, user_key, annotation_labels, previous_labels):
    """Store the current annotation (if any) and serve the next abstract.

    Args:
        skip_clicks: Not ``None`` once the skip button was clicked.
        confirm_clicks: Not ``None`` once the confirm button was clicked.
        flag_clicks: Not ``None`` once the flag button was clicked.
        tokens: Tokens of the abstract currently being annotated.
        doi: DOI of the abstract currently being annotated.
        abstract_tags: Optional list of dicts with a ``"value"`` key.
        user_key: Key identifying the annotating user.
        annotation_labels: Optional ``&``-separated string of label values;
            when ``None`` all ``AnnotationBuilder.LABELS`` values are used.
        previous_labels: Optional list of dicts with a ``"value"`` key whose
            values are merged into the labels stored with the annotation.

    Returns:
        The layout produced by ``token_ann_app.serve_abstract``.
    """
    labels = [label["value"] for label in AnnotationBuilder.LABELS]
    if annotation_labels is not None:
        labels = annotation_labels.split('&')

    new_labels = labels
    # Truthiness also covers previous_labels being None, which made the former
    # len() check raise TypeError.
    if previous_labels:
        merged = labels + [label["value"] for label in previous_labels]
        # De-duplicate while keeping first-seen order so that the stored label
        # list is deterministic (a set gives arbitrary ordering).
        new_labels = list(dict.fromkeys(merged))

    builder = AnnotationBuilder(local=True)
    # Only users with a known username may write to the database.
    if builder.get_username(user_key) is not None:
        if confirm_clicks is not None:
            tags = None
            if abstract_tags is not None:
                tags = [tag["value"].lower() for tag in abstract_tags]
            annotation = TokenAnnotation(
                doi=doi, tokens=tokens, labels=new_labels, tags=tags, user=user_key)
            builder.insert(annotation, builder.ANNOTATION_COLLECTION)
            builder.update_tags(tags)
            doi = None
        elif flag_clicks is not None:
            macro_ann = MacroAnnotation(
                doi=doi, relevant=None, flag=True, abs_type=None, user=user_key)
            builder.insert(macro_ann, builder.MACRO_ANN_COLLECTION)
            doi = None

    if skip_clicks is not None:
        doi = None  # to load a new abstract

    # A surviving doi means the same abstract is served again, so its tokens
    # are passed back in to be reused.
    past_tokens = tokens if doi is not None else None
    return token_ann_app.serve_abstract(
        db, user_key, show_labels=labels, past_tokens=past_tokens, doi=doi)
def serve_layout(_, user_key, __):
    """Render the annotation leaderboard.

    The first and third positional arguments are accepted but unused.

    Returns:
        A ``html.Div`` holding a heading, a header row and one row per
        leaderboard entry, or the string ``"Not Authorized"`` when
        ``get_leaderboard`` returns ``None`` for ``user_key``.
    """
    board_builder = AnnotationBuilder()
    board = board_builder.get_leaderboard(user_key)
    if board is None:
        return "Not Authorized"

    header_row = build_row(
        ["User", "Macro Abstracts", "Token Abstracts", "Total Label Types"],
        "three columns table-header",
    )
    # one table row per leaderboard key, in the leaderboard's own order
    member_rows = [
        build_row(
            [
                board_builder.get_username(member),
                board[member]["macro_abstracts"],
                board[member]["token_abstracts"],
                board[member]["labels"],
            ],
            "three columns table-row",
        )
        for member in board
    ]
    return html.Div([html.H5("Annotation Leaderboard"), header_row] + member_rows)
def serve_abstract(user_key, doi=None):
    """Render the diff tokens for ``doi`` as seen by ``user_key``.

    Returns:
        A list with the DOI link row and a label-less annotation container
        showing the diff tokens, or the message returned by
        ``get_diff_tokens`` when no diff tokens are available.
    """
    diff_builder = AnnotationBuilder(local=False)
    diff_tokens, message = diff_builder.get_diff_tokens(doi=doi, user=user_key)
    if diff_tokens is None:
        return message

    doi_prefix = html.Span("doi: ")
    doi_link = html.A(
        doi,
        href="https://doi.org/" + str(doi),
        target="_blank",
        id="doi_container",
    )
    doi_row = html.Div(
        [doi_prefix, doi_link],
        className="row",
        style={"paddingBottom": "10px"},
    )
    # no labels and no selection are passed to the container in this view
    diff_view = dmi.AnnotationContainer(
        doi=doi,
        tokens=diff_tokens,
        labels=[],
        className="annotation-container",
        selectedValue=None,
        id="annotation_container",
    )
    return [doi_row, diff_view]
def load_next_macro_ann(
        not_rel_click, skip_click, confirm_click, flag_click,
        doi, abs_type, user_key):
    """Record the macro annotation for ``doi`` and serve a new plain abstract.

    The first of confirm / not-relevant / flag whose click value is not
    ``None`` decides what is stored. When none of them is set (skip clicked
    or first load) nothing is stored. The annotation is only inserted when
    ``get_username`` knows ``user_key``.

    Returns:
        The layout produced by ``macro_ann_app.serve_plain_abstract()``.
    """
    # (click value, relevant, flag) in priority order
    decisions = (
        (confirm_click, True, False),
        (not_rel_click, False, False),
        (flag_click, None, True),
    )
    verdict = next(
        ((relevant, flag) for click, relevant, flag in decisions if click is not None),
        None,
    )
    if verdict is None:
        # either skip is clicked or first load
        return macro_ann_app.serve_plain_abstract()

    relevant, flag = verdict
    annotator = AnnotationBuilder(local=True)
    if annotator.get_username(user_key) is not None:
        record = MacroAnnotation(doi, relevant, flag, abs_type, user=user_key)
        annotator.insert(record, annotator.MACRO_ANN_COLLECTION)
    return macro_ann_app.serve_plain_abstract()
def set_user_info(user_key):
    """Return the auth-info layout for the username behind ``user_key``.

    The username is whatever ``AnnotationBuilder.get_username`` returns and
    is handed as-is to ``annotate_app.serve_auth_info``.
    """
    return annotate_app.serve_auth_info(
        AnnotationBuilder(local=True).get_username(user_key)
    )
def _auth_message(n_clicks, user_key): if n_clicks is not None: builder = AnnotationBuilder(local=True) if builder.get_username(user_key) is None: return "Not authorised - did not save!" return ""
# NOTE(review): the next four statements are the tail of a method whose header
# (and enclosing loop) lie outside this view; their nesting is reconstructed
# from a whitespace-collapsed source -- confirm against the original file.
            y_train = [outcomes[i] for i in train_range]
            y_test = outcomes[begin:end]
            cv_list.append(((X_train, y_train), (X_test, y_test)))
        return cv_list

    @property
    def features_outcomes(self):
        """Return the stored ``_features_outcomes`` value unchanged."""
        return self._features_outcomes

    @property
    def train_test_set(self):
        """Split the stored features and outcomes into train and test parts.

        Returns:
            ``((X_train, y_train), (X_test, y_test))`` where the features are
            row-sliced from their CSR form and the outcomes are sliced at the
            same cutoff index.
        """
        features, outcomes = self._features_outcomes
        # initial cutoff: the train_test_split fraction of the outcome count
        cutoff = int(self.train_test_split * len(outcomes))
        cumulative_wordcount = np.cumsum(self.words_per_doc)
        # move the cutoff to the cumulative word count closest to it
        # (presumably so the split falls on a document boundary -- confirm)
        cutoff = min(cumulative_wordcount, key=lambda x: abs(x - cutoff))
        X_train = features.tocsr()[:cutoff]
        X_test = features.tocsr()[cutoff:]
        y_train = outcomes[:cutoff]
        y_test = outcomes[cutoff:]
        return (X_train, y_train), (X_test, y_test)


if __name__ == '__main__':
    # Script entry point: load one user's annotations, convert them and fit
    # the feature generator on them.
    builder = AnnotationBuilder()
    annotations = builder.get_annotations(user='******')
    # keep only the first element of each to_iob() result
    annotations = [annotated.to_iob()[0] for annotated in annotations]
    # regroup every (word, pos, tag) triple as ((word, pos), tag)
    annotations = [[[((word, pos), tag) for word, pos, tag in sent] for sent in doc] for doc in annotations ]  # this line makes my code compatible with Vahe's
    feature_generator = FeatureGenerator(train_test_split=0.75)
    features, outcomes = feature_generator.fit_transform(annotations)