def _make_headers_df(headers_response):
    """
    Build the header dataframe from the headers portion of a Watson response.

    :param headers_response: the ``row_header`` or ``column_header`` array as
     returned from the Watson response
    :return: the header dataframe, restricted to the header columns listed
     below, in that order
    """
    # Columns kept in the result, in output order.
    header_columns = [
        "text",
        "column_index_begin",
        "column_index_end",
        "row_index_begin",
        "row_index_end",
        "cell_id",
        "text_normalized",
    ]
    return util.make_dataframe(headers_response)[header_columns]
def _make_body_cells_df(body_cells_response):
    """
    Parses the body_cells portion of the Watson response and creates the
    body_cells dataframe.

    The ``attributes.type`` and ``attributes.text`` columns may be absent from
    the parsed frame; each one that is missing is added and filled with
    ``None`` so the returned frame always has the same set of columns.

    :param body_cells_response: the "body cells" array as returned from the
     Watson response
    :return: the completed body_cells dataframe
    """
    body_cells_df = util.make_dataframe(body_cells_response)

    # Check each optional column on its own: testing only "attributes.type"
    # would either clobber an existing "attributes.text" column or leave a
    # missing one undefined, which fails in the column selection below.
    for optional_column in ("attributes.type", "attributes.text"):
        if optional_column not in body_cells_df.columns:
            body_cells_df[optional_column] = None

    body_cells_df = body_cells_df[[
        "text",
        "column_index_begin",
        "column_index_end",
        "row_index_begin",
        "row_index_end",
        "cell_id",
        "column_header_ids",
        "column_header_texts",
        "row_header_ids",
        "row_header_texts",
        "attributes.text",
        "attributes.type",
    ]]
    return body_cells_df
def parse_response(
        response: Dict[str, Any],
        original_text: str = None,
        apply_standard_schema: bool = False) -> Dict[str, pd.DataFrame]:
    """
    Parse a Watson NLU response as a decoded JSON string, e.g. dictionary
    containing requested features and convert into a dict of Pandas DataFrames.
    The following features in the response will be converted:

        * entities
        * entity_mentions (elements of the "mentions" field of
          `response["entities"]`)
        * keywords
        * relations
        * semantic_roles
        * syntax

    For information on getting started with Watson Natural Language
    Understanding on IBM Cloud, see
    https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-getting-started.

    A Python SDK for authentication and making requests to the service is
    provided at https://github.com/watson-developer-cloud/python-sdk.
    Details on the supported features and available options when making the
    request can be found at
    https://cloud.ibm.com/apidocs/natural-language-understanding?code=python#analyze-text.

    .. note:: Additional feature data in response will not be processed

    >>> response = natural_language_understanding.analyze(
    ...     url="https://raw.githubusercontent.com/CODAIT/text-extensions-for-pandas/master/resources/holy_grail.txt",
    ...     return_analyzed_text=True,
    ...     features=Features(
    ...         entities=EntitiesOptions(sentiment=True),
    ...         keywords=KeywordsOptions(sentiment=True, emotion=True),
    ...         relations=RelationsOptions(),
    ...         semantic_roles=SemanticRolesOptions(),
    ...         syntax=SyntaxOptions(sentences=True, tokens=SyntaxOptionsTokens(lemma=True, part_of_speech=True))
    ...     )).get_result()
    >>> dfs = parse_response(response)
    >>> dfs.keys()
    dict_keys(['syntax', 'entities', 'entity_mentions', 'keywords', 'relations', 'semantic_roles'])
    >>> dfs["syntax"].head()
                    span part_of_speech   lemma  \\
    0    [0, 5): 'Monty'          PROPN    None
    1  [6, 12): 'Python'          PROPN  python
                                                sentence
    0  [0, 273): 'Monty Python and the Holy Grail is ...
    1  [0, 273): 'Monty Python and the Holy Grail is ...

    A Python warning is issued when the original text was neither supplied
    nor recoverable from the syntax spans, and when the response itself
    contains a "warnings" entry.

    :param response: A dictionary of features from the IBM Watson NLU response
    :param original_text: Optional original text sent in request, if None will
     look for "analyzed_text" keyword in response
    :param apply_standard_schema: Return DataFrames with a set schema, whether
     data was present in the response or not
    :return: A dictionary mapping feature name to a Pandas DataFrame
    """
    dfs = {}

    if original_text is None and "analyzed_text" in response:
        original_text = response["analyzed_text"]

    # Create the syntax DataFrame
    syntax_response = response.get("syntax", {})
    token_df, sentence_df = _make_syntax_dataframes(syntax_response,
                                                    original_text)
    # `sentence_series` is reused further down when building the relations
    # DataFrame, so it must be computed before that step.
    sentence_series = sentence_df.get("sentence_span")
    if sentence_series is not None:
        syntax_df = _merge_syntax_dataframes(token_df, sentence_series)
    else:
        syntax_df = pd.concat([token_df, sentence_df], axis=1)
    dfs["syntax"] = util.apply_schema(syntax_df, _syntax_schema,
                                      apply_standard_schema)

    # Fall back to the text carried by the token spans, if there is any.
    if original_text is None and "span" in dfs["syntax"].columns:
        span_column = dfs["syntax"]["span"]
        # Selecting a DataFrame column yields a pandas container (normally a
        # Series), never the SpanArray itself, so unwrap the backing array
        # before the type check; otherwise this branch could never succeed.
        span_values = getattr(span_column, "array", span_column)
        if isinstance(span_values, SpanArray):
            original_text = span_values.target_text
        if original_text is None:
            warnings.warn("Did not receive and could not build original text")

    # Create the entities DataFrames
    entities = response.get("entities", [])
    entities_df, entity_mentions_df = _make_entity_dataframes(
        entities, original_text)
    dfs["entities"] = util.apply_schema(entities_df, _entities_schema,
                                        apply_standard_schema)
    dfs["entity_mentions"] = util.apply_schema(entity_mentions_df,
                                               _entity_mentions_schema,
                                               apply_standard_schema)

    # Create the keywords DataFrame
    keywords = response.get("keywords", [])
    keywords_df = util.make_dataframe(keywords)
    dfs["keywords"] = util.apply_schema(keywords_df, _keywords_schema,
                                        apply_standard_schema)

    # Create the relations DataFrame
    relations = response.get("relations", [])
    relations_df = _make_relations_dataframe(relations, original_text,
                                             sentence_series)
    dfs["relations"] = util.apply_schema(relations_df, _relations_schema,
                                         apply_standard_schema)

    # Create the semantic roles DataFrame
    semantic_roles = response.get("semantic_roles", [])
    semantic_roles_df = util.make_dataframe(semantic_roles)
    dfs["semantic_roles"] = util.apply_schema(semantic_roles_df,
                                              _semantic_roles_schema,
                                              apply_standard_schema)

    if "warnings" in response:
        # TODO: check structure of warnings and improve message
        warnings.warn(str(response["warnings"]))

    return dfs