def analyze(self, text, level, keyword_level):
    in_text = nlp_pb2.InputText()
    in_text.text = text
    in_text.lang = lang_pb2.kor
    in_text.split_sentence = True
    in_text.use_tokenizer = False
    in_text.level = level
    in_text.keyword_frequency_level = keyword_level
    ret = self.stub.Analyze(in_text)

    # Build a JSON object from the protobuf response.
    printer = json_format._Printer(True, True)
    doc = printer._MessageToJsonObject(ret)
    print(doc)

    # Build JSON text from the protobuf response.
    json_text = json_format.MessageToJson(ret, True, True)
    print(json_text)

    for i in range(len(ret.sentences)):
        text = ret.sentences[i].text
        analysis = ret.sentences[i].morps
        morp = ""
        for j in range(len(analysis)):
            morp = morp + " " + analysis[j].lemma + "/" + analysis[j].type
        morp = morp.encode('utf-8').strip()
        addstr = 'morp -> ' + morp
        print(addstr)

        ner = ret.sentences[i].nes
        for j in range(len(ner)):
            ne = ner[j].text + "/" + ner[j].type
            ne = ne.encode('utf-8').strip()
            addNE = 'NE -> ' + ne
            print(addNE)
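# --- Hedged usage sketch (added for illustration, not from the original source) ---
# The private json_format._Printer(True, True) used above corresponds to the public
# helpers called with including_default_value_fields=True and
# preserving_proto_field_name=True. A minimal, self-contained demonstration using the
# well-known Struct type (the real code passes the gRPC Analyze response instead):
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Struct

msg = Struct()
msg.update({"text": "example", "score": 0.9})

as_text = json_format.MessageToJson(msg, True, True)   # JSON string
as_dict = json_format.MessageToDict(msg, True, True)   # plain Python dict
print(as_text)
print(as_dict)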
def get_json_data(self):
    """
    :return: The protobuf data created by the analysis, as a json object.

    See get_protobuf_data for more details.
    The json fields are defined by https://github.com/SaltieRL/carball/tree/master/api
    """
    printer = _Printer()
    js = printer._MessageToJsonObject(self.protobuf_game)
    return js
def write_json_out_to_file(self, file: IO):
    """
    Writes the json data to the specified file, as text.

    NOTES:
        The data is written as text (i.e. a string), so the file must be opened
        in text mode, e.g. open(file_name, 'w').

    :param file: The file object (or a buffer).
    """
    if 'b' in file.mode:
        raise IOError("JSON files cannot be binary; use open(path, \"w\")")
    printer = _Printer()
    js = printer._MessageToJsonObject(self.protobuf_game)
    json.dump(js, file, indent=2, cls=CarballJsonEncoder)
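# --- Hedged usage sketch (not part of the original source) ---
# Assuming carball's analyze_replay_file() entry point returns an AnalysisManager-like
# object exposing the two methods above; the output file must be opened in text mode
# ('w'), otherwise write_json_out_to_file raises IOError.
import carball

analysis_manager = carball.analyze_replay_file('example.replay')
with open('example.json', 'w') as f:
    analysis_manager.write_json_out_to_file(f)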
def analyze(self, text, level, keyword_level):
    in_text = nlp_pb2.InputText()
    in_text.text = text
    in_text.lang = lang_pb2.kor
    in_text.split_sentence = True
    in_text.use_tokenizer = False
    in_text.level = level
    in_text.keyword_frequency_level = keyword_level
    ret = self.stub.Analyze(in_text)

    # Build a JSON object from the protobuf response.
    printer = json_format._Printer(True, True)
    doc = printer._MessageToJsonObject(ret)
    # print(doc)

    # Convert the protobuf response (note: MessageToDict returns a Python dict,
    # not JSON text).
    json_text = json_format.MessageToDict(ret, True, True)
    return json_text
def analyze(self, text, level, keyword_level):
    in_text = nlp_pb2.InputText()
    in_text.text = text
    in_text.lang = lang_pb2.kor
    in_text.split_sentence = True
    in_text.use_tokenizer = False
    in_text.level = level
    in_text.keyword_frequency_level = keyword_level
    ret = self.stub.Analyze(in_text)

    # Build a JSON object from the protobuf response.
    printer = json_format._Printer(True, True)
    doc = printer._MessageToJsonObject(ret)
    ret_txt = text_format.MessageToString(ret, False, False)
    # print(doc)

    # Build JSON text from the protobuf response.
    json_text = json_format.MessageToJson(ret, True, True)
    # print(json_text)

    readable_text = ''
    for idx in range(len(ret.sentences)):
        text = ret.sentences[idx].text
        analysis = ret.sentences[idx].morps
        morp = ""
        for ana_idx in range(len(analysis)):
            morp += " {0}/{1}".format(analysis[ana_idx].lemma,
                                      analysis[ana_idx].type)
        morp = morp.encode('utf-8').strip()
        add_morp = "morp -> {0}".format(morp)
        # print(add_morp)
        readable_text += add_morp + '\n'

        ner = ret.sentences[idx].nes
        for ner_idx in range(len(ner)):
            ne = "{0}/{1}".format(ner[ner_idx].text, ner[ner_idx].type)
            ne = ne.encode('utf-8').strip()
            add_ne = 'NE -> ' + ne
            # print(add_ne)
            readable_text += add_ne + '\n'

    return readable_text, json_text, ret
def getjson(message, including_default_value_fields=False,
            preserving_proto_field_name=False, indent=2, sort_keys=False,
            use_integers_for_enums=False, descriptor_pool=None):
    # Accept either a message class or an instance; instantiate if a class was passed.
    try:
        message = message()
    except TypeError:
        pass
    # If no fields are set, use the stock _Printer so defaults can still be rendered.
    try:
        ms = message.ListFields()
        if not ms:
            printer = _Printer(including_default_value_fields,
                               preserving_proto_field_name,
                               use_integers_for_enums, descriptor_pool)
            return printer.ToJsonString(message, indent, sort_keys)
    except Exception:
        pass
    # Otherwise fall through to Printer (not the stock _Printer; presumably
    # defined elsewhere in this module).
    printer = Printer(including_default_value_fields, preserving_proto_field_name,
                      use_integers_for_enums, descriptor_pool)
    return printer.ToJsonString(message, indent, sort_keys)
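# --- Hedged usage sketch (added for illustration) ---
# getjson() accepts either a message class or an instance. Passing a class with no
# fields set exercises the stock _Printer branch; the non-empty branch relies on the
# Printer class assumed to be defined elsewhere in this module.
from google.protobuf.struct_pb2 import Struct

print(getjson(Struct))  # typically prints "{}" for an empty Struct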
def render_html(self):
    json_obj = {"nodes": [], "links": []}
    json_printer = _Printer()

    for op in self._proto.op:
        op_json = json_printer._MessageToJsonObject(op)
        op_json["id"] = op_json["name"]
        op_json["node_type"] = "op"
        json_obj["nodes"].append(op_json)

    for tensor in self._proto.tensors:
        tensor_json = json_printer._MessageToJsonObject(tensor)
        tensor_json["id"] = tensor_json["name"]
        # Drop oversized data arrays so the rendered graph stays lightweight.
        if "floatData" in tensor_json and \
                len(tensor_json["floatData"]) > THREASHOLD:
            del tensor_json["floatData"]
        if "int32Data" in tensor_json and \
                len(tensor_json["int32Data"]) > THREASHOLD:
            del tensor_json["int32Data"]
        tensor_json["node_type"] = "tensor"
        json_obj["nodes"].append(tensor_json)

    node_ids = [node["id"] for node in json_obj["nodes"]]

    tensor_to_op = {}
    for op in self._proto.op:
        for tensor in op.output:
            tensor_to_op[tensor] = op.name

    for op in json_obj["nodes"]:
        if "input" in op:
            for input in op["input"]:
                if input in node_ids and op["name"] in node_ids:
                    # for weights
                    json_obj["links"].append({
                        "source": input,
                        "target": op["name"]
                    })
                elif input in tensor_to_op and \
                        tensor_to_op[input] in node_ids:
                    # for intermediate tensors
                    json_obj["links"].append({
                        "source": tensor_to_op[input],
                        "target": op["name"]
                    })
                else:
                    # for graph inputs
                    json_obj["nodes"].append({
                        "id": input,
                        "name": input,
                        "node_type": "input"
                    })
                    json_obj["links"].append({
                        "source": input,
                        "target": op["name"]
                    })

    json_msg = json.dumps(json_obj, cls=NPEncoder)
    cwd = os.path.dirname(__file__)
    with open(cwd + "/index.html") as f:
        html = f.read()
    return html % json_msg
def write_json_out_to_file(self, file):
    printer = _Printer()
    js = printer._MessageToJsonObject(self.protobuf_game)
    json.dump(js, file, indent=2, cls=CarballJsonEncoder)
def analyze(self, text, level, keyword_level):
    in_text = nlp_pb2.InputText()
    in_text.text = text
    in_text.lang = lang_pb2.kor
    in_text.split_sentence = True
    in_text.use_tokenizer = False
    in_text.level = level
    in_text.keyword_frequency_level = keyword_level
    ret = self.stub.Analyze(in_text)

    # Build a JSON object from the protobuf response.
    printer = json_format._Printer(True, True)
    doc = printer._MessageToJsonObject(ret)
    ret_txt = text_format.MessageToString(ret, False, False)
    # print(doc)

    # Build JSON text from the protobuf response.
    json_text = json_format.MessageToJson(ret, True, True)
    # print(json_text)

    readable_text = ''
    for idx in range(len(ret.sentences)):
        text = ret.sentences[idx].text
        analysis = ret.sentences[idx].morps
        morp = ""
        for ana_idx in range(len(analysis)):
            # Restore the dictionary form of verb/adjective stems by appending '다'.
            if analysis[ana_idx].type in ['VV', 'VA', 'VX', 'VCP']:
                morp += " {0}다/{1}".format(analysis[ana_idx].lemma,
                                            analysis[ana_idx].type)
            else:
                morp += " {0}/{1}".format(analysis[ana_idx].lemma,
                                          analysis[ana_idx].type)
        morp = morp.encode('utf-8').strip()
        add_morp = "morp -> {0}".format(morp)
        # print(add_morp)
        readable_text += add_morp + '\n'

        ner = ret.sentences[idx].nes
        for ner_idx in range(len(ner)):
            if ner[ner_idx].type in ['VV', 'VA', 'VX', 'VCP']:
                ne = "{0}다/{1}".format(ner[ner_idx].text, ner[ner_idx].type)
            else:
                ne = "{0}/{1}".format(ner[ner_idx].text, ner[ner_idx].type)
            ne = ne.encode('utf-8').strip()
            add_ne = 'NE -> ' + ne
            # print(add_ne)
            readable_text += add_ne + '\n'

    # Build the NLP sentence: append '다' (u"\ub2e4") to verb/adjective stems.
    json_data = json.loads(json_text)
    word_list = list()
    for sentence in json_data['sentences']:
        for words in sentence['words']:
            tagged_text = words['tagged_text']
            tagged_text_list = tagged_text.split()
            for tagged_word in tagged_text_list:
                word = tagged_word.split("/")[0]
                tag = tagged_word.split("/")[1]
                if tag in ['VV', 'VA', 'VX', 'VCP', 'VCN']:
                    word += u"\ub2e4"
                word_list.append(word)
    nlp_sent = " ".join(word_list)

    # Modify the json data in the same way.
    for sentence in json_data['sentences']:
        for words in sentence['words']:
            tagged_text = words['tagged_text']
            for tag in ['VV', 'VA', 'VX', 'VCP', 'VCN']:
                if '/' + tag in tagged_text:
                    words['tagged_text'] = words['tagged_text'].replace(
                        "/" + tag, u"\ub2e4/" + tag)
    for sentence in json_data['sentences']:
        for morps in sentence['morps']:
            if morps['type'] in ['VV', 'VA', 'VX', 'VCP', 'VCN']:
                morps['lemma'] += u"\ub2e4"

    return nlp_sent, json.dumps(json_data), ret