def load_bd(self, filename: str) -> None:
    """Load sentences from a big_dict structure file into ``self.sentences``.

    The file is parsed as JSON. The code expects an iterable of items where
    ``item["constant"]["text"]`` is a sequence of sentences and every
    sentence is a sequence of token mappings providing:

    * ``"word"``, ``"type"`` and ``"tag"`` - copied to the word node as is;
    * ``"pos"`` - its first two entries are stored, converted to ``str``,
      as ``"pos_start"`` and ``"pos_end"``;
    * ``"variable"`` - a mapping whose ``"class"`` and ``"tone"`` entries
      are sequences; only their first element is stored.

    One node of type ``"sentence"`` is appended to ``self.sentences`` per
    sentence, and ``self.columns`` is set to the list of word columns.

    Args:
        filename: path of the JSON file to read.
    """
    column_names = [
        "word", "pos_start", "pos_end", "type", "tag", "class", "tone"
    ]
    # Columns whose value is the first element of token["variable"][name].
    variable_columns = ("class", "tone")
    # Columns taken from token["pos"], mapped to their index in that pair.
    position_index = {"pos_start": 0, "pos_end": 1}

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)

    self.columns = column_names
    for item in data:
        for sentence in item["constant"]["text"]:
            cur_sentence = graph.Node(self.graph, {})
            cur_sentence["type"] = "sentence"
            cur_sentence["words"] = graph.NodeList(self.graph)
            for token in sentence:
                word = graph.Node(self.graph, {})
                # Fill the columns in declaration order so that the keys of
                # the word node keep a stable order.
                for c_name in column_names:
                    if c_name in variable_columns:
                        word[c_name] = token["variable"][c_name][0]
                    elif c_name in position_index:
                        word[c_name] = str(
                            token["pos"][position_index[c_name]])
                    else:
                        word[c_name] = token[c_name]
                cur_sentence["words"].append(word)
            self.sentences.append(cur_sentence)
def get_text(self, text):
    """Build sentence nodes from raw text and append them to ``self.sentences``.

    ``text`` is passed to ``load_words_save_separators``; its result,
    together with ``text``, is passed to ``get_sentences``. Each element of
    what ``get_sentences`` returns is treated as one sentence: an iterable
    of token mappings with the keys ``"word"``, ``"pos_start"`` and
    ``"pos_end"``.

    ``self.columns`` is set to ``["type", "pos_start", "pos_end", "word"]``.
    """
    tokens = load_words_save_separators(text)
    sentence_groups = get_sentences(tokens, text)
    self.columns = ["type", "pos_start", "pos_end", "word"]

    # Token fields copied verbatim onto every word node, in this order.
    copied_fields = ("word", "pos_start", "pos_end")

    for group in sentence_groups:
        sentence_node = graph.Node(self.graph, {})
        sentence_node["words"] = graph.NodeList(self.graph)
        sentence_node["type"] = "sentence"
        for token in group:
            word_node = graph.Node(self.graph, {})
            word_node["type"] = "word"
            for field in copied_fields:
                word_node[field] = token[field]
            sentence_node["words"].append(word_node)
        self.sentences.append(sentence_node)
def __init__(self, config=None):
    """Run the ``BaseView`` initialiser and create the ``texts`` node list.

    Args:
        config: forwarded unchanged to ``BaseView.__init__``.
    """
    BaseView.__init__(self, config=config)
    # self.graph is read here, so it is presumably provided by the
    # BaseView initialiser above - confirm against BaseView.
    text_nodes = graph.NodeList(self.graph)
    self.texts = text_nodes
def load_column(self,
                filename: str,
                separator=" ",
                maxlen=1000,
                default_class=None,
                expand=False,
                maxwords=None) -> None:
    """Load sentences from a column-formatted text file into ``self.sentences``.

    The first line of the file is the header: it is split with ``separator``
    and gives the column names (also stored in ``self.columns``). Every
    following line is either a sentence separator (an empty line or a line
    containing ``<STOP>``) or one word, whose fields are split with
    ``separator`` and stored on a word node under the column names. After
    the columns are filled, the word node's ``"type"`` key is set to
    ``"word"``, overriding a column of the same name.

    Args:
        filename: path of the file to read (decoded as UTF-8).
        separator: field separator handed to ``split``.
        maxlen: a sentence is closed before a new word is added once it
            already holds more than ``maxlen`` words.
        default_class: value used for the trailing columns a row does not
            provide. When it is ``None`` such rows are reported with
            ``print`` and skipped.
        expand: when truthy, every column value (and every surplus field,
            filed under the last column name) is additionally attached to
            the word as a connected ``{"type", "value"}`` node. The value is
            also stored in ``self.extended``.
        maxwords: when not ``None``, reading stops as soon as more than this
            many lines (header excluded, separator lines included) have been
            read; the sentence in progress at that point is not stored.
    """

    def new_sentence():
        # Every sentence, including the very first one, gets the same keys.
        sentence = graph.Node(self.graph, {})
        sentence["words"] = graph.NodeList(self.graph)
        sentence["type"] = "sentence"
        return sentence

    def clean(raw_line):
        # A byte order mark can only sit at the start of the file, i.e. in
        # the header, but it is removed from every line to be safe.
        return raw_line.strip().replace("\ufeff", "")

    def attach(word, column, value):
        # Store the value on the word and, in expanded mode, mirror it as a
        # connected node.
        word[column] = value
        if expand:
            word.Connect(
                graph.Node(self.graph, {
                    "type": column,
                    "value": value
                }))

    # The context manager closes the file on every exit path, including the
    # early return triggered by maxwords.
    with open(filename, "r", encoding="utf-8") as f:
        column_names = split(clean(f.readline()), separator)
        cur_sentence = new_sentence()
        self.columns = column_names
        self.extended = expand
        n_columns = len(column_names)
        line_num = 0

        for raw_line in f:
            line = clean(raw_line)
            line_num += 1
            if maxwords is not None and line_num > maxwords:
                return

            is_separator = "<STOP>" in line or line == ""
            # len() is used explicitly: the truthiness of the project's
            # NodeList type is not known from here.
            if is_separator or len(cur_sentence["words"]) > maxlen:
                if len(cur_sentence["words"]) > 0:
                    self.sentences.append(cur_sentence)
                    cur_sentence = new_sentence()
                if is_separator:
                    continue
                # Otherwise the sentence was closed only because of maxlen:
                # the current line is a real word and starts the next one.

            values = split(line, separator)
            if len(values) < n_columns:
                if default_class is None:
                    print(
                        "Error: on col " + str(line_num) +
                        " not enough data and no default class provided, skipping"
                    )
                    continue
                # Pad the missing trailing columns with the default value.
                values = list(values) + [default_class] * (
                    n_columns - len(values))

            word = graph.Node(self.graph, {})
            for column, value in zip(column_names, values):
                attach(word, column, value)
            if expand:
                # Surplus fields have no column of their own; they are only
                # kept in the graph, filed under the last column name.
                for extra in values[n_columns:]:
                    word.Connect(
                        graph.Node(self.graph, {
                            "type": column_names[-1],
                            "value": extra
                        }))
            word["type"] = "word"
            cur_sentence["words"].append(word)

    # Store the trailing sentence when the file does not end on a separator.
    if len(cur_sentence["words"]) > 0:
        self.sentences.append(cur_sentence)
def __init__(self, config=None):
    """Run the ``BaseView`` initialiser and set up the sentence storage.

    After the base initialiser, three attributes are created:
    ``sentences`` (a new ``graph.NodeList`` bound to ``self.graph``),
    ``extended`` (``False``) and ``clean_text`` (an empty string).

    Args:
        config: forwarded unchanged to ``BaseView.__init__``.
    """
    BaseView.__init__(self, config=config)
    # self.graph is read here, so it is presumably provided by the
    # BaseView initialiser above - confirm against BaseView.
    sentence_nodes = graph.NodeList(self.graph)
    self.sentences = sentence_nodes
    self.extended = False
    self.clean_text = ""