def _Load_Each_JsonData(self, path=None, path_id=0, train=False):
    """
    Read a JSON-lines file and build one Instance per record.

    Every non-blank line is parsed as a JSON object. Its "fact" string is
    split on whitespace and truncated to ``self.max_train_len`` tokens; the
    "accusation" and "relevant_articles" entries under "meta" are stored as
    the accusation and law labels. Blank lines are reported and skipped
    instead of being handed to the JSON parser.

    :param path: path of the JSON-lines file; must not be None
    :param path_id: index into ``self.bert_path``, used only when
        ``self.use_bert`` is set
    :param train: not used by this method
    :return: the collected instances (passed through
        ``self._read_bert_file`` when ``self.use_bert`` is set); reading
        stops as soon as ``self.max_count`` instances have been collected
    """
    assert path is not None, "The Data Path Is Not Allow Empty."
    insts = []
    # Pre-set so the final progress message also works for an empty file.
    now_lines = 0
    with open(path, encoding="UTF-8") as f:
        # Iterate the file lazily; an early break then avoids reading the rest.
        for now_lines, line in enumerate(f, start=1):
            if now_lines % 2000 == 0:
                sys.stdout.write(
                    "\rreading the {} line\t".format(now_lines))
            if not line.strip():
                # A blank line is not valid JSON: report it and move on.
                print("empty line")
                continue
            line_json = json.loads(line)
            fact = line_json["fact"].split()[:self.max_train_len]
            meta = line_json["meta"]

            inst = Instance()
            inst.words = fact
            # Tokens are concatenated without separators, then cut to the
            # configured character budget.
            inst.bert_line = "".join(fact)[:self.bert_max_char_length]
            inst.accu_labels = meta["accusation"]
            inst.law_labels = meta["relevant_articles"]
            inst.words_size = len(inst.words)
            inst.accu_labels_size = len(inst.accu_labels)
            inst.law_labels_size = len(inst.law_labels)
            insts.append(inst)
            if len(insts) == self.max_count:
                break
    sys.stdout.write("\rreading the {} line\t".format(now_lines))
    if self.use_bert:
        insts = self._read_bert_file(insts, path=self.bert_path[path_id])
    return insts
def _Load_Each_Data(self, path=None, shuffle=False):
    """
    Read a whitespace-separated text file and build one Instance per line.

    The first token of a line is stored as the label and the tokens from
    index 2 onwards as the words; the token at index 1 is not used. Blank
    lines are skipped.

    :param path: path of the data file; must not be None
    :param shuffle: not used by this method
    :return: list of instances; reading stops as soon as
        ``self.max_count`` instances have been collected
    """
    assert path is not None, "The Data Path Is Not Allow Empty."
    insts = []
    with open(path, encoding="UTF-8") as f:
        # Iterate lazily so the max_count break really stops the read.
        for raw_line in f:
            tokens = raw_line.split()
            if not tokens:
                # Blank line: there is no label token to read.
                continue
            inst = Instance()
            # Per the original notes, [2:] is the "Have LDA" layout; the
            # "No LDA" layout used [2:-1] instead.
            inst.words = tokens[2:]
            inst.labels.append(tokens[0])
            inst.words_size = len(inst.words)
            insts.append(inst)
            if len(insts) == self.max_count:
                break
    # Debugging aid: to inspect the raw data (e.g. for the test split, or to
    # dump it to a text file for comparison), print inst.labels and
    # inst.words for every element of insts here.
    return insts
def _Load_Each_Data(self, path=None, path_id=None):
    """
    Read a labelled text file and build one Instance per valid line.

    Each line is expected to be "<label> <text...>", where the label is
    "0" or "1". The text is passed through ``self._clean_str`` and split on
    whitespace to form the words. Blank lines and lines with any other
    label are reported on stdout and skipped.

    :param path: path of the data file; must not be None
    :param path_id: index into ``self.bert_path``, used only when
        ``self.use_bert`` is set
    :return: the collected instances (passed through
        ``self._read_bert_file`` when ``self.use_bert`` is set); reading
        stops as soon as ``self.max_count`` instances have been collected
    """
    assert path is not None, "The Data Path Is Not Allow Empty."
    insts = []
    with open(path, encoding="UTF-8") as f:
        for now_lines, raw_line in enumerate(f, start=1):
            if now_lines % 200 == 0:
                sys.stdout.write("\rreading the {} line\t".format(now_lines))
            tokens = raw_line.split()
            if not tokens:
                # The newline is already stripped here, so emptiness must be
                # tested on the token list rather than by comparing to "\n".
                print("empty line")
                continue
            label = tokens[0]
            if label not in ["0", "1"]:
                print("Error line: ", " ".join(tokens))
                continue
            inst = Instance()
            inst.words = self._clean_str(" ".join(tokens[1:])).split()
            inst.labels.append(label)
            inst.words_size = len(inst.words)
            insts.append(inst)
            if len(insts) == self.max_count:
                break
    if self.use_bert:
        insts = self._read_bert_file(insts, path=self.bert_path[path_id])
    return insts