def from_csv(cls, file_path: str):
    """Build an instance from a CSV file with ``text`` and ``label`` columns.

    Each ``text`` cell and each ``label`` cell is split on a single space.
    The tokens are added to a fresh ``TokenDictionary`` and the labels to a
    fresh ``Dictionary``; the largest token count seen in any row is tracked
    as the maximum sequence length.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        ``cls(data_df, max_seq_len, seq_vocab, label_dict)``, where
        ``data_df`` is the loaded frame restricted to the two columns.
    """
    # Both columns must be loaded: restricting the read to 'text' alone made
    # the later 'label' lookup fail with KeyError on the first data row.
    data_df = pd.read_csv(file_path, usecols=['text', 'label'])
    seq_vocab = TokenDictionary()
    label_dict = Dictionary()
    max_seq_len = 0
    # Walk the two columns in lockstep; the row index is not needed.
    for text, label in zip(data_df['text'], data_df['label']):
        tokens = text.split(' ')
        max_seq_len = max(max_seq_len, len(tokens))
        seq_vocab.add_items(tokens)
        label_dict.add_items(label.split(' '))
    return cls(data_df, max_seq_len, seq_vocab, label_dict)
def get_label_dict(labels: List[str], delimiter=' '):
    """Collect the individual labels found in ``labels`` into a ``Dictionary``.

    Every entry has its newline characters removed and is then split on
    ``delimiter``; the resulting pieces are added to the dictionary.

    Args:
        labels: Label strings, each possibly holding several labels.
        delimiter: Separator between labels inside one string.

    Returns:
        The populated ``Dictionary``.
    """
    collected = Dictionary()
    for raw_entry in labels:
        # Remove newlines first so they never end up inside a label.
        cleaned_entry = raw_entry.replace('\n', '')
        pieces = cleaned_entry.split(delimiter)
        collected.add_items(pieces)
    return collected