def load_cnndm_data(args, data_type, dump=False):
    """Load CNN/DailyMail data as ``(article, abstract, abstract)`` triples.

    Despite its name, ``dump`` never writes anything: it only selects
    whether the data is rebuilt from the ``cnndm`` loaders or read back from
    a previously pickled file.

    Args:
        args: Forwarded unchanged to ``cnndm.load_data`` and
            ``cnndm.load_summary``. Unused when ``dump`` is false.
        data_type: Split identifier. Forwarded to the ``cnndm`` loaders when
            ``dump`` is true; otherwise substituted into the pickle file
            name.
        dump: If true, rebuild the data from the encoded articles and
            abstracts. If false (default), unpickle the prepared file.

    Returns:
        When ``dump`` is true, a list of ``([segment], abstract, abstract)``
        tuples: ``segment`` is a ``TopicSegment`` with one ``Utterance`` per
        sentence, and the (possibly trimmed) encoded abstract appears twice.
        When ``dump`` is false, whatever object the pickle file contains.
    """
    if not dump:
        path = "/home/alta/summary/pm574/summariser1/lib/model_data/cnndm-191216.{}.pk.bin".format(
            data_type)
        # NOTE: unpickling can execute arbitrary code; only load files that
        # come from a trusted source.
        with open(path, 'rb') as f:
            return pickle.load(f, encoding="bytes")

    data = cnndm.load_data(args, data_type)
    summary = cnndm.load_summary(args, data_type)

    articles = []
    for encoded_words in data['encoded_articles']:
        article = TopicSegment()
        last_pos = len(encoded_words) - 1
        # Give every article its own sentence buffer. Previously the buffer
        # was only bound on a CLS token, so an article that did not start
        # with CLS either raised UnboundLocalError (first article) or kept
        # appending to the previous article's last sentence list.
        sentence = []
        for i, x in enumerate(encoded_words):
            if x == 101:  # CLS: start a new sentence
                sentence = []
            elif x == 102:  # SEP: the current sentence is complete
                article.add_utterance(Utterance(sentence, -1, -1, -1))
            elif x == 100:  # UNK: stop reading this article
                break
            else:
                sentence.append(x)
                # The article ended on an ordinary token (no closing SEP),
                # so flush the unfinished sentence.
                if i == last_pos:
                    article.add_utterance(Utterance(sentence, -1, -1, -1))
        # Each article is wrapped in a one-element list of segments.
        articles.append([article])

    abstracts = []
    for encoded_abstract in summary['encoded_abstracts']:
        if 103 in encoded_abstract:
            # Drop everything from the first 103 onwards, then terminate the
            # abstract with 102 followed by 103. Slicing makes a copy, so
            # the loader's own list is left untouched.
            # NOTE(review): 103 is presumably an end/mask marker - confirm.
            cut = encoded_abstract.index(103)
            encoded_abstract = encoded_abstract[:cut]
            encoded_abstract.append(102)
            encoded_abstract.append(103)
        abstracts.append(encoded_abstract)

    # zip() stops at the shorter input, so unmatched trailing items are
    # silently dropped.
    return [(x, y, y) for x, y in zip(articles, abstracts)]
def load_cnndm_data(args, data_type, dump=False):
    """Load CNN/DailyMail data as ``(article, abstract, abstract)`` triples.

    Despite its name, ``dump`` never writes anything: it only selects
    whether the data is rebuilt from the ``cnndm`` loaders or read back from
    the pickled file named by ``CNNDM_DATA_PATH``.

    Args:
        args: Forwarded unchanged to ``cnndm.load_data`` and
            ``cnndm.load_summary``. Unused when ``dump`` is false.
        data_type: Split identifier. Forwarded to the ``cnndm`` loaders when
            ``dump`` is true; otherwise substituted into
            ``CNNDM_DATA_PATH`` via ``str.format``.
        dump: If true, rebuild the data from the encoded articles and
            abstracts. If false (default), unpickle the prepared file.

    Returns:
        When ``dump`` is true, a list of ``([segment], abstract, abstract)``
        tuples: ``segment`` is a ``TopicSegment`` with one ``Utterance`` per
        sentence, and the (possibly trimmed) encoded abstract appears twice.
        When ``dump`` is false, whatever object the pickle file contains.
    """
    if not dump:
        # A leftover pdb.set_trace() used to sit here and stopped every
        # load at a debugger prompt; it has been removed.
        # NOTE: unpickling can execute arbitrary code; only load files that
        # come from a trusted source.
        with open(CNNDM_DATA_PATH.format(data_type), 'rb') as f:
            return pickle.load(f, encoding="bytes")

    data = cnndm.load_data(args, data_type)
    summary = cnndm.load_summary(args, data_type)

    articles = []
    for encoded_words in data['encoded_articles']:
        article = TopicSegment()
        last_pos = len(encoded_words) - 1
        # Give every article its own sentence buffer. Previously the buffer
        # was only bound on a CLS token, so an article that did not start
        # with CLS either raised UnboundLocalError (first article) or kept
        # appending to the previous article's last sentence list.
        sentence = []
        for i, x in enumerate(encoded_words):
            if x == 101:  # CLS: start a new sentence
                sentence = []
            elif x == 102:  # SEP: the current sentence is complete
                article.add_utterance(Utterance(sentence, -1, -1, -1))
            elif x == 100:  # UNK: stop reading this article
                break
            else:
                sentence.append(x)
                # The article ended on an ordinary token (no closing SEP),
                # so flush the unfinished sentence.
                if i == last_pos:
                    article.add_utterance(Utterance(sentence, -1, -1, -1))
        # Each article is wrapped in a one-element list of segments.
        articles.append([article])

    abstracts = []
    for encoded_abstract in summary['encoded_abstracts']:
        if 103 in encoded_abstract:
            # Drop everything from the first 103 onwards, then terminate the
            # abstract with 102 followed by 103. Slicing makes a copy, so
            # the loader's own list is left untouched.
            # NOTE(review): 103 is presumably an end/mask marker - confirm.
            cut = encoded_abstract.index(103)
            encoded_abstract = encoded_abstract[:cut]
            encoded_abstract.append(102)
            encoded_abstract.append(103)
        abstracts.append(encoded_abstract)

    # zip() stops at the shorter input, so unmatched trailing items are
    # silently dropped.
    return [(x, y, y) for x, y in zip(articles, abstracts)]