def plot_log_p(filename, codec, model):
    from questions.likelihood import log_likelihood

    # Load the dataset of strings.
    with open(os.path.join('datasets', filename + '.pkl'), 'rb') as f:
        data = pkl.load(f)

    # Compute the log-likelihood of each string under the model.
    lls = []
    for i in trange(len(data)):
        text = data[i]
        text = codec.encode(text).to(device)
        ## TODO: complete the code in the function `log_likelihood`
        lls.append(log_likelihood(model, text))
    lls = np.asarray(lls)

    # Save the raw log-likelihoods and plot their histogram.
    with open(os.path.join('submit', filename + '_raw.pkl'), 'wb') as f:
        pkl.dump(lls, f, protocol=pkl.HIGHEST_PROTOCOL)

    plt.figure()
    plt.hist(lls)
    plt.xlabel('Log-likelihood')
    plt.xlim([-600, 0])
    plt.ylabel('Counts')
    plt.title(filename)
    plt.savefig(os.path.join('submit', filename + '.png'), bbox_inches='tight')
    plt.show()
    plt.close()
    print("# Figure written to %s.png." % filename)
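
# The TODO in `plot_log_p` (and `classification` below) relies on `log_likelihood`
# from questions/likelihood.py, which this assignment leaves to be implemented. The
# sketch below is one possible version, not the official solution: it assumes `model`
# follows the Hugging Face GPT-2 interface (so `model(text).logits` has shape
# (1, T, vocab_size)), that `text` is a (1, T) tensor of token ids as produced by
# `codec.encode` above, and that `torch` is imported at the top of this file (it is
# already used in `classification` below). It is named `log_likelihood_sketch` so it
# does not shadow the real implementation imported inside the functions.
def log_likelihood_sketch(model, text):
    """Return sum_t log p(x_t | x_<t) for the token sequence `text`, as a float."""
    with torch.no_grad():
        logits = model(text).logits                      # (1, T, V)
        log_probs = torch.log_softmax(logits, dim=-1)    # (1, T, V)
        # Position t predicts token t + 1, so align log_probs[:, :-1] with text[:, 1:].
        token_lls = log_probs[:, :-1].gather(-1, text[:, 1:].unsqueeze(-1)).squeeze(-1)
        return token_lls.sum().item()
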
def classification(model, text):
    """
    Classify whether the string `text` is randomly generated or not.
    :param model: The GPT-2 model
    :param text: A tensor of shape (1, T), where T is the length of the text
    :return: True if `text` is a random string. Otherwise return False
    """
    from questions.likelihood import log_likelihood

    with torch.no_grad():
        ## TODO: Return True if `text` is a random string. Or else return False.
        # Random strings score far lower log-likelihood under the model than natural
        # text, so threshold the sequence log-likelihood.
        ll = log_likelihood(model, text)
        return ll < -300
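
# Hypothetical usage example (not part of the assignment): the -300 cutoff above
# appears to be a heuristic chosen to separate the two histograms produced by
# `plot_log_p`, with natural text scoring well above it and random strings well
# below; it may need re-tuning for a different tokenizer, model, or dataset.
# `codec`, `model`, and `device` are assumed to be set up as in `plot_log_p`.
def _classification_demo(model, codec):
    """Classify one example sentence; expected to return False for natural English."""
    text = codec.encode('the quick brown fox jumps over the lazy dog').to(device)
    return classification(model, text)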