def make_grad_attn_viz(grads, attention, threshold=0.005):
    """
    Build the loss-gradient / attention propagation chart.

    One table row is produced for every (source_step, target_step) pair of
    the attention matrix, pairing the attention weight of that pair with
    the gradient of the target step.

    :param grads: list of gradient values of length T, indexed by target step
    :param attention: TxT matrix of attention weights; must expose ``.shape``
        and support ``attention[i, j]`` indexing
    :param threshold: accepted for interface compatibility; not used by
        this function
    :return: the object returned by ``wandb.plot_table``
    """
    n_steps = attention.shape[0]

    # Row layout matches the column list below.
    rows = [
        [src, tgt, grads[tgt], attention[src, tgt]]
        for src in range(n_steps)
        for tgt in range(n_steps)
    ]
    column_names = ['source_step', 'target_step', 'grad', 'attn']
    table = wandb.Table(data=rows, columns=column_names)

    # Chart field name -> table column name.
    field_mapping = {
        "source step": "source_step",
        "target step": "target_step",
        "grad": "grad",
        "attn": "attn",
    }
    return wandb.plot_table(
        vega_spec_name="kylegoyette/loss-gradient-attention-propagation",
        data_table=table,
        fields=field_mapping,
    )
def bar(table, label, value, title=None):
    """
    Construct a bar plot.

    Arguments:
        table (wandb.Table): Table of data.
        label (string): Name of the column holding each bar's label.
        value (string): Name of the column holding each bar's value.
        title (string): Plot title.

    Returns:
        A plot object, to be passed to wandb.log()

    Example:
        table = wandb.Table(
            data=[
                ['car', random.random()],
                ['bus', random.random()],
                ['road', random.random()],
                ['person', random.random()],
            ],
            columns=["class", "acc"],
        )
        wandb.log({'bar-plot1': wandb.plot.bar(table, "class", "acc")})
    """
    # Chart field name -> table column name.
    column_fields = {"label": label, "value": value}
    chart_strings = {"title": title}
    return wandb.plot_table("wandb/bar/v0", table, column_fields, chart_strings)
def confusion_matrix(preds=None, y_true=None, class_names=None):
    """
    Computes a multi-run confusion matrix.

    Arguments:
        preds (arr): Array of predicted label indices (zero-based).
        y_true (arr): Array of true label indices (zero-based).
        class_names (arr): Array of class names, indexed by label index.
            When omitted, names "Class_1" ... "Class_N" are generated,
            where N is the largest index seen plus one.

    Returns:
        A plot object, to be passed to wandb.log()

    Raises:
        AssertionError: if preds and y_true differ in length, or if an
            index does not fit into class_names.

    Example:
        wandb.log({'conf_mat': wandb.plot.confusion_matrix(preds, y_true, labels)})
    """
    np = util.get_module(
        "numpy",
        required="confusion matrix requires the numpy library, install with `pip install numpy`",
    )

    assert len(preds) == len(
        y_true), "Number of predictions and label indices must match"

    if class_names is not None:
        n_classes = len(class_names)
        # Indices are zero-based, so the largest valid index is
        # n_classes - 1; an index equal to n_classes would overflow the
        # counts matrix below.
        assert max(preds) < n_classes, \
            "Higher predicted index than number of classes"
        assert max(y_true) < n_classes, \
            "Higher label class index than number of classes"
    else:
        # +1 because the largest zero-based index must itself be a valid
        # row/column of the counts matrix.
        n_classes = max(max(preds), max(y_true)) + 1
        class_names = ["Class_{}".format(i) for i in range(1, n_classes + 1)]

    # counts[actual, predicted] = number of examples with that pairing.
    counts = np.zeros((n_classes, n_classes))
    for actual, predicted in zip(y_true, preds):
        counts[actual, predicted] += 1

    data = [
        [class_names[i], class_names[j], counts[i, j]]
        for i in range(n_classes)
        for j in range(n_classes)
    ]

    fields = {
        "Actual": "Actual",
        "Predicted": "Predicted",
        "nPredicted": "Count",
    }
    return wandb.plot_table(
        "wandb/confusion_matrix/v0",
        wandb.Table(columns=["Actual", "Predicted", "Count"], data=data),
        fields,
    )
def line_series(xs, ys, keys=None, title=None, xname=None):
    """
    Construct a line series plot.

    Arguments:
        xs (array of arrays, or array): x values. Either one array per
            series, or a single flat array that is shared by every series.
        ys (array of arrays): One array of y values per series.
        keys (array of strings): Name of each series, indexed like ys.
            Defaults to "key_0", "key_1", ...
        title (string): Plot title.
        xname (string): Title of the x-axis; "x" is used when not given.

    Returns:
        A plot object, to be passed to wandb.log()

    Example:
        ```
        # A single x array: every series in ys is plotted against it.
        x = [i for i in range(10)]
        ys = [
            [i for i in range(10)],
            [i**2 for i in range(10)],
        ]
        wandb.log({'line-series-plot1':
                   wandb.plot.line_series(x, ys, title="title", xname="step")})

        # One x array per series, for metrics with different steps.
        xs = [[i for i in range(10)], [2*i for i in range(10)]]
        wandb.log({'line-series-plot1':
                   wandb.plot.line_series(xs, ys, title="title", xname="step")})
        ```
    """
    # A flat xs (first element is not itself a sequence) is reused for
    # every series.
    if not isinstance(xs[0], Sequence):
        xs = [xs] * len(ys)
    assert len(xs) == len(ys), "Number of x-lines and y-lines must match"

    rows = []
    for idx, (x_values, y_values) in enumerate(zip(xs, ys)):
        for x_val, y_val in zip(x_values, y_values):
            line_key = keys[idx] if keys is not None else "key_{}".format(idx)
            rows.append([x_val, line_key, y_val])

    series_table = wandb.Table(data=rows, columns=["step", "lineKey", "lineVal"])
    column_fields = {"step": "step", "lineKey": "lineKey", "lineVal": "lineVal"}
    chart_strings = {"title": title, "xname": xname or "x"}
    return wandb.plot_table(
        "wandb/lineseries/v0", series_table, column_fields, chart_strings
    )
def histogram(table, value, title=None):
    """
    Construct a histogram plot.

    Arguments:
        table (wandb.Table): Table of data.
        value (string): Name of the column to use as data for bucketing.
        title (string): Plot title.

    Returns:
        A plot object, to be passed to wandb.log()

    Example:
        data = [[i, random.random() + math.sin(i / 10)] for i in range(100)]
        table = wandb.Table(data=data, columns=["step", "height"])
        wandb.log({'histogram-plot1': wandb.plot.histogram(table, "height")})
    """
    # Chart field name -> table column name.
    column_fields = {'value': value}
    chart_strings = {'title': title}
    return wandb.plot_table(
        'wandb/histogram/v0', table, column_fields, chart_strings
    )
def line_series(xs, ys, keys=None, title=None, xname=None):
    """
    Construct a line series plot.

    Arguments:
        xs (array of arrays, or array): x values, either one array per
            series or a single flat array shared by all series.
        ys (array of arrays): One array of y values per series.
        keys (array of strings): Series names, indexed like ys; defaults
            to "key_0", "key_1", ...
        title (string): Plot title.
        xname (string): Title of the x-axis; "x" is used when not given.

    Returns:
        A plot object, to be passed to wandb.log()
    """
    # When the first element of xs is not itself a sequence, xs is a
    # single x array that every series shares.
    shared_x = not isinstance(xs[0], Sequence)
    if shared_x:
        xs = [xs] * len(ys)
    assert len(xs) == len(ys), "Number of x-lines and y-lines must match"

    rows = []
    for line_idx in range(len(xs)):
        for step, val in zip(xs[line_idx], ys[line_idx]):
            if keys is not None:
                label = keys[line_idx]
            else:
                label = "key_{}".format(line_idx)
            rows.append([step, label, val])

    return wandb.plot_table(
        "wandb/lineseries/v0",
        wandb.Table(data=rows, columns=["step", "lineKey", "lineVal"]),
        {"step": "step", "lineKey": "lineKey", "lineVal": "lineVal"},
        {"title": title, "xname": xname or "x"},
    )
def scatter(table, x, y, title=None):
    """
    Construct a scatter plot.

    Arguments:
        table (wandb.Table): Table of data.
        x (string): Name of the column to use for x-axis values.
        y (string): Name of the column to use for y-axis values.
        title (string): Plot title.

    Returns:
        A plot object, to be passed to wandb.log()

    Example:
        data = [[i, random.random() + math.sin(i / 10)] for i in range(100)]
        table = wandb.Table(data=data, columns=["step", "height"])
        wandb.log({'scatter-plot1': wandb.plot.scatter(table, "step", "height")})
    """
    # Chart field name -> table column name.
    axis_columns = {"x": x, "y": y}
    chart_strings = {"title": title}
    return wandb.plot_table(
        "wandb/scatter/v0", table, axis_columns, chart_strings
    )
def line(table, x, y, stroke=None, title=None):
    """
    Construct a line plot.

    Arguments:
        table (wandb.Table): Table of data.
        x (string): Name of the column to use for x-axis values.
        y (string): Name of the column to use for y-axis values.
        stroke (string): Name of the column to map to the line stroke scale.
        title (string): Plot title.

    Returns:
        A plot object, to be passed to wandb.log()

    Example:
        data = [[i, random.random() + math.sin(i / 10)] for i in range(100)]
        table = wandb.Table(data=data, columns=["step", "height"])
        wandb.log({'line-plot1': wandb.plot.line(table, "step", "height")})
    """
    # Chart field name -> table column name.
    column_fields = {"x": x, "y": y, "stroke": stroke}
    chart_strings = {"title": title}
    return wandb.plot_table(
        "wandb/line/v0", table, column_fields, chart_strings
    )
def roc_curve(y_true=None, y_probas=None, labels=None, classes_to_plot=None):
    """
    Calculates receiver operating characteristic scores and visualizes them
    as the ROC curve.

    Arguments:
        y_true (arr): Test set labels.
        y_probas (arr): Test set predicted probabilities, indexed as
            y_probas[:, i] for the i-th unique class in y_true.
        labels (list): Named labels for target variable (y). Makes plots
            easier to read by replacing target values with the
            corresponding name. For example labels=['dog', 'cat', 'owl']:
            all 0s are replaced by 'dog', 1s by 'cat'. Only applied when
            the class values are integers.
        classes_to_plot (list): Subset of class values to include in the
            plot. Defaults to every class found in y_true.

    Returns:
        A plot object, to be passed to wandb.log(), or None when the
        inputs fail the test_missing / test_types validation.

    Example:
        wandb.log({'roc-curve': wandb.plot.roc_curve(y_true, y_probas, labels)})
    """
    np = util.get_module(
        "numpy",
        required="roc requires the numpy library, install with `pip install numpy`",
    )
    util.get_module(
        "sklearn",
        required="roc requires the scikit library, install with `pip install scikit-learn`",
    )
    # Aliased so the sklearn function does not shadow this function's name.
    from sklearn.metrics import roc_curve as sk_roc_curve

    if not (test_missing(y_true=y_true, y_probas=y_probas)
            and test_types(y_true=y_true, y_probas=y_probas)):
        return None

    y_true = np.array(y_true)
    y_probas = np.array(y_probas)
    classes = np.unique(y_true)

    if classes_to_plot is None:
        classes_to_plot = classes
    # Boolean mask over `classes`: True where the class was requested.
    indices_to_plot = np.isin(classes, classes_to_plot)

    data = []
    truncated = False
    for i, to_plot in enumerate(indices_to_plot):
        if not to_plot:
            # Skip the ROC computation entirely for classes not plotted.
            continue

        fpr, tpr, _ = sk_roc_curve(y_true, y_probas[:, i],
                                   pos_label=classes[i])

        # Integer class values index into `labels`; anything else (string,
        # float, ...) is used as the class name directly.
        if labels is not None and isinstance(classes[i], (int, np.integer)):
            class_label = labels[classes[i]]
        else:
            class_label = classes[i]

        for false_pos, true_pos in zip(fpr, tpr):
            data.append([class_label, round(false_pos, 3), round(true_pos, 3)])
            if len(data) >= chart_limit:
                truncated = True
                break

        if truncated:
            # Stop the outer loop as well, so later classes neither add
            # rows beyond the limit nor repeat the warning.
            wandb.termwarn(
                "wandb uses only the first %d datapoints to create the plots."
                % wandb.Table.MAX_ROWS)
            break

    table = wandb.Table(columns=['class', 'fpr', 'tpr'], data=data)
    return wandb.plot_table(
        'wandb/area-under-curve/v0',
        table,
        {'x': 'fpr', 'y': 'tpr', 'class': 'class'},
        {
            'title': 'ROC',
            'x-axis-title': 'False positive rate',
            'y-axis-title': 'True positive rate',
        },
    )
def pr_curve(y_true=None, y_probas=None, labels=None, classes_to_plot=None):
    """
    Computes the tradeoff between precision and recall for different
    thresholds.

    A high area under the curve represents both high recall and high
    precision, where high precision relates to a low false positive rate,
    and high recall relates to a low false negative rate. High scores for
    both show that the classifier is returning accurate results (high
    precision), as well as returning a majority of all positive results
    (high recall). The PR curve is useful when the classes are very
    imbalanced.

    Arguments:
        y_true (arr): Test set labels.
        y_probas (arr): Test set predicted probabilities, indexed as
            y_probas[:, i] for the i-th unique class in y_true.
        labels (list): Named labels for target variable (y). Makes plots
            easier to read by replacing target values with the
            corresponding name. For example labels=['dog', 'cat', 'owl']:
            all 0s are replaced by 'dog', 1s by 'cat'. Only applied when
            the class values are integers.
        classes_to_plot (list): Subset of class values to include in the
            plot. Defaults to every class found in y_true.

    Returns:
        A plot object, to be passed to wandb.log(), or None when the
        inputs fail the test_missing / test_types validation.

    Example:
        wandb.log({'pr-curve': wandb.plot.pr_curve(y_true, y_probas, labels)})
    """
    np = util.get_module(
        "numpy",
        required="pr_curve requires the numpy library, install with `pip install numpy`",
    )
    scikit = util.get_module(
        "sklearn",
        "pr_curve requires the scikit library, install with `pip install scikit-learn`",
    )

    # NOTE(review): the inputs are converted to arrays *before* validation
    # (unlike roc_curve, which validates first). Order kept as-is because
    # test_missing / test_types are defined elsewhere -- confirm intent.
    y_true = np.array(y_true)
    y_probas = np.array(y_probas)

    if not (test_missing(y_true=y_true, y_probas=y_probas)
            and test_types(y_true=y_true, y_probas=y_probas)):
        return None

    classes = np.unique(y_true)
    if classes_to_plot is None:
        classes_to_plot = classes
    # Boolean mask over `classes`: True where the class was requested.
    indices_to_plot = np.isin(classes, classes_to_plot)

    # Each curve is down-sampled to this many evenly spaced points.
    samples = 20

    data = []
    truncated = False
    for i, to_plot in enumerate(indices_to_plot):
        if not to_plot:
            continue

        precision, recall, _ = scikit.metrics.precision_recall_curve(
            y_true, y_probas[:, i], pos_label=classes[i]
        )

        # Resolve the display name once per class. Integer class values
        # index into `labels`; anything else (string, float, date) is used
        # as the class name directly.
        class_name = classes[i]
        if labels is not None and isinstance(class_name, (int, np.integer)):
            class_name = labels[class_name]

        for k in range(samples):
            p = precision[int(len(precision) * k / samples)]
            r = recall[int(len(recall) * k / samples)]
            data.append([class_name, round(p, 3), round(r, 3)])
            if len(data) >= chart_limit:
                truncated = True
                break

        if truncated:
            # Stop the outer loop as well, so later classes neither add
            # rows beyond the limit nor repeat the warning.
            wandb.termwarn(
                "wandb uses only the first %d datapoints to create the plots."
                % wandb.Table.MAX_ROWS
            )
            break

    table = wandb.Table(columns=["class", "precision", "recall"], data=data)
    return wandb.plot_table(
        "wandb/area-under-curve/v0",
        table,
        {"x": "recall", "y": "precision", "class": "class"},
        {"title": "Precision v. Recall"},
    )
def confusion_matrix(probs=None, y_true=None, preds=None, class_names=None,
                     title=None):
    """
    Computes a multi-run confusion matrix.

    Arguments:
        probs (2-d arr): Shape [n_examples, n_classes]. The predicted class
            of each example is the argmax over its row.
        y_true (arr): Array of label indices.
        preds (arr): Array of predicted label indices. Provide either
            probs or preds, not both.
        class_names (arr): Array of class names. When given, label indices
            must be zero-based positions into it. When omitted, the
            distinct indices found in preds and y_true are sorted and
            named "Class_1" ... "Class_N".
        title (string): Plot title.

    Returns:
        A plot object, to be passed to wandb.log()

    Raises:
        AssertionError: if probs is not 2-dimensional, if both or neither
            of probs and preds are given, if the number of predictions and
            labels differ, or if an index does not fit into class_names.

    Example:
        ```
        vals = np.random.uniform(size=(10, 5))
        probs = np.exp(vals)/np.sum(np.exp(vals), keepdims=True, axis=1)
        y_true = np.random.randint(0, 5, size=(10))
        labels = ["Cat", "Dog", "Bird", "Fish", "Horse"]
        wandb.log({'confusion_matrix': wandb.plot.confusion_matrix(
            probs, y_true=y_true, class_names=labels)})
        ```
    """
    np = util.get_module(
        "numpy",
        required="confusion matrix requires the numpy library, install with `pip install numpy`",
    )

    # Migration guard: the first positional argument used to be `preds`.
    # np.ndim also handles plain lists, so a legacy 1-d list triggers this
    # message instead of an AttributeError on `.shape`.
    assert probs is None or np.ndim(probs) == 2, (
        "confusion_matrix has been updated to accept"
        " probabilities as the default first argument. Use preds=...")

    assert (probs is None or preds is None) and not (
        probs is None and preds is None
    ), "Must provide probabilties or predictions but not both to confusion matrix"

    if probs is not None:
        preds = np.argmax(probs, axis=1).tolist()

    assert len(preds) == len(
        y_true), "Number of predictions and label indices must match"

    if class_names is not None:
        n_classes = len(class_names)
        class_inds = list(range(n_classes))
        # Indices are zero-based, so the largest valid index is
        # n_classes - 1; equality would fail in the mapping lookup below.
        assert max(preds) < n_classes, \
            "Higher predicted index than number of classes"
        assert max(y_true) < n_classes, \
            "Higher label class index than number of classes"
    else:
        class_inds = set(preds).union(set(y_true))
        n_classes = len(class_inds)
        class_names = ["Class_{}".format(i) for i in range(1, n_classes + 1)]

    # Map each label value to a dense row/column position, in case the
    # user has non-contiguous prediction indices.
    class_mapping = {val: pos for pos, val in enumerate(sorted(class_inds))}

    # counts[actual, predicted] = number of examples with that pairing.
    counts = np.zeros((n_classes, n_classes))
    for actual, predicted in zip(y_true, preds):
        counts[class_mapping[actual], class_mapping[predicted]] += 1

    data = [
        [class_names[i], class_names[j], counts[i, j]]
        for i in range(n_classes)
        for j in range(n_classes)
    ]

    fields = {
        "Actual": "Actual",
        "Predicted": "Predicted",
        "nPredictions": "nPredictions",
    }
    title = title or ""
    return wandb.plot_table(
        "wandb/confusion_matrix/v1",
        wandb.Table(columns=["Actual", "Predicted", "nPredictions"], data=data),
        fields,
        {"title": title},
    )
def run():
    """
    Train and evaluate an NMT model on Multi30k (German source, English
    target) while logging to Weights & Biases.

    Steps, in order:
      1. Parse command-line arguments and start a W&B run.
      2. Load (downloading on failure) the spaCy 'en' and 'de' models.
      3. Build the source/target fields, dataset splits, vocabularies and
         batch iterators.
      4. Build an ``NMTExperiment`` from the config and train/evaluate for
         ``config.nepochs`` epochs, logging an attention visualization for
         one training example.

    NOTE(review): the original indentation of this function was lost; the
    placement of the visualization and the final ``print`` inside the
    epoch loop is a reconstruction -- confirm against the original file.
    """
    args = parser.parse_args()
    # Defaults handed to wandb.init as the initial run config.
    hyper_parameter_defaults = dict(opt='RMSProp',
                                    nonlin='relu',
                                    batch_size=12,
                                    learning_rate=0.0002,
                                    betas=(0.5, 0.999),
                                    alpha=0.9)
    if args.device is not None:
        # Replace the device argument with a torch CUDA device object.
        args.device = torch.device(f'cuda:{args.device}')

    # wandb
    # NOTE: the local name `run` shadows this function's own name from
    # here on.
    if args.name is None:
        run = wandb.init(project="gradientsandtranslation2",
                         config=hyper_parameter_defaults)
        wandb.config["more"] = "custom"
        # save run to get readable run name
        run.save()
        run.name = os.path.join('NMT', run.name)
        config = wandb.config
        # NOTE(review): run.name was just prefixed with 'NMT' above, so
        # this path presumably contains 'NMT' twice -- confirm intended.
        config.save_dir = os.path.join('experiments', 'NMT', run.name)
        run.save()
    else:
        # Note: this branch uses a different project name than the one
        # above.
        run = wandb.init(project="gradientsandtranslation",
                         config=hyper_parameter_defaults,
                         name=args.name)
        wandb.config["more"] = "custom"
        run.name = os.path.join('NMT', run.name)
        config = wandb.config
        config.save_dir = os.path.join('experiments', 'NMT', args.name)
        run.save()

    # update config object with args
    wandb.config.update(args, allow_val_change=True)

    # set up language
    # Each spaCy model is loaded; on OSError it is downloaded via the
    # spaCy CLI and the load is retried once.
    try:
        spacy_en = spacy.load('en')
    except OSError as e:
        print(e)
        print('Downloading model...')
        os.system('python -m spacy download en')
        spacy_en = spacy.load('en')

    try:
        spacy_de = spacy.load('de')
    except OSError as e:
        print(e)
        print('Downloading model...')
        os.system('python -m spacy download de')
        spacy_de = spacy.load('de')

    def tokenize_de(text):
        """
        Tokenizes German text from a string into a list of strings (tokens).

        The reversal of the token list is currently disabled (see the
        commented-out slice on the return line).
        """
        return [tok.text for tok in spacy_de.tokenizer(text)]  #[::-1]

    def tokenize_en(text):
        """
        Tokenizes English text from a string into a list of strings (tokens)
        """
        return [tok.text for tok in spacy_en.tokenizer(text)]

    # Only the 'Trans' model gets batch-first fields.
    if args.model == 'Trans':
        batch_first = True
    else:
        batch_first = False

    # Source field uses the German tokenizer, target field the English one.
    SRC = Field(tokenize_de,
                init_token='<sos>',
                eos_token='<eos>',
                lower=True,
                batch_first=batch_first)
    TRG = Field(tokenize_en,
                init_token='<sos>',
                eos_token='<eos>',
                lower=True,
                batch_first=batch_first)

    train_data, val_data, test_data = Multi30k.splits(exts=('.de', '.en'),
                                                      fields=(SRC, TRG))

    # Vocabularies are built from the training split only.
    SRC.build_vocab(train_data, min_freq=2)
    TRG.build_vocab(train_data, min_freq=2)

    # Store the padding-token indices and vocabulary sizes on the config,
    # which is passed to NMTExperiment below.
    config.SRCPADIDX = SRC.vocab.stoi[SRC.pad_token]
    config.TRGPADIDX = TRG.vocab.stoi[TRG.pad_token]

    train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
        (train_data, val_data, test_data), batch_size=config.batch_size)

    config.inp_size = len(SRC.vocab)
    config.out_size = len(TRG.vocab)

    # create experiment management object
    experiment = NMTExperiment(config)
    model = experiment.model
    wandb.watch(model)

    # Target padding positions are ignored by the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=config.TRGPADIDX)

    for i in range(config.nepochs):
        train_loss = train_nmt(experiment.model, train_iterator,
                               experiment.optimizer, criterion, config, run,
                               SRC, TRG)
        val_loss = eval_nmt(model, valid_iterator, criterion, config, run,
                            SRC, TRG)

        # visualize an example
        for example_idx in [8]:
            src = vars(train_data.examples[example_idx])['src']
            trg = vars(train_data.examples[example_idx])['trg']
            translation_inds, translation, attention = translate_sentence(
                src, SRC, TRG, spacy_de, model, config, max_len=50)

            # Add the init/eos tokens so that src can be indexed by the
            # second axis of attn below.
            src = [SRC.init_token] + src + [SRC.eos_token]

            # Take the first entry along axis 0, then average over the
            # next axis (presumably attention heads -- TODO confirm),
            # leaving a 2-d array indexed as attn[m, n].
            attn = attention[0, :, :, :].mean(dim=0).cpu().numpy()

            # One row per (target position m, source position n) pair.
            attn_data = []
            for m in range(attn.shape[0]):
                for n in range(attn.shape[1]):
                    attn_data.append(
                        [n, m, src[n], translation[m], attn[m, n]])

            data_table = wandb.Table(
                data=attn_data,
                columns=["s_ind", "t_ind", "s_word", "t_word", "attn"])
            # Chart field name -> table column name.
            fields = {
                "sindex": "s_ind",
                "tindex": "t_ind",
                "sword": "s_word",
                "tword": "t_word",
                "attn": "attn"
            }
            wandb.log({
                "my_nlp_viz_id":
                wandb.plot_table("kylegoyette/nlp-attention-visualization",
                                 data_table, fields)
            })

        print(f'Epoch: {i} Train Loss: {train_loss} Val Loss {val_loss}')
import math
import random

# Start a new run
run = wandb.init(project='custom-charts', notes='Custom stacked bar chart')
offset = random.random()

# Set up data to log in custom charts.
# Each row is [step, height]; the list is named data_1 because that is the
# name the table below reads from.
data_1 = [
    [i, random.random() + math.log(1 + i) + offset + random.random()]
    for i in range(100)
]

# Create a table with the columns to plot
table = wandb.Table(data=data_1, columns=["step", "height"])

# Map from the table's columns to the chart's fields
fields = {"x": "step", "value": "height"}

# Use the table to populate the new custom chart preset
my_custom_chart = wandb.plot_table(
    vega_spec_name="carey/stacked_bar_chart",
    data_table=table,
    fields=fields,
)

# Log the plot to have it show up in the UI
wandb.log({"custom_chart": my_custom_chart})

# Finally, end the run. We only need this line in Jupyter notebooks.
run.finish()