def plot_rocs(ys_true, ys_predict, labels, show=True, baseline=False, save=False):
    """Draw one ROC curve per (y_true, y_predict, label) triple on the current figure.

    Args:
        ys_true: Sequence of ground-truth label collections; class ``1`` is
            treated as the positive class.
        ys_predict: Sequence of score collections, aligned with ``ys_true``.
        labels: Sequence of legend names, aligned with ``ys_true``. Also used
            (joined with "-") as the output file name when ``save`` is true.
        show: When true, call ``plt.show()`` at the end.
        baseline: When true, add reference lines at 0.5% FPR, 1% FPR and 90% TPR.
        save: When true, write the figure to disk before showing it.
    """
    for y_true, y_predict, label in zip(ys_true, ys_predict, labels):
        fpr, tpr, _thresholds = metrics.roc_curve(y_true, y_predict, pos_label=1)
        auc = metrics.auc(fpr, tpr)
        plt.plot(fpr, tpr, label="{} auc={:.3f}".format(label, auc))

    # Diagonal of a classifier that guesses at random.
    plt.plot([0, 1], [0, 1], linestyle="--", lw=2, color="black",
             label="random-chance", alpha=0.6)

    if baseline:
        plt.axvline(x=0.005, color="red", label="0.5% FPR", alpha=0.8)
        plt.axvline(x=0.01, color="red", label="1% FPR", alpha=0.8)
        plt.axhline(y=0.9, color="red", label="90% TPR", alpha=0.8)

    plt.xlabel("false positive rate")
    plt.ylabel("true positive rate")
    plt.title("roc curve")
    plt.legend(loc="lower right")

    if save:
        # The method is ``savefig``; ``savefit`` does not exist. The bare
        # ``quality`` keyword is no longer accepted by newer Matplotlib
        # releases, so the JPEG quality is forwarded through ``pil_kwargs``.
        plt.savefig("-".join(labels), pil_kwargs={"quality": 95})
    if show:
        plt.show()
def plot_loc_from_df(normaldf, new_leapdf, lat, lon, start, end, path):
    """Plot the 'normal' and 'new_leap' series for one location over [start, end).

    The column to plot is named ``files.lat_str(lat) + '_' + files.lon_str(lon)``
    and is read from both input frames.

    Args:
        normaldf: Frame holding the 'normal' series, one column per location.
        new_leapdf: Frame holding the 'new_leap' series, same column naming.
        lat: Latitude, turned into a column-name part by ``files.lat_str``.
        lon: Longitude, turned into a column-name part by ``files.lon_str``.
        start: Inclusive lower bound compared against the frame index.
        end: Exclusive upper bound compared against the frame index.
        path: Output file for the figure, or ``None`` to skip saving.
    """
    col = files.lat_str(lat) + '_' + files.lon_str(lon)

    # NOTE(review): `p` is presumably the file's pandas alias -- confirm.
    plotdf = p.DataFrame()
    plotdf['normal'] = normaldf[col]
    plotdf['new_leap'] = new_leapdf[col]

    plt.figure()
    in_window = (plotdf.index >= start) & (plotdf.index < end)
    plotdf[in_window].plot()
    plt.legend()

    if path is not None:
        # Was `plt.savefit`, which raises AttributeError.
        plt.savefig(path)
def testSmoothDelta(self):
    """Smooth a unit impulse and save a plot of the input and the trimmed output.

    The input is 100 zeros with a single 1 at the midpoint. The smoothed
    signal is plotted with half a window length removed from each end.
    The figure is written to ``testSmoothDelta.png``.
    """
    nIn = 100
    x = np.zeros(nIn)
    # Floor division: a float index is rejected on Python 3.
    x[nIn // 2] = 1

    # Renamed from `len` so the builtin is not shadowed.
    window_len = 21
    xs = smooth.smooth(x, window_len=window_len)

    half_window = window_len // 2
    plt.clf()
    plt.plot(x)
    plt.plot(xs[half_window:-half_window])
    plt.savefig("testSmoothDelta.png")
layers.Dense(512, activatoin = 'elu'), layers.Dense(512, activation = 'elu'), layers.Dense(512, activation = 'elu'), layers.Dense(1)]) size_histories['large'] = compile_and_fit(large_model, 'sizes/large') # plot the training and validation losses plotter.plot(size_histories) a = plt.xscale('log') plt.xlim([5, max(plt.xlim())]) plt.ylim([0.5, 0.7]) plt.xlabel('epoch [log scale]') plt.savefit('./results_plot/Overfit_and_Underfit_4.png') plt.clf() #%load_ext tensorboard #%tensorboard --logdir {logdir}/sizes display.IFrame( src="https://tensorboard.dev/experiment/vW7jmmF9TmKmy3rbheMQpw/#scalars&_smoothingWeight=0.97", width="100%", height="800px") # prevent overfitting shutil.rmtree(logdir/'regularizers/Tiny', ignore_errors=True) shutil.copytree(logdir/'sizes/Tiny', logdir/'regularizers/Tiny') regularizer_histories = {}
def main(argv):
    """Run an interactive first-look analysis of ``train.csv`` and ``test.csv``.

    Expected invocation is ``<program> -d <directory>``; the directory is
    resolved against the current working directory and must contain both
    CSV files. The function prints the column overview, asks the user to
    confirm the label column, builds the combined frame without the label,
    and reports and plots per-column NULL counts.

    Every validation failure prints a message and ends the process through
    ``sys.exit()``.

    Args:
        argv: Full argument vector, with the program name at index 0.
    """
    # Let's check everything is ok
    if len(argv) < 2:
        print("Directory name needs to be specified.")
        sys.exit()
    elif argv[1] != "-d":
        print("Directory name flag should be '-d'.")
        sys.exit()
    elif len(argv) == 2:
        print("Data directory needs to be specified.")
        sys.exit()

    directory_name = argv[2]
    cwd = os.getcwd()  # current directory
    try:
        train = pd.read_csv(os.path.join(cwd, directory_name, 'train.csv'))
        test = pd.read_csv(os.path.join(cwd, directory_name, 'test.csv'))
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
        print("Either training file or test file doesn't exist. Please try to save file in the format of 'train.csv' and 'test.csv' format. ")
        sys.exit()

    # 1. Analysis one: Extract columns of training and test data.
    # Conventionally, training data has exactly one more column than test data.
    training_column_name = list(train)
    test_column_name = list(test)

    if len(training_column_name) != len(test_column_name) + 1:
        # Number of column for training data is not 1 + number of column for test data
        print("Training data must have exactly one more column (label) than test data.")
        sys.exit()

    print(" ")
    print("### 1. Column names and count. ")
    print("Training data columns : " + ", ".join(str(x) for x in training_column_name) + ".")
    print("Number of columns in the training data: {}.".format(len(training_column_name)))
    print("Test data columns : " + ", ".join(str(x) for x in test_column_name) + ".")
    print("Number of columns in the test data: {}.".format(len(test_column_name)))
    print(" ")
    print(" ")

    # 2. Identify the label and concatenate it with test data
    # The last training column that is missing from the test data is proposed.
    for column in training_column_name:
        if column not in test_column_name:
            possible_label = column

    print(" ")
    print("### 2. Label identification. ")
    user_answer = input("'{}' is not in the test data. It might be a label, is it correct (y/n)? \n>>> ".format(possible_label))

    if user_answer == 'y':
        label = possible_label
    elif user_answer == 'n':
        while True:
            user_answer = input("Please enter label name manually >>> ")
            # User manually specifying the label name
            if user_answer in training_column_name:
                label = user_answer
                break
            print("'{}' is not in the training column name".format(user_answer))
    else:
        print("Let's start over.")
        sys.exit()

    print("Label '{}' will be dropped and the training data and test data will be combined for feature engineering process. ".format(label))
    temp_train = train.copy()
    temp_train = temp_train.drop([label], axis=1)

    Data = pd.concat([temp_train, test])
    print("Combined data is stored under variable named 'Data' (training + test) . ")
    print(" ")
    print(" ")

    # 3. NULL count analysis
    print(" ")
    print("### 3. NULL count. ")
    null_count = []
    null_count_ratio = []
    total_rows = Data.shape[0]

    for column in list(Data):
        this_null_count = Data[column].isnull().sum()
        null_count.append(this_null_count)
        null_count_ratio.append(100*float(this_null_count)/float(total_rows))
        print("Column name: {} ----> Proportion of NULLs: {} / {}".format(column, this_null_count, total_rows))

    # Figure 1. bar plots for NULL count
    plt.figure(1, figsize=(11, 5))
    plt.subplot(211)
    plt.bar(list(Data), null_count)
    plt.title("NULL in the data")
    plt.ylabel("Count")

    plt.subplot(212)
    plt.bar(list(Data), null_count_ratio)
    plt.xlabel("Column")
    plt.ylabel("NULL ratio (%)")

    # Save before the blocking show: once the window is closed the figure is
    # gone and a later save would write an empty image. The call was also
    # misspelled `savefit`.
    plt.savefig("figre_1_NULL_count.png")
    plt.show(block=True)

    print(" ")
    print(" ")
def plotDatasetsFirstSeen(plotBars=True, plotLines=True):
    """
    Plots the yearly distribution of apps in all our datasets according to the
    "first_seen" attribute, as bars, lines, or both.

    The figure is saved as "<title>_first_seen_all.pdf" and
    "<title>_first_seen_all.pgf", where <title> is "Lines_Bars", "Lines" or
    "Bars" depending on the two flags.

    :param plotBars: Draw one bar series per dataset
    :param plotLines: Draw one line series per dataset; the lines carry legend
        labels only when no bars are drawn
    :return: True on success, False if any step raised (the error is reported
        through prettyPrintError)
    """
    try:
        if not plotBars and not plotLines:
            # Previously surfaced as a NameError on the unset `title`.
            raise ValueError("At least one of plotBars/plotLines must be True")

        # Pre-calculated distributions for AMD, GPlay, AndroZoo'19, Manual 100, and Piggybacking
        # Each entry: (yearly counts, color, line marker, legend label)
        series = [
            ([1.0, 8.0, 248.0, 2949.0, 9299.0, 7365.0, 3059.0, 1623.0, 0, 1.0, 0],
             '#ff4136', 'o', 'AMD'),
            ([0, 0, 26.0, 587.0, 1654.0, 5453.0, 2933.0, 6295.0, 3231.0, 7946.0, 1898.0],
             '#3d9970', '^', 'GPlay'),
            ([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6173.0],
             '#ff851b', 's', 'AndroZoo\'19'),
            ([0, 0, 0, 6.0, 9.0, 25.0, 12.0, 30.0, 7.0, 11.0, 0],
             '#6baed6', '+', 'Manual 100'),
            ([0, 14.0, 140.0, 510.0, 1168.0, 922.0, 0, 0, 0, 0, 0],
             '#808389', 'x', 'Piggybacking'),
        ]

        # Miscellaneous information about the figure
        fig, ax = plt.subplots()
        all_years = ['2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019']
        index = np.arange(len(all_years))
        bar_width = 0.35
        opacity = 0.8

        # Build the data
        if plotBars:
            for counts, color, _marker, label in series:
                plt.bar(index, counts, bar_width, alpha=opacity, color=color, label=label)
        if plotLines:
            for counts, color, marker, label in series:
                # With bars present the bars already own the legend entries.
                legend_kwargs = {} if plotBars else {'label': label}
                ax.plot(index, counts, color=color, marker=marker, alpha=opacity, **legend_kwargs)

        # Set the labels' captions
        plt.xlabel('"first_seen" by Years')
        plt.ylabel('Counts of Apps')
        plt.xticks(index + bar_width, tuple(all_years), rotation=45)
        plt.legend()
        plt.tight_layout()
        #plt.show()

        if plotLines and plotBars:
            title = "Lines_Bars"
        elif plotLines:
            title = "Lines"
        else:
            title = "Bars"

        plt.savefig("%s_first_seen_all.pdf" % title)
        # Was `plt.savefit`, so every call ended in the except branch.
        plt.savefig("%s_first_seen_all.pgf" % title)

    except Exception as e:
        prettyPrintError(e)
        return False

    return True
#!/usr/bin/env python3
# Draw a bar chart of the sale amount per customer and save it as bar_plot.png.
import matplotlib.pyplot as plt

plt.style.use('ggplot')

customers = ['ABC', 'EDF', 'GHI', 'JKL', 'MNO']
customers_index = range(len(customers))
sale_amounts = [127, 90, 201, 111, 232]

fig = plt.figure()
axl = fig.add_subplot(1, 1, 1)
axl.bar(customers_index, sale_amounts, align='center', color='darkblue')
axl.xaxis.set_ticks_position('bottom')
axl.yaxis.set_ticks_position('left')

# Replace the numeric bar positions with the customer names.
plt.xticks(customers_index, customers, rotation=0, fontsize='small')
plt.xlabel('Customer Name')
plt.ylabel('Sale Amount')
plt.title('Sale Amount per Customer')

# Was `plt.savefit`, which raises AttributeError.
plt.savefig('bar_plot.png', dpi=400, bbox_inches='tight')
plt.show()
print('STEP:', i)

def closure():
    # Re-evaluates the model and returns the loss; passed to optimizer.step below.
    optimizer.zero_grad()
    out = seq(input)
    loss = criterion(out, target)
    # NOTE(review): the `[0]` index assumes the loss converts to an array with
    # at least one dimension -- confirm against the installed PyTorch version.
    print('loss', loss.data.numpy()[0])
    loss.backward()
    return loss

optimizer.step(closure)

# begin to predict
future = 1000
pred = seq(test_input, future = future)
# Only the non-extrapolated part of the prediction is scored.
loss = criterion(pred[:,:-future], test_target)
print('test loss:', loss.data.numpy()[0])
y = pred.data.numpy()

# draw the result
plt.figure(figsize=(30,10))
plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize = 30)
plt.xlabel('x', fontsize=20)
plt.ylabel('y', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)

def draw(yi, color):
    # Solid line over the input span, dotted line over the `future` extension.
    plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth = 2.0)
    # The closing parenthesis of np.arange was misplaced, which left the
    # statement unbalanced; the x range is [input length, input length + future).
    plt.plot(np.arange(input.size(1), input.size(1) + future), yi[input.size(1):], color + ':', linewidth = 2.0)

draw(y[0],'r')
draw(y[1],'g')
draw(y[2],'b')
# Was `plt.savefit`, which raises AttributeError.
plt.savefig('predict%d.pdf'%i)
plt.close()
import matplotlib.pyplot as plt # takes two sequences xvals = [0, 1, 2, 3] yvals = [23, 48, 65, 80] plt.plot(xvals, yvals) # other fun things plt.savefit('whatever.png') plt.close() plt.bar(xvals, yvals) plt.scatter(xvals, yvals, color="red") ply.close() # for examples... fig, ax = plt.subplots() ax.pie([12,32]) axs.bar(yrs, means) ~~~py hots = [] colds = [] for d in datarows: year = int(d["year"]) mean = float(d["annual_mean"]) if mean <= 0: colds.append([year, mean]) else: