Пример #1
0
def plot_rocs(ys_true,
              ys_predict,
              labels,
              show=True,
              baseline=False,
              save=False):
    """Draw one ROC curve per (y_true, y_predict, label) triple on the current figure.

    Args:
        ys_true: iterable of ground-truth label arrays (positive class is 1).
        ys_predict: iterable of score arrays, aligned with ``ys_true``.
        labels: sequence of curve names (strings); used for the legend and,
            joined with "-", as the output file name when ``save`` is true.
        show: call ``plt.show()`` after drawing.
        baseline: also draw reference lines at 0.5% FPR, 1% FPR and 90% TPR.
        save: write the figure to disk before showing it.
    """
    for y_true, y_predict, label in zip(ys_true, ys_predict, labels):
        # The thresholds returned by roc_curve are not needed for plotting.
        fpr, tpr, _ = metrics.roc_curve(y_true, y_predict, pos_label=1)
        auc = metrics.auc(fpr, tpr)
        plt.plot(fpr, tpr, label="{} auc={:.3f}".format(label, auc))

    # Diagonal: the curve of a classifier that guesses at random.
    plt.plot([0, 1], [0, 1],
             linestyle="--",
             lw=2,
             color="black",
             label="random-chance",
             alpha=0.6)
    if baseline:
        plt.axvline(x=0.005, color="red", label="0.5% FPR", alpha=0.8)
        plt.axvline(x=0.01, color="red", label="1% FPR", alpha=0.8)
        plt.axhline(y=0.9, color="red", label="90% TPR", alpha=0.8)

    plt.xlabel("false positive rate")
    plt.ylabel("true positive rate")
    plt.title("roc curve")
    plt.legend(loc="lower right")
    if save:
        # The function is savefig (not "savefit"). The former quality=95
        # argument is dropped: it only ever applied to JPEG output, the file
        # name carries no extension (so the default format is used), and
        # current matplotlib rejects the keyword outright.
        plt.savefig("-".join(labels))
    if show:
        plt.show()
Пример #2
0
def plot_loc_from_df(normaldf, new_leapdf, lat, lon, start, end, path):
    """Plot the 'normal' and 'new_leap' series for one location over [start, end).

    The column to plot is named ``<lat_str>_<lon_str>`` as produced by
    ``files.lat_str`` / ``files.lon_str``; it is read from both input frames.

    Args:
        normaldf: frame holding the 'normal' series, indexed comparably to
            ``start`` / ``end``.
        new_leapdf: frame holding the 'new_leap' series, same layout.
        lat, lon: location passed to the ``files`` name helpers.
        start: inclusive lower bound on the index.
        end: exclusive upper bound on the index.
        path: output file for the figure, or ``None`` to skip saving.
    """
    col = files.lat_str(lat) + '_' + files.lon_str(lon)
    plotdf = p.DataFrame()
    plotdf['normal'] = normaldf[col]
    plotdf['new_leap'] = new_leapdf[col]
    plt.figure()
    in_window = (plotdf.index >= start) & (plotdf.index < end)
    plotdf[in_window].plot()
    plt.legend()
    if path is not None:
        # savefig, not "savefit": the misspelt name raised AttributeError.
        plt.savefig(path)
Пример #3
0
    def testSmoothDelta(self):
        """Smooth a unit impulse and save a plot of the input and the result.

        Writes ``testSmoothDelta.png`` in the working directory; makes no
        assertions of its own.
        """
        n_in = 100

        x = np.zeros(n_in)
        # Floor division: under Python 3, n_in / 2 is a float and cannot be
        # used as an array index.
        x[n_in // 2] = 1

        # Named window_len rather than `len` so the builtin is not shadowed.
        window_len = 21
        xs = smooth.smooth(x, window_len=window_len)
        # Trim half a window from each end before plotting.
        # NOTE(review): presumably smooth() pads its output by the window
        # size - confirm against smooth.smooth.
        half = window_len // 2
        plt.clf()
        plt.plot(x)
        plt.plot(xs[half:-half])
        plt.savefig("testSmoothDelta.png")
	layers.Dense(512, activatoin = 'elu'),
	layers.Dense(512, activation = 'elu'),
	layers.Dense(512, activation = 'elu'),
	layers.Dense(1)])

size_histories['large'] = compile_and_fit(large_model, 'sizes/large')


# Plot the training and validation losses of every recorded model size.

plotter.plot(size_histories)
# NOTE(review): `a` is not used below in this fragment; the assignment is kept
# in case later code reads it.
a = plt.xscale('log')
plt.xlim([5, max(plt.xlim())])
plt.ylim([0.5, 0.7])
plt.xlabel('epoch [log scale]')
# savefig, not "savefit": the misspelt name raised AttributeError.
plt.savefig('./results_plot/Overfit_and_Underfit_4.png')
plt.clf()

#%load_ext tensorboard
#%tensorboard --logdir {logdir}/sizes

display.IFrame(
    src="https://tensorboard.dev/experiment/vW7jmmF9TmKmy3rbheMQpw/#scalars&_smoothingWeight=0.97",
    width="100%", height="800px")

# Prevent overfitting: seed the regularizer runs with a copy of the 'Tiny'
# logs from the size experiments.

shutil.rmtree(logdir/'regularizers/Tiny', ignore_errors=True)
shutil.copytree(logdir/'sizes/Tiny', logdir/'regularizers/Tiny')

regularizer_histories = {}
def main(argv):
    """Run an interactive first-look analysis of a train/test CSV pair.

    ``argv`` must look like ``[program, "-d", directory]``; ``directory`` is
    resolved against the current working directory and must contain
    ``train.csv`` and ``test.csv``.

    Steps performed:
      1. Print the column names and counts of both files.
      2. Ask the user to confirm (or type) the label column.
      3. Drop the label, stack train on top of test, and report the NULL
         count per column, both as text and as a two-panel bar chart saved
         to ``figre_1_NULL_count.png``.

    Exits via ``sys.exit()`` when the arguments are malformed, the CSV files
    cannot be read, the column counts do not differ by exactly one, no label
    candidate exists, or the user answers anything other than 'y'/'n'.
    """
    # Validate the command line before touching the file system.
    if len(argv) < 2:
        print("Directory name needs to be specified.")
        sys.exit()
    elif argv[1] != "-d":
        print("Directory name flag should be '-d'.")
        sys.exit()
    elif len(argv) == 2:
        print("Data directory needs to be specified.")
        sys.exit()

    directory_name = argv[2]

    data_dir = os.path.join(os.getcwd(), directory_name)
    try:
        train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
        test = pd.read_csv(os.path.join(data_dir, 'test.csv'))
    except (OSError, ValueError):
        # OSError covers missing/unreadable files; pandas' empty-file and
        # parser errors derive from ValueError. A bare `except:` would also
        # have swallowed KeyboardInterrupt and genuine programming errors.
        print("Either training file or test file doesn't exist. Please try to save file in the format of 'train.csv' and 'test.csv' format. ")
        sys.exit()

    # 1. Extract the columns of both files. Conventionally the training data
    #    has exactly one more column (the label) than the test data.
    training_column_name = list(train)
    test_column_name = list(test)

    if len(training_column_name) != len(test_column_name) + 1:
        print("Training data must have exactly one more column (label) than test data.")
        sys.exit()

    print(" ")
    print("### 1. Column names and count. ")
    print("Training data columns : " + ", ".join(str(x) for x in training_column_name) + ".")
    print("Number of columns in the training data: {}.".format(len(training_column_name)))

    print("Test data columns : " + ", ".join(str(x) for x in test_column_name) + ".")
    print("Number of columns in the test data: {}.".format(len(test_column_name)))
    print(" ")
    print(" ")

    # 2. Identify the label: a training column that the test data lacks.
    label_candidates = [c for c in training_column_name if c not in test_column_name]
    if not label_candidates:
        # Possible with duplicated column names; previously this surfaced as
        # an unbound-variable error a few lines further down.
        print("Could not find a training column that is missing from the test data.")
        sys.exit()
    # When several columns qualify, the last one is proposed.
    possible_label = label_candidates[-1]

    print(" ")
    print("### 2. Label identification. ")
    user_answer = input("'{}' is not in the test data. It might be a label, is it correct (y/n)? >>> ".format(possible_label))
    if user_answer == 'y':
        label = possible_label
    elif user_answer == 'n':
        # Keep asking until the user names an existing training column.
        while True:
            user_answer = input("Please enter label name manually >>> ")
            if user_answer in training_column_name:
                label = user_answer
                break
            print("'{}' is not in the training column name".format(user_answer))
    else:
        print("Let's start over.")
        sys.exit()
    print("Label '{}' will be dropped and the training data and test data will be combined for feature engineering process. ".format(label))

    # The local keeps the name `Data` because the message below refers to it.
    Data = pd.concat([train.drop([label], axis=1), test])

    print("Combined data is stored under variable named 'Data' (training + test) . ")
    print(" ")
    print(" ")

    # 3. NULL count analysis
    print(" ")
    print("### 3. NULL count. ")
    columns = list(Data)
    total_rows = Data.shape[0]
    null_count = []
    null_count_ratio = []
    for column in columns:
        this_null_count = Data[column].isnull().sum()
        null_count.append(this_null_count)
        null_count_ratio.append(100 * float(this_null_count) / float(total_rows))
        print("Column name: {} ----> Proportion of NULLs: {} / {}".format(column, this_null_count, total_rows))

    # Figure 1: absolute NULL counts (top) and NULL percentage (bottom).
    plt.figure(1, figsize=(11, 5))
    plt.subplot(211)
    plt.bar(columns, null_count)
    plt.title("NULL in the data")
    plt.ylabel("Count")
    plt.subplot(212)
    plt.bar(columns, null_count_ratio)
    plt.xlabel("Column")
    plt.ylabel("NULL ratio (%)")
    # Save BEFORE showing: once the blocking show() returns the figure has
    # been closed, so a later save would write an empty image.
    plt.savefig("figre_1_NULL_count.png")
    plt.show(block=True)

    print(" ")
    print(" ")
Пример #6
0
def plotDatasetsFirstSeen(plotBars=True, plotLines=True):
    """
    Plots the yearly distribution of apps in all our datasets according to the "first_seen" attribute

    The figure is written to "<title>_first_seen_all.pdf" and
    "<title>_first_seen_all.pgf", where <title> is "Lines_Bars", "Lines" or
    "Bars" depending on the flags.

    :param plotBars: Draw one bar series per dataset
    :type plotBars: bool
    :param plotLines: Draw one line series per dataset (the lines carry the legend labels only when no bars are drawn)
    :type plotLines: bool
    :return: True if both files were written, False if anything failed (including both flags being False)
    """
    try:
        if not (plotBars or plotLines):
            # Previously this case only failed at the very end, on an
            # unbound `title`; fail early with a message that says why.
            raise ValueError("Nothing to plot: plotBars and plotLines are both False")

        # Pre-calculated distributions: (legend label, color, line marker, yearly counts)
        datasets = [
            ('AMD', '#ff4136', 'o', [1.0, 8.0, 248.0, 2949.0, 9299.0, 7365.0, 3059.0, 1623.0, 0, 1.0, 0]),
            ('GPlay', '#3d9970', '^', [0, 0, 26.0, 587.0, 1654.0, 5453.0, 2933.0, 6295.0, 3231.0, 7946.0, 1898.0]),
            ("AndroZoo'19", '#ff851b', 's', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6173.0]),
            ('Manual 100', '#6baed6', '+', [0, 0, 0, 6.0, 9.0, 25.0, 12.0, 30.0, 7.0, 11.0, 0]),
            ('Piggybacking', '#808389', 'x', [0, 14.0, 140.0, 510.0, 1168.0, 922.0, 0, 0, 0, 0, 0]),
        ]

        # Miscellaneous information about the figure
        _fig, ax = plt.subplots()
        all_years = ['2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019']
        index = np.arange(len(all_years))
        bar_width = 0.35
        opacity = 0.8

        # Build the data: all bars first, then all lines, as before.
        if plotBars:
            for label, color, _marker, counts in datasets:
                plt.bar(index, counts, bar_width, alpha=opacity, color=color, label=label)

        if plotLines:
            for label, color, marker, counts in datasets:
                # When bars are drawn they already own the legend entries, so
                # the lines are left unlabeled to avoid duplicates.
                label_kwargs = {} if plotBars else {'label': label}
                ax.plot(index, counts, color=color, marker=marker, alpha=opacity, **label_kwargs)

        # Set the labels' captions
        plt.xlabel('"first_seen" by Years')
        plt.ylabel('Counts of Apps')
        plt.xticks(index + bar_width, tuple(all_years), rotation=45)
        plt.legend()
        plt.tight_layout()

        if plotLines and plotBars:
            title = "Lines_Bars"
        elif plotLines:
            title = "Lines"
        else:
            title = "Bars"

        plt.savefig("%s_first_seen_all.pdf" % title)
        # savefig, not "savefit": the misspelt call made every run fail
        # after the PDF had already been written.
        plt.savefig("%s_first_seen_all.pgf" % title)

    except Exception as e:
        prettyPrintError(e)
        return False

    return True
#!/usr/bin/env python3
import matplotlib.pyplot as plt

# Bar chart of the sale amount per customer, saved to bar_plot.png and shown.
plt.style.use('ggplot')
customers = ['ABC', 'EDF', 'GHI', 'JKL', 'MNO']
customers_index = range(len(customers))
sale_amounts = [127, 90, 201, 111, 232]
fig = plt.figure()
axl = fig.add_subplot(1, 1, 1)
axl.bar(customers_index, sale_amounts, align='center', color='darkblue')
axl.xaxis.set_ticks_position('bottom')
axl.yaxis.set_ticks_position('left')
# Replace the numeric tick positions with the customer names.
plt.xticks(customers_index, customers, rotation=0, fontsize='small')
plt.xlabel('Customer Name')
plt.ylabel('Sale Amount')
plt.title('Sale Amount per Customer')
# savefig, not "savefit": the misspelt name raised AttributeError.
plt.savefig('bar_plot.png', dpi=400, bbox_inches='tight')
plt.show()
Пример #8
0
        print('STEP:', i)

        def closure():
            """Zero the gradients, run a forward/backward pass, and return the loss.

            Passed to ``optimizer.step`` below, which calls it to obtain the loss.
            """
            optimizer.zero_grad()
            out = seq(input)
            loss = criterion(out, target)
            # .item() reads the scalar loss; indexing the 0-dim array from
            # .numpy() with [0] raises IndexError.
            print('loss', loss.item())
            loss.backward()
            return loss
        optimizer.step(closure)
        # begin to predict
        future = 1000
        pred = seq(test_input, future = future)
        # Only the part of the prediction that overlaps the test data is scored.
        loss = criterion(pred[:,:-future], test_target)
        print('test loss:', loss.item())
        y = pred.data.numpy()
        # draw the result
        plt.figure(figsize=(30,10))
        plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize = 30)
        plt.xlabel('x', fontsize=20)
        plt.ylabel('y', fontsize=20)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)

        def draw(yi, color):
            """Plot one sequence: solid over the input span, dotted over the `future` steps."""
            n_known = input.size(1)
            plt.plot(np.arange(n_known), yi[:n_known], color, linewidth = 2.0)
            # The x range for the forecast is [n_known, n_known + future); the
            # closing parenthesis of np.arange was previously misplaced.
            plt.plot(np.arange(n_known, n_known + future), yi[n_known:], color + ':', linewidth = 2.0)
        draw(y[0],'r')
        draw(y[1],'g')
        draw(y[2],'b')
        # savefig, not "savefit": the misspelt name raised AttributeError.
        plt.savefig('predict%d.pdf'%i)
        plt.close()
Пример #9
0
import matplotlib.pyplot as plt

# plt.plot takes two sequences: the x values and the y values

xvals = [0, 1, 2, 3]
yvals = [23, 48, 65, 80]
plt.plot(xvals, yvals)

# other fun things
# savefig, not "savefit": the misspelt name raised AttributeError.
plt.savefig('whatever.png')
plt.close()
plt.bar(xvals, yvals)
plt.scatter(xvals, yvals, color="red")
# `plt`, not `ply`: the module is imported as plt.
plt.close()

# for examples...
fig, ax = plt.subplots()
ax.pie([12,32])

# NOTE(review): `axs`, `yrs` and `means` are not defined anywhere in the
# visible snippet - confirm where they come from before running this line.
axs.bar(yrs, means)

~~~py
hots = []
colds = []

for d in datarows:
	year = int(d["year"])
	mean = float(d["annual_mean"])
	if mean <= 0:
		colds.append([year, mean])
	else: