示例#1
0
def plotPC(PC1, PC2, labelList):
    """Save a scatter plot of two principal components, coloured by class.

    Each sample ``i`` is assigned to the class found in ``labelList[i][0]``
    (used directly as an index in 0-9). Every class is drawn in its own
    colour and the figure is written to ``2D_10MNistGraph.png``.
    """
    class_colors = [
        "red", "green", "blue", "black", "gray", "yellow", "cyan", "magenta",
        "burlywood", "purple"
    ]
    xs_by_class = [[] for _ in range(10)]
    ys_by_class = [[] for _ in range(10)]
    for sample_idx, entry in enumerate(labelList):
        # each entry is itself a sequence whose first item is the class number
        class_id = entry[0]
        xs_by_class[class_id].append(PC1[sample_idx])
        ys_by_class[class_id].append(PC2[sample_idx])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for class_id, color in enumerate(class_colors):
        ax.scatter(xs_by_class[class_id],
                   ys_by_class[class_id],
                   c=color,
                   lw=0,
                   label=str(class_id))
    ax.legend(scatterpoints=1)
    ax.set_xlabel("PC1")
    ax.set_ylabel("PC2")
    fig.savefig("2D_10MNistGraph.png")
    plt.close()
示例#2
0
 def plot_comparison(self, x_data, x_variable, y1_data, y1_variable, y2_data, y2_variable):
     """Scatter the first compared result series against x.

     Delegates to ``self.compare_results`` with all six arguments and plots
     the returned ``x`` against the first result series, coloured by the
     returned ``colors``. The other returned values are not used here.
     """
     # scatter() lives in matplotlib.pyplot; the top-level matplotlib
     # package has no such function.
     import matplotlib.pyplot as plt
     x, r1, _r2, _diff, _combined, colors = self.compare_results(
         x_data, x_variable,
         y1_data, y1_variable,
         y2_data, y2_variable)
     plt.scatter(x, r1, c=colors)
示例#3
0
def plot_decision_regions(X, y, classifier, resolution=0.02):
    """Draw a classifier's decision regions and overlay the samples.

    Args:
        X: 2-D array; columns 0 and 1 are the two plotted features.
        y: class label per row of ``X``.
        classifier: object with a ``predict`` method taking an (n, 2) array.
        resolution: step of the evaluation grid.
    """
    # prepare marker and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # grid covering the data range padded by one unit on every side
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    # predict every grid point, then fold the result back to the grid shape
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)

    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot samples class by class
    for idx, cl in enumerate(classes):
        # `color=` rather than `c=`: a single RGBA tuple given as `c` is
        # indistinguishable from four values to be colour-mapped.
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8,
                    color=cmap(idx), marker=markers[idx], label=cl)
示例#4
0
文件: plot.py 项目: fengjiran/pyml
def plot_boundary(model, x, y, **kwargs):
    """Show a model's decision regions with the training points on top.

    ``x`` must have two features in its last dimension. Optional keyword
    arguments: ``h`` (grid step, default 0.1), ``title`` (figure
    super-title) and ``accuracy`` (fraction, shown as a percentage).
    """
    assert (x.shape[-1] == 2)
    region_cmap = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    point_cmap = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
    step = kwargs.get('h', 0.1)

    # evaluation grid: data range padded by one unit on each side
    first, second = x[:, 0], x[:, 1]
    x_axis = np.arange(first.min() - 1, first.max() + 1, step)
    y_axis = np.arange(second.min() - 1, second.max() + 1, step)
    x_grid, y_grid = np.meshgrid(x_axis, y_axis)
    grid_points = np.column_stack((x_grid.ravel(), y_grid.ravel()))
    Z = model.predict(grid_points).reshape(x_grid.shape)

    plt.figure()
    plt.pcolormesh(x_grid, y_grid, Z, cmap=region_cmap)

    # training points over the coloured regions
    plt.scatter(first, second, c=y, cmap=point_cmap, edgecolor='k', s=20)
    plt.xlim(x_grid.min(), x_grid.max())
    plt.ylim(y_grid.min(), y_grid.max())

    if 'title' in kwargs:
        plt.suptitle(kwargs['title'])
    if 'accuracy' in kwargs:
        plt.title("Accuracy: %.1f%%" % (kwargs['accuracy'] * 100), fontsize=10)
    plt.show()
        def evaluate_prediction():
            """Plot stock data with several prediction curves (sketch).

            NOTE(review): this reads like pseudo-code. It takes no parameters
            yet uses `dates`, `prices`, `Stockname`, `StockName`, `random`
            and `sklearn`, none of which are defined in the visible code.
            Confirm where they come from before relying on this function.
            """
            #Accuracy measure
            # NOTE(review): spelled `Stockname` here but `StockName` below --
            # confirm which name is intended. The result is discarded.
            sklearn.metrics.stocker.confusion_matrix(
                Stockname.Startdate('2017-03-27'),
                Stockname.Startdate('2018-03-27'))
            # These two values are only used in the title at the end.
            Startdate = random.StockName.date()
            Enddate = random.StockName.date(after=Startdate)
            plt.scatter(dates, prices, color='black', label='Data')
            # NOTE(review): the next two plot calls are identical.
            plt.plot(StockName.dates,
                     StockName.predict(dates),
                     color='black',
                     label='Observations')
            plt.plot(StockName.dates,
                     StockName.predict(dates),
                     color='black',
                     label='Observations')
            # NOTE(review): `attr=` does not look like a pyplot.plot keyword;
            # presumably a line style or weight was meant -- confirm.
            plt.plot(StockName.dates,
                     StockName.predict(dates),
                     color='blue',
                     label='Predicted',
                     attr='bold')
            plt.plot(StockName.dates,
                     StockName.predict(dates),
                     color='yellow',
                     label='Confidence Interval',
                     attr='block')
            plt.plot(StockName.midpoint.random(dates),
                     label='Prediction Start',
                     color='red',
                     attr='dotted')

            plt.xlabel('Date')
            plt.ylabel('Price $')
            # NOTE(review): five positional arguments are passed here;
            # presumably they were meant to be joined into one title string.
            plt.title(StockName, 'Model Evaluation from ', Startdate, 'to ',
                      Enddate)
示例#6
0
 def plot_scatter(self, dataframe, x, y, title, xlable, ylable, kurs):
     """Scatter ``x`` against ``y``, save the figure as a PNG and show it.

     The image path is built from ``kurs`` (directory) and ``title`` (file
     name). ``dataframe`` is accepted but not used, and ``title`` is not
     drawn on the plot.
     """
     target_path = './PDFcreater/Plots/{}/{}.png'.format(kurs, title)
     plt.scatter(x, y)
     plt.xlabel(xlable)
     plt.ylabel(ylable)
     plt.savefig(target_path)
     plt.show()
示例#7
0
 def plotinter(self, pts=1000):
     """Plot the parametric curve and its interpolation nodes, and time it.

     Args:
         pts: number of samples per unit of the parameter; the step is
             ``1 / pts``.

     The curve ``(self.x(t), self.y(t))`` is drawn from ``self.pts[0]`` to
     ``self.pts[-1]``, the points ``(self.interx, self.intery)`` are
     overlaid in red, and the elapsed time in seconds is printed.
     """
     t0 = time.time()
     # float literal: under Python 2, `1 / pts` is integer division and
     # gives a zero step, which arange cannot use.
     dt = 1.0 / pts
     t = arange(self.pts[0], self.pts[-1] + dt, dt)
     plt.plot(self.x(t), self.y(t))
     plt.scatter(self.interx, self.intery, color="red")
     print(time.time() - t0)
示例#8
0
    def __init__(self, x, y):
        """Fit a least-squares line to (x, y) and plot it with the data.

        Slope and intercept follow the closed-form simple regression:
        ``m = (n*Sxy - Sx*Sy) / (n*Sxx - Sx**2)`` and ``b = mean(y) - m*mean(x)``.
        The sums are obtained from this class's helper methods.
        """
        self.x = x
        self.y = y
        n = len(x)  # number of data points
        xSum = self.suma(x)
        ySum = self.suma(y)
        xySum = self.columnaXy(x, y, n)
        # only the sum of squares is needed; the per-item column is unused
        _xCuadrado, sumXCuadrado = self.columnaCuadrado(x, n)

        xMedia = self.media(xSum, n)
        yMedia = self.media(ySum, n)

        # The covariance term needs the PRODUCT of the sums, not their sum.
        num = xySum * n - (xSum * ySum)
        den = n * sumXCuadrado - (xSum**2)
        m = num / den
        b = yMedia - (m * xMedia)

        # plot the fitted line one unit beyond the data range on each side
        x1 = np.linspace(min(x) - 1, max(x) + 1)
        linea = m * x1 + b
        plt.plot(x1, linea)
        plt.scatter(x, y)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.grid(True)
        plt.show()
示例#9
0
def tsne_plot(model):
    """Project every vocabulary word to 2-D with t-SNE and plot it.

    Each word in ``model.wv.vocab`` is looked up via ``model[word]``; the
    vectors are reduced to two components and drawn as one labelled point
    per word.
    """
    # The unused sampling-probability computation was removed: it did
    # nothing except divide by zero on an empty vocabulary.
    labels = list(model.wv.vocab)
    tokens = [model[word] for word in labels]

    tsne_model = TSNE(perplexity=40,
                      n_components=2,
                      init='pca',
                      n_iter=2500,
                      random_state=23)
    new_values = tsne_model.fit_transform(tokens)

    plt.figure(figsize=(16, 16))
    # one scatter call per point keeps the per-point colour cycling
    for label, value in zip(labels, new_values):
        px, py = value[0], value[1]
        plt.scatter(px, py)
        plt.annotate(label,
                     xy=(px, py),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')
    plt.show()
示例#10
0
def graph():
    """Scatter interface_delta_X against ligand_rms_no_super_X.

    Reads the module-level frames ``datlo`` (blue) and ``dathi`` (red),
    fixes the axis limits to the full range found in ``data``, calls
    ``printfile()`` for the optional PNG export, then shows the figure.
    """
    x1 = datlo['ligand_rms_no_super_X']
    y1 = datlo['interface_delta_X']
    x2 = dathi['ligand_rms_no_super_X']
    y2 = dathi['interface_delta_X']
    # actual extrema over the whole data set
    maxrmsd = data['ligand_rms_no_super_X'].max()
    minrmsd = data['ligand_rms_no_super_X'].min()
    maxint = data['interface_delta_X'].max()
    minint = data['interface_delta_X'].min()

    plt.figure(figsize=[16, 9])
    # positional limits: the xmin=/xmax=/ymin=/ymax= keywords are no longer
    # accepted by current matplotlib.
    plt.xlim(minrmsd, maxrmsd)
    plt.ylim(minint, maxint)
    plot1 = plt.scatter(x1, y1, s=4, c='Blue', marker='o')
    plot2 = plt.scatter(x2, y2, s=4, c='Red', marker='o')
    plt.tick_params(axis='both', direction='inout', width=1, length=6,
                    labelsize=13, pad=4)
    plt.title('interface_delta_x vs ligand_rms_no_super_X', size=16)
    plt.xlabel("ligand_rms_no_super_X", fontsize=13)
    plt.ylabel("interface_delta_X", fontsize=13)
    # explicit handles so labels cannot be matched to the wrong series
    plt.legend([plot1, plot2],
               ['total_score <= average', 'total_score > average'],
               markerscale=5, fontsize=12)
    # prompts the user to decide whether to export a png file
    printfile()
    plt.show()
示例#11
0
def Exchange_rates(Base, Destination):
    """Print and plot the price spread of a currency pair across markets.

    Fetches the cryptonator "full" ticker for ``Base``-``Destination``,
    reports the cheapest and dearest market with the difference between
    them, then draws one annotated point per market.
    """
    response = requests.get(
        'https://api.cryptonator.com/api/full/{}-{}'.format(Base, Destination))
    markets = response.json()['ticker']['markets']
    if not markets:
        # min()/max() below would fail on an empty list
        print("No market data returned for {}-{}".format(Base, Destination))
        return

    market = [entry['market'] for entry in markets]
    price_text = [entry['price'] for entry in markets]
    # Compare numerically: the original compared the raw values without
    # conversion, which orders strings lexicographically ("9" > "10").
    price = [float(text) for text in price_text]

    lowest, highest = min(price), max(price)
    print("Minimum Price is \t", lowest, " \tat ", market[price.index(lowest)],
          " \nMaximum Price is \t", highest, " \tat ", market[price.index(highest)],
          "\ndifference \t  is\t", highest - lowest, Destination)

    # one x position per market (the original used the 2-tuple (0, n))
    numbers = list(range(len(market)))

    plt.scatter(numbers, market, color='red')

    for i, txt in enumerate(price_text):
        plt.annotate(txt, (numbers[i], market[i]))

    plt.title('ARBITRAGE')
    plt.ylabel('#PRICE')
    plt.show()
示例#12
0
def volumeScatter(df):
    """Scatter the ``DB`` column against the ``Stress`` column of ``df``.

    Shows how music volume ranges at the different stress levels.
    """
    stress, volume = df["Stress"], df["DB"]
    plt.figure()
    plt.scatter(stress, volume)
    plt.xlabel('Stress levels')
    plt.ylabel('Music volume')
    plt.show()
示例#13
0
def predict_prices(dates, prices, x):
    """Fit three SVR models to (dates, prices), plot them, and predict ``x``.

    Returns:
        Tuple ``(rbf, linear, polynomial)`` holding each model's first
        prediction for ``x``.
    """
    dates = np.reshape(dates, (len(dates), 1))

    # (legend label, line colour, estimator) in plotting / return order
    fitted = [
        ('RBF model', 'red', SVR(kernel='rbf', C=1e3, gamma=0.1)),
        ('Linear model', 'green', SVR(kernel='linear', C=1e3)),
        ('Polynomial model', 'blue', SVR(kernel='poly', C=1e3, degree=2)),
    ]
    for _, _, estimator in fitted:
        estimator.fit(dates, prices)

    plt.scatter(dates, prices, color='black', label='Data')
    for label, line_color, estimator in fitted:
        plt.plot(dates, estimator.predict(dates), color=line_color, label=label)
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()

    return tuple(estimator.predict(x)[0] for _, _, estimator in fitted)
示例#14
0
def scatter_plot(P, L, pcIdx1, pcIdx2, letterList, rev):
    """Scatter two principal components for the selected letters and save it.

    Args:
        P: 2-D array of projected samples (rows) by component (columns).
        L: array of labels aligned with the rows of ``P``.
        pcIdx1: component shown on the (inverted) y-axis.
        pcIdx2: component shown on the x-axis.
        letterList: labels to draw, one colour each.
        rev: tag appended to the output file name.

    The PNG is written under the module-level ``pDir`` and then shown.
    """
    fig = plt.figure()
    # following the convention in lecture note ScatterPlot.html
    colors = ["r", "lime", "b", "y", "c", "m", "k", "tan", "pink", "darkred"]
    for i, letter in enumerate(letterList):
        mask = L == letter
        plt.scatter(P[mask, pcIdx2],
                    P[mask, pcIdx1],
                    s=0.1,
                    # wrap around rather than fail past ten letters
                    c=colors[i % len(colors)],
                    label=letter)
    # gca() returns the axes just drawn on; a bare plt.axes() adds a new,
    # empty axes in current matplotlib.
    ax = plt.gca()
    ax.set_aspect('equal')
    plt.xlabel("Principle Component {}".format(pcIdx2))
    plt.ylabel("Principle Component {}".format(pcIdx1))
    plt.axhline(0, color='grey')
    plt.axvline(0, color='grey')
    plt.ylim([-5000, 5000])
    plt.xlim([-5000, 5000])
    plt.legend()
    ax.invert_yaxis()
    fig.set_size_inches(8, 8)
    fName = os.path.join(
        pDir, 'scatter_PC{}_PC{}_{}_{}.png'.format(pcIdx1, pcIdx2,
                                                   "".join(letterList), rev))
    # save through the figure object instead of a bare `savefig` name
    fig.savefig(fName, bbox_inches='tight')
    plt.show()
def show_graph(x_list, y_list, width, height):
    """Show a small-dot scatter plot in a figure of the given size.

    x_list, y_list -- x- and y-coordinates to plot
    width, height  -- figure size passed to ``plt.figure``
    """
    figure_size = [width, height]
    plt.figure(figsize=figure_size)
    plt.scatter(x_list, y_list, marker='.', s=5)
    plt.show()
示例#16
0
def tsne_plot(embedding, expression_value, cmaps="PuRd"):
    """Scatter a 2-D embedding coloured by expression value.

    Args:
        embedding: 2-D array; columns 0 and 1 are the plot coordinates.
        expression_value: one value per row, mapped to colour.
        cmaps: colormap name or object (default ``"PuRd"``).
    """
    # Honour the `cmaps` argument; it used to be ignored in favour of a
    # hard-coded 'PuRd'. scatter accepts a colormap name directly.
    plt.scatter(embedding[:, 0],
                embedding[:, 1],
                lw=0.1,
                c=expression_value,
                cmap=cmaps)
    plt.colorbar(label='expression value')
    plt.show()
def plot_line(x, y, y_hat, line_color='blue'):
    """Show observed points in black with the fitted line over them.

    Axis ticks are removed from both axes.
    """
    plt.scatter(x, y, color='black')
    plt.plot(x, y_hat, color=line_color, linewidth=3)
    # an empty tuple clears the tick marks on that axis
    for clear_ticks in (plt.xticks, plt.yticks):
        clear_ticks(())

    plt.show()
def plotPredictions(yActualTrain, yActualVal, yPredTrain, yPredVal):
  ''' Show predicted-vs-actual scatter plots for train and validation.

  The two panels occupy the first two slots of a 1x3 subplot grid.
  '''
  panels = (
    (131, yActualTrain, yPredTrain),
    (132, yActualVal, yPredVal),
  )
  plt.figure(figsize=(6, 3))
  for slot, actual, predicted in panels:
    plt.subplot(slot)
    plt.scatter(actual, predicted, s=1)
  plt.show()
示例#19
0
def plot_data(X, y, theta=np.array([])):
    """Plot student admission data and, optionally, the decision boundary.

    Args:
        X: 2-D array; column 0 goes on the x-axis (exam 1) and the
            remaining column on the y-axis (exam 2).
        y: labels, 1 for admitted and 0 for not admitted.
        theta: model parameters. Empty (default) draws no boundary; up to
            three values draw a straight line; more values draw the zero
            contour of ``mapFeature(u, v) . theta`` on a fixed grid.
    """
    plt.ylabel('Exam 2 score')
    plt.xlabel('Exam 1 score')

    admitted = np.where(y == 1)[0]
    not_admitted = np.where(y == 0)[0]

    plt.scatter(X[admitted, :1],
                X[admitted, 1:],
                marker='+',
                label='Admitted',
                c='black')

    plt.scatter(X[not_admitted, :1],
                X[not_admitted, 1:],
                marker='o',
                label='Not admitted',
                c='yellow',
                edgecolors='black')

    if theta.size != 0:
        if theta.size <= 3:
            # The line's x-range must span the feature on the x-axis, which
            # is column 0 here (the scatter calls above use X[:, :1] as x).
            x_coords = np.array([np.min(X[:, 0]), np.max(X[:, 0])])
            y_coords = (-1 / theta[2]) * (theta[0] + theta[1] * x_coords)
            plt.plot(x_coords, y_coords, 'b-', label='Decision boundary')
        else:
            # grid range over which the boundary is evaluated
            u = np.linspace(-1, 1.5, 50)
            v = np.linspace(-1, 1.5, 50)

            z = np.zeros((u.size, v.size))
            # evaluate z = theta . mapFeature(u, v) over the grid
            for i, ui in enumerate(u):
                for j, vj in enumerate(v):
                    z[i, j] = np.dot(mapFeature(ui, vj), theta)

            z = z.T  # contour expects rows to follow v, columns to follow u

            # `plt`, like the rest of this function; `pyplot` is not a
            # name used anywhere else in the block.
            plt.contour(u, v, z, levels=[0], linewidths=2, colors='g')
            plt.contourf(u,
                         v,
                         z,
                         levels=[np.min(z), 0, np.max(z)],
                         cmap='Greens',
                         alpha=0.4)

    # Build the legend last so the boundary label is included.
    plt.legend(loc='upper right', fontsize=8)

    plt.show()
示例#20
0
 def pca_plot(self):
     """Scatter the first two projected components, one series per label.

     Reads ``self.Y`` (projected samples, columns 0 and 1 are plotted) and
     ``self.label`` (one label per row).
     """
     # The row mask previously compared an undefined name `y`; the labels
     # live on the instance.
     sample_labels = np.asarray(self.label)
     with plt.style.context("seaborn-darkgrid"):
         for l in np.unique(sample_labels):
             mask = sample_labels == l
             plt.scatter(self.Y[mask, 0], self.Y[mask, 1], label=l)
         plt.xlabel("PC 1")
         plt.ylabel("PC 2")
         plt.legend()
         plt.show()
示例#21
0
def scatter_chart(plt, col1, col2, Title="Scatter Plot"):
    """Scatter ``col2`` against ``col1`` with their regression line.

    Args:
        plt: plotting interface offering scatter/plot/xlabel/ylabel/title.
        col1: x values; needs a ``name`` attribute and scalar arithmetic.
        col2: y values; needs a ``name`` attribute.
        Title: plot title.

    The ``linregress`` result is printed before plotting.
    """
    results = linregress(col1, col2)
    # function-call form works on both Python 2 and Python 3
    print(results)
    slope, intercept = results[0], results[1]
    plt.scatter(col1, col2)
    plt.plot(col1, col1 * slope + intercept)
    plt.ylabel(col2.name)
    plt.xlabel(col1.name)
    plt.title(Title)
示例#22
0
def makePlot(muDistr, AiArray, Ai, num):
    """Show the normalized probability scatter, but only when ``num`` is 17.

    For any other ``num`` nothing is drawn. ``Ai`` is accepted but unused.
    """
    if num != 17:
        return
    plt.scatter(muDistr, AiArray)
    plt.ylabel("Normalized Probability")
    plt.xlabel('x')
    plt.title("P(alpha|x,I) ")
    plt.show()
def plotPredictions(clf):
    """Draw the classifier's predictions on a fixed grid with the data on top.

    The grid spans 0-250000 (step 10) by 10-70 (step 0.5). The overlaid
    points come from the module-level ``X`` and ``y``.
    """
    xx, yy = np.meshgrid(np.arange(0, 250000, 10), np.arange(10, 70, 0.5))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    plt.figure(figsize=(8, 6))
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
    # builtin float: the `np.float` alias no longer exists in NumPy
    plt.scatter(X[:, 0], X[:, 1], c=y.astype(float))
    plt.show()
示例#24
0
def main():
    """Filter a sorted_models file by user-supplied maxima and plot it.

    Reads the file named on the command line (or prompts for one), asks
    for the maximum total_score and interface_delta_X, keeps the rows
    within both limits and scatters them, split by whether a row's
    total_score + interface_delta_X is above the average. The row with
    the lowest sum is annotated. Sets the globals ``maxtot``, ``maxinter``.
    """
    global maxtot
    global maxinter

    # raw_input exists only on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # check the number of command-line arguments before anything else
    argumnumber()
    if len(sys.argv) < 2:
        models_path = read_line("Please provide a sorted_models file: ")
    else:
        models_path = sys.argv[1]
    maxtot = float(read_line("Please enter maximum total_score: "))
    maxinter = float(read_line("Please enter maximum interface_delta_X: "))

    models_raw = pd.read_csv(
        models_path, sep=' ',
        names=['model', 'total_score', 'interface_delta_X'])
    # .copy(): a column is added below, which must not target a view of
    # models_raw.
    models = models_raw.loc[(models_raw['total_score'] <= maxtot)
                            & (models_raw['interface_delta_X'] <= maxinter)].copy()
    if models.empty:
        # nothing to average, locate or plot
        print("No models satisfy the given maxima; nothing to plot.")
        return

    sumavrg = (
        (np.sum(models['total_score']) + np.sum(models['interface_delta_X'])) /
        (len(models['total_score'])))
    models['add'] = models['total_score'] + models['interface_delta_X']
    modhi = models.loc[models['add'] > sumavrg]
    modlo = models.loc[models['add'] <= sumavrg]
    # idxmin returns an index LABEL, so look it up with .loc; positional
    # .iloc picks the wrong row once filtering has dropped rows.
    minidx = models['add'].idxmin()
    xmin = models.loc[minidx, 'total_score']
    ymin = models.loc[minidx, 'interface_delta_X']

    x1 = modlo['total_score']
    y1 = modlo['interface_delta_X']
    x2 = modhi['total_score']
    y2 = modhi['interface_delta_X']
    plt.figure(figsize=[16, 9])
    plot1 = plt.scatter(x1, y1, s=2, c='Green', marker='.')
    plot2 = plt.scatter(x2, y2, s=2, c='Red', marker='.')
    plt.tick_params(axis='both',
                    direction='inout',
                    width=1,
                    length=6,
                    labelsize=13,
                    pad=4)
    plt.title('interface_delta_x vs total_score', size=16)
    plt.xlabel("total_score", fontsize=13)
    plt.ylabel("interface_delta_X", fontsize=13)
    plt.legend([plot1, plot2], ['Sum <= average', 'Sum > average'],
               markerscale=7, fontsize=12)
    # text passed positionally: the keyword was renamed from `s` to `text`
    plt.annotate("Lowest sum: total_score: " + str(xmin) +
                 "; interface_delta_X: " + str(ymin),
                 xy=(xmin, ymin),
                 textcoords='axes fraction',
                 xytext=(0.6, 0.05))
    printtofile()
    plt.show()
示例#25
0
def mandlebrot(x, y):
    """Plot escape-time values on the integer lattice [0, x) x [0, y).

    Every lattice point ``(xval, yval)`` is used as the constant
    ``c = xval + yval*1j`` of the iteration ``z -> z*z + c``. The point is
    drawn in blue with opacity proportional to the number of iterations
    (capped at 20) for which ``|z|`` stayed at or below 2.

    NOTE(review): coordinates are used unscaled, as the original code
    implied. Most integer points escape at once; map the lattice into
    roughly [-2, 1] x [-1.5, 1.5] if the usual picture is wanted.
    """
    max_iter = 20
    xs, ys, colors = [], [], []
    for xval in range(x):
        for yval in range(y):
            # use the loop variables, not the grid sizes x and y
            c = complex(xval, yval)
            val = 0
            iteration = 0
            # bounded escape-time loop (was a membership test on range(20))
            while abs(val) <= 2 and iteration < max_iter:
                val = val * val + c
                iteration += 1
            xs.append(xval)
            ys.append(yval)
            # RGBA components must lie in [0, 1]
            colors.append((0.0, 0.0, 1.0, iteration / float(max_iter)))
    if xs:
        plt.scatter(xs, ys, color=colors)
    # call show(); the bare attribute reference did nothing
    plt.show()
示例#26
0
def feature_summary(x_col, y_col, show_r2=False):
    """Print and plot a summary of one feature against a target.

    Args:
        x_col: named pandas Series holding the feature.
        y_col: named pandas Series holding the target.
        show_r2: also print the R² of the linear fit and the feature
            importance.

    Shows a histogram of the feature, a scatter plot with a linear fit
    (skipped when both series share a name) and a rolling-mean timeline.

    :return: a single-space string.
    """
    # Preparation
    x_name = x_col.name
    y_name = y_col.name
    df = pd.concat([x_col, y_col], axis=1).sort_index()
    plt.rcParams["figure.figsize"] = (10, 7)
    breaks(1)
    print("%s" % x_name)
    print('Quantile:\n', x_col.quantile([0.0, 0.1, 0.25, 0.5, 0.75, 1.0]))

    # Histogram
    plt.subplot(221)
    try:
        plt.hist(x_col, bins=30)
        plt.xlabel(x_name)
        plt.title('Histogram (CF GHP): %s' % x_name)
    except ValueError:
        print("No histogram for %s available" % x_name)

    # Correlation
    if y_name != x_name:
        df = df.sort_values(x_name)
        # axis by keyword: the positional form is rejected by current pandas
        x = df.drop(y_name, axis=1)
        # NOTE(review): `normalize` has been removed from recent
        # scikit-learn releases; confirm the installed version.
        reg = linear_model.LinearRegression(normalize=True)
        reg.fit(x, df[y_name])
        # Plot
        plt.subplot(222)
        plt.scatter(df[x_name], df[y_name])
        plt.plot(df[x_name], reg.predict(x), color='g')
        plt.xlabel(x_name)
        plt.xlim([df[x_name].min(), df[x_name].max()])
        plt.title('x:%s / y:%s ' % (x_name, y_name))
        plt.ylabel("Target function: %s" % y_name)
        if show_r2:
            print("R²:", r2_score(df[y_name], reg.predict(x)))
            print(feature_importance(x, reg.coef_))

    # Show plots
    plt.show()

    # Timeline: 10-sample rolling mean over a fixed x window
    x_col.rolling(window=10,
                  center=False).mean().plot(title='%s: Timeline' % x_name,
                                            figsize=(10, 2),
                                            xlim=(170000, 175000))
    plt.show()

    plt.close('all')
    return " "
    def plot_various_trial_analyses(self, neuron_ind, var_level):
        """Plot trial-level analyses for one neuron at one variation level.

        Produces four figures: the trial-averaged response across the 64
        objects with alternating category shading, the raw responses of
        trials 0 and 5, a scatter of those two trials with their Pearson
        correlation, and the matrix of between-trial correlations.

        Args:
            neuron_ind: index of the neuron along the last data axis.
            var_level: key selecting the variation level in
                ``self.by_trial_IT_Neural_Data_objmeans_sorted_by_category``.
        """
        plt.figure(figsize=(16, 5))

        # data for all trials of this neuron, then the mean over axis 1
        # (the trial dimension)
        neuron_i_data_by_trial = self.by_trial_IT_Neural_Data_objmeans_sorted_by_category[var_level][:, :, neuron_ind]
        neuron_i_data_trial_mean = neuron_i_data_by_trial.mean(1)
        minval = neuron_i_data_trial_mean.min()
        maxval = neuron_i_data_trial_mean.max()
        plt.plot(neuron_i_data_trial_mean)
        # Shade every other block of 8 objects. Floor division is required:
        # true division yields fractional values and breaks the pattern.
        object_idx = np.arange(64)
        plt.fill_between(object_idx, minval, maxval,
                         where=(object_idx // 8) % 2 == 1, color='k', alpha=0.2)
        plt.xticks(np.arange(0, 64, 8) + 4, self.unique_categories, rotation=30)
        plt.ylabel('Neural Response of neuron %d' % neuron_ind)
        plt.ylim(minval, maxval)
        plt.xlabel('Responses for Variation %s images' % var_level)

        # two example trials: the first and the sixth
        t1 = 0
        t2 = 5
        t1_data = neuron_i_data_by_trial[:, t1]
        t2_data = neuron_i_data_by_trial[:, t2]
        plt.figure(figsize=(12, 5))
        plt.subplot(1, 2, 1)
        plt.plot(t1_data)
        plt.xticks(np.arange(0, 64, 8), self.unique_categories, rotation=30)
        plt.title('Neuron %d, trial %d, var %s' % (neuron_ind, t1, var_level))
        plt.subplot(1, 2, 2)
        plt.plot(t2_data)
        plt.xticks(np.arange(0, 64, 8), self.unique_categories, rotation=30)
        plt.title('Neuron %d, trial %d, var %s' % (neuron_ind, t2, var_level))

        # responses of one trial against the other
        plt.figure()
        plt.scatter(t1_data, t2_data)
        plt.xlabel('responses of neuron %d, trial %d, %s'% (neuron_ind, t1, var_level))
        plt.ylabel('responses of neuron %d, trial %d, %s'% (neuron_ind, t2, var_level))

        # Pearson correlation between the two trials
        rval = stats.pearsonr(t1_data, t2_data)[0]
        plt.title('Correlation for varlevel %s images = %.3f' % (var_level, rval))

        # pairwise correlation between all trials, shown as a matrix
        fig = plt.figure(figsize = (7, 7))
        corrs = np.corrcoef(neuron_i_data_by_trial.T)
        plt.colorbar(fig.gca().matshow(corrs))
        plt.xlabel('trials of neuron %d' % neuron_ind)
        plt.ylabel('trials of neuron %d' % neuron_ind)
        plt.title('Between-trial correlations for varlevel %s' % var_level)
def plot_regression_line(x, y, b):
    """Show the observations with the line ``b[0] + b[1] * x`` over them.

    ``b`` holds the intercept at index 0 and the slope at index 1.
    """
    # observed points as magenta circles
    plt.scatter(x, y, color="m", marker="o", s=30)

    # predicted response drawn in green
    intercept, slope = b[0], b[1]
    plt.plot(x, intercept + slope * x, color="g")

    for set_label, text in ((plt.xlabel, 'x'), (plt.ylabel, 'y')):
        set_label(text)

    plt.show()
示例#29
0
文件: plot.py 项目: fengjiran/pyml
def plot(x, y, **kwargs):
    """Scatter 2-D points coloured by label; only 2-D input is supported.

    Optional keyword arguments: ``title`` (figure super-title) and
    ``accuracy`` (fraction, shown as a percentage in the axes title).
    """
    assert (x.shape[-1] == 2)
    point_colors = (y + 2) / 5
    plt.figure()
    plt.scatter(x[:, 0], x[:, 1], c=point_colors)
    if 'title' in kwargs:
        plt.suptitle(kwargs['title'])
    if 'accuracy' in kwargs:
        percent = kwargs['accuracy'] * 100
        plt.title("Accuracy: %.1f%%" % percent, fontsize=10)
    plt.show()
示例#30
0
    def plot_points(self):
        """Scatter ``self.points`` in a fixed window.

        Each point supplies x at index 0, y at index 1 and its colour value
        at index 3. The view is fixed to x in [0, 15] and y in [-15, 15].
        """
        xs, ys, shades = [], [], []
        for pt in self.points:
            xs.append(pt[0])
            ys.append(pt[1])
            shades.append(pt[3])

        plt.figure()
        plt.scatter(xs, ys, c=shades)
        plt.ylim(-15, 15)
        plt.xlim(0, 15)
        plt.show()
示例#31
0
    def scatterPlot(self):
        """Scatter ``self.tdata`` and overlay ``self.modelFunction``.

        The view is the data range of columns 0 and 1 padded by one unit
        on every side; the model is evaluated across the padded x-range.
        """
        col_x = self.tdata[:, 0]
        col_y = self.tdata[:, 1]
        x_low, x_high = min(col_x) - 1, max(col_x) + 1
        y_low, y_high = min(col_y) - 1, max(col_y) + 1

        model_x = np.linspace(x_low, x_high)
        plt.scatter(col_x, col_y)
        plt.plot(model_x, self.modelFunction(model_x))
        plt.axis([x_low, x_high, y_low, y_high])
        plt.show()
示例#32
0
def print_scatter_data():
    """Write and plot fitness / lifetime against self-reliance per agent.

    For every agent in ``Par.Agents`` the self-reliance is the absolute
    value of the sum over resources of ``production[i] * needs[i]``.
    Living agents contribute (self-reliance, fitness) pairs, written to
    ``<Par.dirname>/Scatter.dat`` and plotted to ``FitnessVSR.png``. Dead
    agents contribute (self-reliance, lifetime) pairs, plotted to
    ``LifeTimeVSR.png``.
    """
    import matplotlib.pylab as plt

    def _self_reliance(agent):
        # absolute value of sum_i production[i] * needs[i]
        return abs(sum(agent.production[i] * agent.needs[i]
                       for i in range(Par.num_resources)))

    filename = Par.dirname + ('/Scatter.dat')
    fitnesses = []
    self_reliences = []
    life_times = []
    self_reliences_dead = []

    for Agent in Par.Agents:
        # comparison kept exactly as written so that only a value equal to
        # True counts as dead
        if Agent.dead != True:
            fitnesses.append(Agent.fitness)
            self_reliences.append(_self_reliance(Agent))
        else:
            life_times.append(Agent.t_death - Agent.t_discovery)
            self_reliences_dead.append(_self_reliance(Agent))

    # context manager closes the file even if a write fails
    with open(filename, 'w') as out:
        for fitness, reliance in zip(fitnesses, self_reliences):
            out.write(str(fitness) + '\t\t' + str(reliance))
            out.write('\n')

    plt.scatter(self_reliences, fitnesses)
    plt.ylabel('Fitness')
    plt.xlabel('self_reliences')
    plt.savefig('FitnessVSR.png')
    plt.close()
    plt.scatter(self_reliences_dead, life_times)
    plt.ylabel('LifeTimes')
    plt.xlabel('self_reliences')
    plt.savefig('LifeTimeVSR.png')
    plt.close()
示例#33
0
# Exploratory script: scatter/histogram of Voters.csv, display of one image
# row from test.csv, and a few scipy.stats.norm calls.
# pyplot (not the top-level matplotlib package) provides scatter/hist/imshow.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# .values replaces DataFrame.as_matrix(), which current pandas no longer has
dat = pd.read_csv('Voters.csv').values
x = dat[:, 0]
y = dat[:, 1]
plt.scatter(x, y)
plt.show()
plt.hist(x)
plt.hist(y, bins=15)


#images
train = pd.read_csv('test.csv')
M = train.values
# first row without its first column, viewed as a 28x28 image
im = M[0, 1:]
im = im.reshape(28, 28)
plt.imshow(im)
plt.show()
plt.imshow(im, cmap="gray")


from scipy.stats import norm
norm.pdf(0)
norm.pdf(0, loc=5, scale=10)
r = np.random.randn(10)
norm.pdf(r)
norm.cdf(r)
r = 10*np.random.randn(10000)+5
# Linear regression of cricket chirp rate on ground temperature.
df = pd.DataFrame(ground_cricket_data)

# instantiate LinearRegression class
regr = linear_model.LinearRegression()

# define variables; to_frame() makes x a one-column frame for fit()
x = df['Ground Temperature'].to_frame()
y = df['Chirps/Second']

# fit the object to the data
regr.fit(x, y)

# plot using equation
plt.scatter(x, y)
# y = β0 + β1x where β0 is the intercept and β1 the coefficient
plt.plot(x, (regr.intercept_ + (regr.coef_ * x)))

# plot using max/min as data for prediction
# The column name must match the feature name used in fit(); the original
# 'Ground Temp' is rejected by recent scikit-learn releases.
df_new = pd.DataFrame({'Ground Temperature': [df['Ground Temperature'].min(),
                       df['Ground Temperature'].max()]})
plt.scatter(x, y)
# plot the new frame against the prediction
plt.plot(df_new, regr.predict(df_new))

# calculate the r-squared score (or coefficient of determination)
regr.score(x, y)
### Note: the call below rebinds seqDB, data_genes and data_isoforms at
### module level, so re-running this cell changes them again.
seqDB, data_genes, data_isoforms = qcf.bioReplicateSelfCorrelate(seqDB,data_genes, data_isoforms,
                                                                 excludeSelf=1) ### excludeSelf avoids the self comparison

# One row per seqDB entry: column 0 is the x value, column 1 the y value.
pairs = np.zeros([len(seqDB),2])

# Alternative column pair, kept for reference:
#xToPlot= 'pearsonCorrToMTT'
#yToPlot = 'pearsonCorrToMeanReplicate'
xToPlot= 'spearmanCorrToMeanReplicate'
yToPlot = 'pearsonCorrToMeanReplicate'
# NOTE(review): .loc is label-based, so this assumes seqDB's index labels
# are 0..len-1 -- confirm.
for rowId in range(len(seqDB)):
    pairs[rowId,0] = seqDB.loc[rowId,xToPlot] #seqDB['num_cells'] ### X axis
    pairs[rowId,1] = seqDB.loc[rowId,yToPlot] ### Y axis

plt.scatter(pairs[:,0],pairs[:,1])
plt.xlabel(xToPlot)
plt.ylabel(yToPlot)
# Fix both axes to the same 0.8-1.05 window.
axes = plt.gca()
axes.set_xlim([0.8,1.05])
axes.set_ylim([0.8,1.05])
fig = plt.gcf()
fig.set_size_inches(15,10)


# In[12]:

seqDB


# In[21]:
示例#36
0

# In[67]:

#4
# Relative frequency per row: divide each row by its own total. axis=0 is
# required so the row sums align with the row index, not the column labels.
categoricalFreq_rel = categoricalFreq.div(
    categoricalFreq.sum(1).astype(float), axis=0)
categoricalFreq_rel
categoricalFreq_rel.plot(kind='barh', stacked=True)
title('MPG by Efficiency(stacked)')
savefig('stacked', dpi=400, bbox_inches='tight')


# In[64]:

#5
plt.scatter(carData['barrels08'], carData['highway08'])
plt.title('Barrel Consumption vs Highway MPG')


# In[2]:

#Part 2
import scipy as sp
import sklearn as sk


# In[3]:

#2
# pandas rejects a boolean `header`; None means "the file has no header row".
# NOTE(review): use header=0 instead if the first line holds column names.
medicalData = pd.read_table('Medical.csv', header=None, sep=',')
medicalData
示例#37
0
# Scatter the first two columns of every data file given on the command line.
import numpy as np
# pyplot (not the top-level matplotlib package) provides scatter/show
import matplotlib.pyplot as plt
import sys, string, os

n_archivos = len(sys.argv)

# argv[0] is this script itself, so data files start at index 1
for archivo in sys.argv[1:]:
    datos = np.loadtxt(archivo)

    N = np.shape(datos)[0]

    # distinct index name: the original reused `i` for both loops
    for fila in range(N):
        plt.scatter(datos[fila, 0], datos[fila, 1])
    plt.show()
# Derived columns from the "lastupdated" string.
# "time" joins characters 11-12 and 14-15 (presumably HH and MM -- confirm
# against the timestamp format).
df["time"] = [t[11:13] + t[14:16] for t in df["lastupdated"]]
df["day"] = [date2int(t) for t in df["lastupdated"]] # day 0 is May 17, 2014
df["dayofwk"] = [(t+6)%7 for t in df["day"]] # 0 indexed Sunday
df.head()

# <codecell>

plt.figure(figsize=(10,15))

# Background image; the points below are drawn over it.
im = plt.imread('chicago.png')
implot = plt.imshow(im)

# Rescale the west / north values linearly into 0..477 and 0..798
# (presumably the pixel width and height of chicago.png -- confirm).
# y is flipped by subtracting from 798.
x = (df['west'] - df['west'].min())*477/(df['east'].max() - df['west'].min())
y = 798-(df['north'] - df['south'].min())*798/(df['north'].max() - df['south'].min())
# Colour value: speed as a fraction of the maximum speed.
s = df['currentspeed'] / df['currentspeed'].max()
plt.scatter(x,y,c=s,linewidth=0,s=1000,alpha=0.1)

# Disabled debugging code: marks two corners of the first row in red.
#x0 = (df.ix[0]['west'] - df['west'].min())*477/(df['east'].max() - df['west'].min())
#y0 = 798-(df.ix[0]['north'] - df['south'].min())*798/(df['north'].max() - df['south'].min())
#plt.scatter(x0,y0,c='r',s=2000)
#x0 = (df.ix[0]['east'] - df['west'].min())*477/(df['east'].max() - df['west'].min())
#y0 = 798-(df.ix[0]['south'] - df['south'].min())*798/(df['north'].max() - df['south'].min())
#plt.scatter(x0,y0,c='r',s=2000)
plt.xlim(0,477)
# The upper limit is given first, so the y-axis runs top to bottom.
plt.ylim(798,0)
plt.xticks([])
plt.yticks([])
#plt.plot([df['west'],df['west'],df['east'],df['east'],df['west']],[df['south'],df['north'],df['north'],df['south'],df['south']],linewidth=20,alpha=0.2)

# <codecell>
plt.show()

# Dendrogram
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster, fclusterdata

distanceMatrix = pdist(data)

# Build the complete-linkage tree once and share it between the dendrogram
# and the flat cluster assignment; it was computed twice before.
completeLinkage = linkage(distanceMatrix, method='complete')

dend = dendrogram(completeLinkage, color_threshold=2, leaf_font_size=10, labels=df.yearID.tolist())

# flat clusters cut at distance 2
assignments = fcluster(completeLinkage, 2, 'distance')

cluster_output = pandas.DataFrame({'team':df.yearID.tolist(), 'cluster':assignments})
cluster_output

plt.scatter(df.total_salaries, df.total_runs, s=60, c=cluster_output.cluster)

# The attempt below to improve the plot failed with:
# AttributeError: 'int' object has no attribute 'view'

#colors = cluster_output.cluster
#colors[colors == 1] = 'b'
#colors[colors == 2] = 'g'
#colors[colors == 3] = 'r'
#
#plt.scatter(df.total_salaries, df.total_runs, s=100, c=colors,  lw=0)

############################################################################

# Principal component analysis
示例#40
0
def plotDistribution(dist):
    """Draw a DataFrame as a grid of square markers shaded by value.

    For every index label ``k`` one column of markers is drawn at x = k,
    with one marker per DataFrame column (y). Colour is the cell value
    times 100 on the 'Oranges' colormap, normalised to the range 0-1.
    """
    column_labels = dist.columns
    # The Normalize instance alone fixes the colour range; passing
    # vmin/vmax together with `norm` is an error in matplotlib.
    color_norm = pltcolors.Normalize(vmin=0, vmax=1)
    for k in dist.index:
        alpha = np.array([dist[col][k] for col in dist]) * 100
        xs = [k] * len(alpha)
        plt.scatter(xs, column_labels, c=alpha, marker='s', linewidths=0,
                    cmap='Oranges', norm=color_norm, edgecolors=None)
示例#41
0
# Exploratory look at the 2013 NCAA game data and the cleaned player data.
pd.set_option('display.max_rows', 3000)
pd.set_option('display.width', 100000)


df = pd.read_csv('../input/2013_NCAA_Game.csv')

# scatter_matrix lives in pandas.plotting; the top-level alias was removed
pd.plotting.scatter_matrix(df)


pd.plotting.scatter_matrix(df, diagonal='kde')


hist(df['Team Avg Scoring Margin'])


plt.scatter(df['Team Score'], df['Team Margin'])


# player data without duplicate rows
pf = pd.read_csv('../input/clean_player_data.csv')
pf = pf.drop_duplicates()


# per-team mean of every column
tt = pf.groupby('Team').mean()



# Fit a straight line to columns 3 (x) and 4 (y) of the file named on the
# command line and save the scatter plot with the fit as ajuste.png.
import sys
import numpy as np
# pyplot (not the top-level matplotlib package) provides the plotting calls
import matplotlib.pyplot as plt
import math

a = sys.argv[1]
data = np.loadtxt(a)

x = data[:, 3]
y = data[:, 4]

# polyfit is a NumPy function; it returns coefficients, highest power first
figura = np.polyfit(x, y, 1)
plt.scatter(x, y)
# polyval evaluates the fitted polynomial at x for whatever degree was
# fitted (the original expression used undefined names and y instead of x)
plt.plot(x, np.polyval(figura, x))
plt.xlabel("Pasos (x)", fontsize=20)
plt.ylabel("Distancia (y)", fontsize=20)
plt.title("Numero de Pasos VS Distancia con la Regresion", fontsize=15)
plt.savefig('ajuste.png')



示例#43
0
        initialCon.append(-0.01 / 300.0 * x)

# Ensuring boundary conditions
#initialCon[0] = 0
#initialCon[-1] = 0


# Both history buffers start out bound to the very same initialCon object
# (no copy is made here).
yPrev = initialCon
yCurrent = initialCon
# Initialised empty; not used anywhere in the visible part of the script.
sample = []

# Stepping loop: 10 outer passes, each walking every index of x.
for n in range(0,10):
    for i in range(0,len(x)):
        # Indices 0 and 649 are excluded from the update below.
        if i !=0 and i != 649:
           # Value for point i computed from yCurrent at i, i+/-1, i+/-2 and
           # yPrev at i, weighted by r, eps and N (defined outside this view).
           # NOTE(review): yNew is rebound on every pass rather than stored
           # per index, and the i+/-2 reads are not guarded near the ends
           # (i-2 is negative for i == 1) -- confirm this is intended.
           yNew = ((2 - 2*r**2 - 6 * eps * r**2 * N**2)*yCurrent[i] 
                   - yPrev[i] + r**2*(1 + 4*eps*N**2)*(yCurrent[i+1] 
                    + yCurrent[i-1]) - eps*r**2*N**2 *(yCurrent[i+2] 
                    + yCurrent[i-2]))

        # The statements below sit at the level of the `if`, so they run on
        # every inner iteration, including i == 0 and i == 649.
        # NOTE(review): after the first pass yCurrent becomes copy(yNew), so
        # later yCurrent[i] lookups index that copy; on the very first pass
        # (i == 0) yNew is only defined if it was set before this chunk.
        # These lines probably belong one loop level out -- verify.
        yPrev = copy(yCurrent)
        yCurrent = copy(yNew)
        # NOTE(review): scatter is called with a single argument and pause
        # with none; if plt is matplotlib.pyplot, scatter expects x and y and
        # pause expects an interval -- confirm.
        plt.scatter(yNew)
        plt.draw()
        plt.pause()
        plt.clf()
                    
                   
                   
        
    
示例#44
0

#%% USPS version
# Project X onto its first five principal components.
pca = PCA(n_components=5)

X_trans = pca.fit_transform(X)

# Append the labels as a sixth column; mixing in np.matrix makes `data` a
# matrix, which is why the plotting code below flattens columns with .A1.
data = np.hstack((X_trans, np.matrix(y).T))

#%%

# Shuffle the rows in place, then keep the first 500 as the plotting sample.
np.random.shuffle(data)
sample = data[:500,:]

#%%
# NOTE(review): this figure is never drawn on, because every pair below
# opens its own figure -- confirm whether it is still wanted.
pl.figure()


# One scatter plot per ordered pair of components, coloured by the label
# column (index 5) with marker size 20.
for i in range(5):
    for j in range(5):
        try:
            #pl.subplot(5,5,((j)*5)+i)
            pl.figure()
            pl.scatter(sample[:,i].A1, sample[:,j].A1,  20, sample[:,5].A1)
            pl.show()
        except IndexError:
            # Report the offending pair. This form prints "i j" on both
            # Python 2 and Python 3; the bare `print i,j` statement is a
            # SyntaxError on Python 3.
            print("%s %s" % (i, j))

示例#45
0
# Histogram of release years, one bin per year from 1950 to 2012.
plt.hist(data.year, bins=np.arange(1950, 2013), color='#cccccc')
plt.xlabel("Release Year")
remove_border()
# NOTE(review): the original author reported
#   AttributeError: 'module' object has no attribute 'hist'
#   AttributeError: 'module' object has no attribute 'xlabel'
# at this point. That is the error produced when `plt` is bound to the
# top-level `matplotlib` package instead of `matplotlib.pyplot`; the import
# is outside this snippet, so confirm it reads
# `import matplotlib.pyplot as plt`.

# Histogram of IMDB scores in 20 bins.
plt.hist(data.score, bins=20, color='#cccccc')
plt.xlabel("IMDB rating")
remove_border()
# NOTE(review): the same AttributeError was reported here; see the note above.

# Score against year; low alpha so dense regions show up darker.
plt.scatter(data.year, data.score, lw=0, alpha=.08, color='k')
plt.xlabel("Year")
plt.ylabel("IMDB Rating")
remove_border()
# NOTE(review): the same AttributeError was reported here as well.

# Heavily voted titles with a score below 5.
data[(data.votes > 9e4) & (data.score < 5)][['title', 'year', 'score', 'votes', 'genres']]

# Lowest-scored title(s).
data[data.score == data.score.min()][['title', 'year', 'score', 'votes', 'genres']]

# Highest-scored title(s).
data[data.score == data.score.max()][['title', 'year', 'score', 'votes', 'genres']]

# Per-genre totals, largest first. Sorting the Series itself (rather than
# passing it through np.sort, which returns a bare array) keeps the genre
# names as the index, so they are listed alongside the counts.
genre_count = data[genres].sum().sort_values(ascending=False)
pd.DataFrame({'Genre Count': genre_count})
# The genres were not listed alongside the counts
# `r` evaluates R code given as a string; `rdata` lives on the R side.
# print(...) with one argument prints the same text on Python 2 and 3.
print(r("summary(rdata)"))

r("?princomp")

# Run the principal component analysis in R and keep the result as `p`.
r("p = princomp(rdata)")

print(r("names(p)"))

# The original R expression was missing its closing parenthesis.
print(r("head(p$scores, n=6)"))

# Pull the component scores back into pandas.
# NOTE(review): five column names are given here -- confirm that p$scores
# really has five columns.
irisPd = pd.DataFrame(r.get("p$scores"),columns=['pc1','pc2','pc3','pc4','pc5'])

irisPd.head()

# The original chained these calls with a trailing '.' across lines, which
# is a SyntaxError; they are separate module-level plotting calls.
# NOTE(review): `irisPY` (with Comp1/Comp2) is not defined in this snippet,
# only `irisPd` (with pc1..pc5) is -- confirm which frame is meant.
mat.scatter(irisPY.Comp1, irisPY.Comp2)
mat.title('Iris')
mat.xlabel('Primary Component 1')
mat.ylabel('Primary Component 2')

mat.show()


colors = ['red', 'green', 'blue']
labels = ['Setosa', 'Virginica', 'Versicolor']

fig = mat.figure()
# add_subplot is called on the figure that was just created; the original
# called it on `mat` and left `fig` unused.
ax = fig.add_subplot(1, 1, 1)

ax.set_xlabel('Primary Component 1')
ax.set_ylabel('Primary Component 2')
ax.set_title('Species Data')
示例#47
0
    
    tama = []
    tam = 0
    clustersx = np.empty((0))
    clustersy = np.empty((0))
    for i in range(n_centros):
        arr_x = np.empty((0))
        arr_y = np.empty((0))
        
        for j in range(n_puntos):
            if (int(minimos[j])==i):
                arr_x = np.append(arr_x,posx[j])
                arr_y = np.append(arr_y,posy[j])
        tam += np.size(arr_x)
        tama.append(tam)
        
        clustersx = np.append(clustersx,arr_x)
        clustersy = np.append(clustersy,arr_y)
    
    clustersx = np.split(clustersx,tama)
    clustersy = np.split(clustersy,tama)


# Draw every cluster's points and then its centre (larger marker, s=110)
# in that cluster's colour. Labels and title are re-applied on each pass.
for i in range(n_centros):
    plt.xlabel("Puntos en x")
    plt.ylabel("Puntos en y")
    plt.title("K-means clustering")

    cluster_x, cluster_y = clustersx[i], clustersy[i]
    color = colores[i]
    plt.scatter(cluster_x, cluster_y, c=color)
    plt.scatter(centrosx[i], centrosy[i], c=color, s=110)

plt.show()
示例#48
0
import numpy as np
# scatter() is provided by matplotlib.pyplot; the bare `matplotlib` package
# that was imported originally has no such attribute.
import matplotlib.pyplot as pl
n = 1024
# Two independent samples of n values drawn from normal(0, 1).
X = np.random.normal(0,1,n)
Y = np.random.normal(0,1,n)

pl.scatter(X,Y)
示例#49
0
文件: main.py 项目: 0x5b/nnpython
import matplotlib as plt
import numpy as np
from sklearn import datasets, linear_model

def generate_data():
    """Return a two-moons sample of 200 points as an ``(X, y)`` pair.

    NumPy's global random generator is seeded with 0 before the sample is
    drawn, and the moons are generated with a noise level of 0.20.
    """
    np.random.seed(0)
    features, labels = datasets.make_moons(n_samples=200, noise=0.20)
    return features, labels

class Config:
    """Constants shared by the network code.

    The meanings noted on each attribute are inferred from the names only --
    TODO confirm against the training code.
    """

    # Presumably the sizes of the input and output layers.
    nn_input_dim = 2
    nn_output_dim = 2

    # Presumably the gradient-descent step size.
    epsilon = 0.01

    # Presumably the regularisation strength.
    reg_lambda = 0.01

def visualize(X, y, model):
    """Plot the decision boundary of ``model`` over ``X``/``y`` and title it.

    The boundary plotter is handed a one-argument callable that forwards to
    ``predict(model, x)``; afterwards the current plot's title is set.
    """
    def pred_func(x):
        return predict(model, x)

    plot_decision_boundary(pred_func, X, y)
    plt.title("Logistic regression")

def plot_decision_boundary(pred_func, X, y):
	"""Placeholder: accepts a prediction callable and the data, draws nothing.

	The body is an unimplemented stub, so calling it has no effect.
	"""
	pass

# Script entry point. The guard originally compared against "__mail__",
# which never matches, so the demo below could not run.
if __name__ == "__main__":
    # The module-level alias `plt` is bound to the bare `matplotlib` package,
    # which has no scatter()/show(); rebind it to pyplot for plotting.
    import matplotlib.pyplot as plt

    X, y = generate_data()
    # Colour each point by its class label using the Spectral colormap.
    plt.scatter(X[:, 0], X[:, 1], s=40, c=y, cmap=plt.cm.Spectral)
    plt.show()
def read():
    """Print the two objects stored back-to-back in ``pickled_data.pkl``.

    The file is expected to hold two consecutive pickles; they are loaded
    in order and each is printed on its own line.

    Raises:
        IOError/OSError: if ``pickled_data.pkl`` cannot be opened.
        EOFError: if the file holds fewer than two pickled objects.
    """
    # Pickle data is binary: text mode ("r") fails on Python 3. The context
    # manager also closes the handle, which the original never did.
    # SECURITY: pickle.load can execute arbitrary code -- only use this on
    # files written by this program.
    with open("pickled_data.pkl", "rb") as pickle_file:
        t = pickle.load(pickle_file)
        v = pickle.load(pickle_file)
    # print(x) with one argument prints the same text on Python 2 and 3.
    print(t)
    print(v)

# Fill, compute and persist the series before plotting them.
initialize(v, s, t, dt, n)
calculate(v, s, t, dt, n)
store(v, t, n)

# One row per panel of figure 1:
# (subplot position, series, title, y-axis label, y position of the note).
panels = (
    (211, v, 'The Velocity of a Free Falling Object', 'Velocity($m/s$)', -60),
    (212, s, 'The Displacement of a Free Falling Object', 'Displacement($m$)', -300),
)

plt.figure(1)
for position, series, heading, y_label, note_y in panels:
    plt.subplot(position)
    # Green line through the samples, with the samples overlaid as points.
    plt.plot(t, series, "g-", linewidth=2.0)
    plt.scatter(t, series)
    plt.title(heading)
    plt.xlabel('Time($t$)', fontsize=14)
    plt.ylabel(y_label, fontsize=14)
    plt.text(3, note_y, r'$g = 9.8 m/s^2$', fontsize=16)
    plt.grid(True)