示例#1
0
def biplot(df, x_name, y_name):
    """Scatter two columns of ``df`` and overlay a straight line.

    Args:
        df: Object indexable by column name (``df[x_name]``, ``df[y_name]``).
        x_name: Column plotted on the horizontal axis.
        y_name: Column plotted on the vertical axis.

    The line's intercept and slope are read from the module-level ``p``
    (``p.Intercept`` and ``p.DEP_DELAY``), not from the arguments, and the
    axis labels are fixed strings. The figure is shown with ``plt.show()``.
    """
    fig, ax = plt.subplots()
    ax.grid(False)
    xs, ys = df[x_name], df[y_name]
    plt.scatter(xs, ys, c='blue', edgecolors='none', alpha=0.5)
    # ``ax`` is the current axes here, so the line lands on the scatter plot.
    abline_plot(intercept=p.Intercept, slope=p.DEP_DELAY, ax=ax, color="brown")
    ax.set_xlabel("Departure delay")
    ax.set_ylabel("Arrival delay")
    plt.show()
 def test_abline_remove(self, close_figures):
     """Draw two ablines over a scatter plot, then remove the first line."""
     intercept, slope = self.mod.params
     fig = plt.figure()
     ax = fig.add_subplot(111)
     ax.scatter(self.X[:, 1], self.y)
     # Same intercept, once with the fitted slope and once with it doubled.
     abline_plot(intercept=intercept, slope=slope, ax=ax)
     abline_plot(intercept=intercept, slope=2 * slope, ax=ax)
     first_line = ax.get_lines()[0]
     first_line.remove()
     close_or_save(pdf, fig)
示例#3
0
 def test_abline_remove(self):
     """Check that a line drawn by ``abline_plot`` can be removed from its axes."""
     intercept, slope = self.mod.params
     fig = plt.figure()
     ax = fig.add_subplot(111)
     ax.scatter(self.X[:, 1], self.y)
     # Two lines sharing the intercept: fitted slope and twice the fitted slope.
     abline_plot(intercept=intercept, slope=slope, ax=ax)
     abline_plot(intercept=intercept, slope=2 * slope, ax=ax)
     drawn = ax.get_lines()
     drawn[0].remove()
     close_or_save(pdf, fig)
示例#4
0
 def plot_scatter_and_line(self, result):
     '''(d)(f) Scatter ``self.x`` against ``self.y`` and overlay two lines.

     Args:
         result: Fitted model results passed to ``abline_plot`` as
             ``model_results``; drawn in green.

     The red line uses the fixed intercept -1 and slope 0.5. The legend
     labels were previously swapped: the fixed line was called "model fit"
     and the line taken from ``result`` was called "pop.regression".
     '''
     fig = plt.figure()
     ax = fig.add_subplot(111)
     ax.scatter(self.x, self.y, c='w')
     ax.set_ylabel("y")
     ax.set_xlabel("x")
     # Fixed reference line (not estimated from ``result``).
     rp.abline_plot(intercept=-1, slope=0.5, ax=ax, c='r', label="pop.regression")
     # Line estimated from the data.
     rp.abline_plot(model_results=result, ax=ax, c='g', label="model fit")
     plt.legend(loc='lower right', shadow=True, fontsize='medium')
     plt.show()
示例#5
0
def calibrate(data, plot = True, pdf = "rtcalibration.pdf"):
    """Fit ``irt ~ rt`` by OLS, save a scatter plot as PDF and return the fit.

    Args:
        data: Frame with ``rt`` and ``irt`` columns; the value at row 0,
            column 0 is used as the run's file name.
        plot: Accepted but not used by this function.
        pdf: Path the figure is written to.

    Returns:
        ``[filename, intercept, rt_slope]`` taken from the fitted parameters.
    """
    run_name = data.iloc[0,0]
    fit = sm.ols(formula = 'irt ~ rt', data = data).fit()

    # Scatter of the raw data with the fitted line and the identity line y = x.
    axes = data.plot(x='rt', y='irt', kind='scatter')
    abline_plot(model_results=fit, ax=axes)
    figure = abline_plot(intercept=0, slope=1, ax=axes)

    caption = "%s \n R2: %s \n R2adj: %s" % (run_name, fit.rsquared, fit.rsquared_adj)
    figure.suptitle(caption, fontsize = 10)
    figure.savefig(pdf, format = 'pdf')
    matplotlib.pyplot.close()

    coefs = fit.params
    return [run_name, coefs.Intercept, coefs.rt]
示例#6
0
    def simple_regession(self):
        ''' Regress ``mpg`` on ``horsepower`` by OLS, print the fit and plot it.

        Answer notes for exercise03-08:
        (a)
            (i)  Yes, from F-stat
            (ii) Explain it from RSE and R^2 stat
            (iii)negative
            (iv) Code, no prediction interval
        (b) Code
        (c) Residual/fitted: non-linearity

        NOTE(review): uses Python 2 ``print`` statements.
        '''

        # model = smf.ols(formula="mpg ~ horsepower", data=self.df)
        y = self.df['mpg']
        X = self.df[['horsepower']]
        # Prepend the constant column so the model has an intercept.
        X = sm.add_constant(X)
        print X
        res = sm.OLS(y, X).fit()
        # res = model.fit()
        print res.summary()

        # Point prediction for [constant, horsepower] = [1, 98].
        print "The prediction is: ", res.predict(exog=[[1, 98]])
        # Only the heading is printed; no interval is computed here.
        print "The prediction interval is: "

        # The bare string below is disabled plotting code, kept as-is.
        '''
        self.df.plot(kind="scatter", x='horsepower', y='mpg', c='w')
        graph_x = np.linspace(min(self.df['horsepower']), 200)
        graph_y = res.predict(sm.add_constant(graph_x))
        plt.plot(graph_x, graph_y)
        '''
        # abline_plot creates the figure; the scatter is added to its first axes.
        fig = rp.abline_plot(model_results=res)
        ax = fig.axes[0]
        ax.scatter(X['horsepower'], y, c='w')
        plt.show()
        lrplot.plot_R_graphs(res)
示例#7
0
 def test_abline_ab_ax(self):
     """Draw an abline from explicit intercept/slope onto existing axes."""
     intercept, slope = self.mod.params
     ax = plt.figure().add_subplot(111)
     ax.scatter(self.X[:, 1], self.y)
     returned_fig = abline_plot(intercept=intercept, slope=slope, ax=ax)
     plt.close(returned_fig)
示例#8
0
 def test_abline_ab_ax(self):
     """Plot intercept/slope onto caller-supplied axes and hand off the figure."""
     intercept, slope = self.mod.params
     ax = plt.figure().add_subplot(111)
     ax.scatter(self.X[:, 1], self.y)
     returned_fig = abline_plot(intercept=intercept, slope=slope, ax=ax)
     close_or_save(pdf, returned_fig)
示例#9
0
def create_scatter(df, module):
    """Scatter each row's mark in ``module`` against its average elsewhere.

    For every row the "avg" value is the row sum minus the ``module`` value,
    divided by the row's non-null count minus one. An OLS fit (red) and the
    identity line (blue) are overlaid, grid lines are drawn at 9, 12, 15 and
    18, and the figure is saved to ``<module>Scatter.png``.

    Args:
        df: DataFrame with one column per module.
        module: Name of the column to compare against the average.
    """
    df2 = pd.DataFrame({
        "avg": (df.sum(axis=1) - df[module]) / (df.count(axis=1) - 1),
        module: df[module],
    })

    plt.clf()
    # sns.set() resets the plotting context (including the font scale) to its
    # defaults, so it has to run first; in the previous order it overwrote
    # the font_scale=0.5 requested by set_context().
    sns.set(style="white")
    sns.set_context("notebook", font_scale=0.5)
    ax = df2.plot.scatter(x=module, y="avg", c='Black')
    # missing='drop' lets the fit skip rows with missing values.
    model = sm.OLS(df2["avg"], sm.add_constant(df2[module]), missing='drop')
    abline_plot(model_results=model.fit(), ax=ax, c="Red")
    abline_plot(intercept=0, slope=1, ax=ax, c="Blue")

    ax.set(xlabel=module, ylabel="Average Mark")
    ax.set_xlim(0, 22)
    ax.set_ylim(0, 22)
    for boundary in (9, 12, 15, 18):
        ax.axhline(y=boundary, c='Blue')
        ax.axvline(x=boundary, c='Blue')
    plt.savefig(module + "Scatter.png")
示例#10
0
def plot_all_experiments(datasets: list,
                         experiment_names: list,
                         test_train_split: float = None,
                         savefig: str = "...",
                         ols_line: bool = False,
                         legend_kwargs: dict = None) -> None:
    """Draw one scatter panel per dataset, stacked vertically, then show it.

    Each panel plots ``dataset[0]`` against ``dataset.index``.

    Args:
        datasets: Datasets to plot, one panel each.
        experiment_names: Panel titles, indexed in step with ``datasets``.
        test_train_split: Optional fraction of each dataset's length at which
            a dashed grey vertical line is drawn.
        savefig: Path passed to ``plt.savefig``; a falsy value skips saving.
        ols_line: When true, overlay an OLS fit of ``dataset[0]`` on the index.
        legend_kwargs: Optional keyword arguments for ``axis.legend``.

    The number of panels now follows ``len(datasets)`` instead of a fixed 8,
    which raised IndexError for more than eight datasets and left empty
    panels for fewer. Figure height stays at 3 inches per panel (24 for 8).
    """
    # At least one row: plt.subplots rejects a zero-row grid.
    n_panels = max(len(datasets), 1)
    # squeeze=False keeps a 2-D array of axes even for a single panel.
    fig, axes = plt.subplots(n_panels, 1, squeeze=False)
    fig.set_size_inches(6, 3 * n_panels)

    for i, dataset in enumerate(datasets):
        axis = axes[i, 0]
        axis.scatter(x=dataset.index, y=dataset[0], s=1)
        axis.set_title(f"Experiment {experiment_names[i]}")

        if test_train_split is not None:  # plot vertical line
            test_train_split_index = int(len(dataset) * test_train_split)
            axis.axvline(x=test_train_split_index,
                         color="grey",
                         linestyle="--",
                         linewidth=1)

        if ols_line:
            x = sm.add_constant(dataset.index)
            y = dataset[0]
            abline_plot(model_results=sm.OLS(y, x).fit(),
                        ax=axis,
                        color="black",
                        linewidth=1)

        if legend_kwargs:
            axis.legend(**legend_kwargs)

    plt.tight_layout()

    if savefig:
        plt.savefig(savefig)
    plt.show()
def pltDfrXY(cDfr, dIPlt, pF='Hugo.pdf', cOff=0, pltAxXY=None, cModel=None):
    """Plot every column of ``cDfr`` (from the second on) against the first.

    Args:
        cDfr: DataFrame with at least two columns; column 0 supplies x.
        dIPlt: Dict of plot settings (title, labels, marker, line and colour
            entries, axis limits) read by key below.
        pF: Passed on to ``decorateSaveFigLegOut`` as its first argument.
        cOff: Offset into the colour list ``dIPlt['lClr']``.
        pltAxXY: Passed on to ``decorateSaveFigLegOut`` unchanged.
        cModel: Optional fitted model; when given, the figure is created by
            ``regplt.abline_plot`` and the data are drawn on its first axes.
    """
    assert cDfr.shape[1] > 1
    sTtl, xLbl, yLbl = [dIPlt[s] for s in ('title', 'xLbl', 'yLbl')]
    tpMark, szMark, ewMark = [dIPlt[s] for s in ('tpMark', 'szMark', 'ewMark')]
    styLn, wdthLn, lClr = [dIPlt[s] for s in ('styLn', 'wdthLn', 'lClr')]
    xLim = (dIPlt['xLimB'], dIPlt['xLimT'])
    yLim = (dIPlt['yLimB'], dIPlt['yLimT'])

    if cModel is None:
        cFig, cAx = plt.subplots()
    else:
        cFig = regplt.abline_plot(model_results=cModel)
        cAx = cFig.axes[0]

    nCol = cDfr.shape[1]
    for iCol in range(1, nCol):
        # Colours cycle through lClr, shifted by cOff.
        clr = lClr[(cOff + iCol - 1) % len(lClr)]
        cAx.plot(cDfr.iloc[:, 0], cDfr.iloc[:, iCol],
                 marker=tpMark, ms=szMark, mew=ewMark, mec=clr, mfc=clr,
                 ls=styLn, lw=wdthLn, color=clr)

    # The y-column name handed on is that of the last column plotted.
    decorateSaveFigLegOut(pF, cFig, cDfr, sTtl, xLbl, yLbl, xLim, yLim,
                          nmCX=cDfr.columns[0],
                          nmCY=cDfr.columns[nCol - 1],
                          pltAxXY=pltAxXY)
    plt.close()
示例#12
0

# For every column of df, scatter that column against the average of the
# remaining columns, overlay an OLS fit and the identity line, and save a PDF.
plotDf = pandas.DataFrame()
plotDf['total'] = df.sum(axis=1)

# Number of other non-null entries per row; the same for every column.
otherCount = df.count(axis=1) - 1
for c in df.columns:
    plotDf[c + "y"] = (plotDf['total'] - df[c]) / otherCount
    plotDf[c] = df[c]

# NOTE(review): presumably meant to blank out 0 and +/-inf; confirm how the
# installed pandas treats value=None together with a list of targets.
plotDf = plotDf.replace(to_replace=[0, np.inf, -np.inf], value=None)

for c in df.columns:
    ax = plotDf.plot.scatter(x=c, y=c + "y", c='Black')
    try:  # handle case where LSF can't occur
        model = sm.OLS(plotDf[c + "y"], sm.add_constant(plotDf[c]), missing='drop')
        abline_plot(model_results=model.fit(), ax=ax, c='Red')
        abline_plot(intercept=0, slope=1, ax=ax, c='Blue')
    except Exception:
        # Best effort: still save the plot without fit lines. A bare
        # ``except:`` would also swallow KeyboardInterrupt and SystemExit.
        pass

    ax.set(xlabel=c, ylabel="Average Mark (CGS)")
    ax.set_xlim(0, 22.5)
    ax.set_ylim(0, 22.5)
    for l in [9, 12, 15, 18]:  # draw lines for first etc boundaries
        ax.axhline(y=l, c='Blue')
        ax.axvline(x=l, c='Blue')
    plt.savefig(c + "_CGS.pdf")

plt.clf()
seaborn.set_context("notebook", font_scale=0.5)
seaborn.violinplot(data=df, cut=0, fontsize=8)
示例#13
0
 def test_abline_ab(self):
     """Plot a line from explicit intercept and slope, without given axes."""
     intercept, slope = self.mod.params
     close_or_save(pdf, abline_plot(intercept=intercept, slope=slope))
示例#14
0
 def test_abline_model_ax(self):
     """Plot the model's line onto axes that already hold the scatter."""
     ax = plt.figure().add_subplot(111)
     ax.scatter(self.X[:, 1], self.y)
     returned_fig = abline_plot(model_results=self.mod, ax=ax)
     close_or_save(pdf, returned_fig)
示例#15
0
 def test_abline_model(self):
     """Let ``abline_plot`` create the figure, then add the scatter to it."""
     fig = abline_plot(model_results=self.mod)
     fig.axes[0].scatter(self.X[:, 1], self.y)
     close_or_save(pdf, fig)
    plot_acf(value, lags=50)
    pcor = pacf(value, nlags=50)
    plt.plot(pcor)
    #plt.show()

################################# trend (not used)
from statsmodels.api import OLS
from statsmodels.graphics.regressionplots import abline_plot

# Design matrix: a column of ones (intercept) and a 0..n-1 time index.
X = np.ones((len(r_rachat_sous), 2))
X[:, 1] = np.arange(0, len(r_rachat_sous))
reg = OLS(r_rachat_sous, X)
results = reg.fit()
# Bare expression: only displays something in an interactive session.
results.params

# Fitted line first, then the raw series in red on the same axes.
fig = abline_plot(model_results=results)
ax = fig.axes[0]
ax.plot(X[:, 1], r_rachat_sous, 'r')
ax.margins(.1)
#plt.show()

############## Ljung-Box test - white noise

import statsmodels as sm
from statsmodels import *

# remove the redemption ("rachat") and subscription ("souscription") series, which are not stationary

for key, value in element.items():
    try:
        res = sm.tsa.arima_model.ARMA(value, (1, 1)).fit(disp=-1)
示例#17
0
def plot_dosage_by_rsID(gene_reference, dos, cov_mat, counts,
                        title=None, ax=None,
                        additional_covar=None,
                        adjx=True, adjy=True):
    """
    Scatter covariate-adjusted dosages against adjusted counts for one SNP
    and draw a fitted line through them.

    Arguments:
    ---------
    gene_reference - object providing ``rsID`` and ``gene`` attributes
    dos - dosage table, indexed by ``rsID`` and the ``cov_mat`` index
    cov_mat - covariate matrix; must have a ``const`` column when ``adjy``
    counts - counts, indexed by ``gene`` and the ``cov_mat`` index
    title - optional prefix for the axes title
    ax - optional axes to draw on; a new figure is created when falsy
    additional_covar - accepted but not used by this function
    adjx - regress the covariates out of the dosages before plotting
    adjy - regress the covariates out of the counts before plotting

    Returns:
    -------
    ``(ax, test)`` when ``ax`` was supplied, otherwise ``(fig, test)``, where
    ``test`` is the OLS fit of the counts on covariates plus the SNP.

    NOTE(review): relies on pandas ``.ix`` and matplotlib ``axisbg``, which
    newer releases of those libraries no longer provide - confirm versions.
    """
    gr = gene_reference
    # Work on a copy so the SNP column is not added to the caller's frame.
    cov_mat_t = cov_mat.copy(deep=True)
    try:
        geno = dos.ix[gr.rsID, cov_mat_t.index]
    except pd.core.indexing.IndexingError:
        # Fallback: index ``dos`` by sample only.
        geno = dos.ix[cov_mat_t.index]
    cov_mat_t[gr.rsID] = geno
    # Keep only samples with a non-null dosage.
    cov_mat_t = cov_mat_t.ix[cov_mat_t[gr.rsID].notnull() , :]
    geno = geno[geno.notnull()]
    c = counts.ix[gr.gene, cov_mat_t.index]
    cov_mat = cov_mat.ix[cov_mat_t.index,:]
    if adjx:
        # Residual dosage after removing the covariate fit.
        results = sm.OLS(geno, cov_mat).fit()
        adj_dos_mat = geno -\
            np.dot(results.params, cov_mat.T)
    else:
        adj_dos_mat = geno
    if adjy:
        # Residual counts; the constant is added back when plotting.
        results = sm.OLS(c, cov_mat).fit()
        adj_counts = c - np.dot(results.params, cov_mat.T)
        const = results.params.const
    else:
        adj_counts = c
        const = 0
    # Need to grab original genotypes
    colors = []
    # Make this into a function
    # Three colour values (0, 0.5, 1), one per dosage bin.
    color_dict = np.linspace(0, 1, 3)
    for i in geno:
        if i <= 0.5: colors.append(color_dict[0])
        elif i > 0.5 and i <= 1.5: colors.append(color_dict[1])
        else: colors.append(color_dict[2])
    if ax:
        ax_orig = True
    else:
        ax_orig = None
        fig, ax = plt.subplots(nrows=1, ncols=1, sharey=False,
                               sharex=False, subplot_kw=dict(axisbg='#FFFFFF'))
    ax.scatter(adj_dos_mat, adj_counts + const, s=50,
               c=colors)
    # Ticks are captured before thinning; the originals position the label.
    xticks = ax.get_xticks()
    yticks = ax.get_yticks()
    ax.set_xticks(xticks[1::2])
    ax.set_yticks(yticks[1::2])
    # No-intercept fit of adjusted counts on adjusted dosage gives the slope.
    fitted_line = sm.OLS(adj_counts, adj_dos_mat).fit()
    abline_plot(const, fitted_line.params[0], color='k', ax=ax)
    test = sm.OLS(c, cov_mat_t).fit()
    # annot_y doubles as a tick index (-1 = last, 1 = second) and a sign.
    if test.params[gr.rsID] > 0:
        annot_y = - 1
    else:
        annot_y = 1
    yrange = yticks[-1] - yticks[0]
    # NOTE(review): ``annot_y / 2`` is integer division under Python 2
    # (-1 or 0) but +/-0.5 under Python 3 - confirm the intended offset.
    ax.text(xticks[0] + 0.025, yticks[annot_y] + annot_y / 2 * yrange / 5,
            '$R^{2}$=%s' % str(test.rsquared)[0:4],
            style='italic')
    ax.set_ylabel('$log_{2}$ CPM')
    ax.set_xlabel('Fitted Dosages')
    if title:
        ax.set_title('%s partial regression\non %s' % (title, gr.rsID))
    else:
        pass
    if ax_orig:
        return(ax, test)
    else:
        return(fig, test)
示例#18
0
 def test_abline_ab(self):
     """Build an abline figure from the fitted intercept and slope alone."""
     intercept, slope = self.mod.params
     plt.close(abline_plot(intercept=intercept, slope=slope))
示例#19
0
 def test_abline_model(self):
     """Plot the fitted model's line, then scatter the data on the same axes."""
     fig = abline_plot(model_results=self.mod)
     fig.axes[0].scatter(self.X[:, 1], self.y)
     plt.close(fig)
示例#20
0
# Column plotted on the y axis; alternatives tried earlier are kept inline.
col_y = 'perMnd' #'mio' #'fasteUdg' #'perMnd'
#ax=df.plot(x='m2',y='perMnd', kind='scatter',s=1.0) #0.5)
# NOTE(review): col_x and df are defined outside this excerpt.
ax=df.plot(x=col_x,y=col_y, kind='scatter',s=1.0) #0.5)

#155=16.000, 200=17.700
#45m2 = 1700, 38kr/m2. 26m2 for 1000 kr more.

# Always-true switch around the regression overlay.
if 1:
  #X2 = sm.add_constant( df['m2'] )
  X2 = sm.add_constant( df[col_x] )
  mdl = sm.OLS(endog = df[col_y], exog = X2 )
  result=mdl.fit()
  print(result.summary())
  pprint.pprint(result.params)

  # Draw the fitted line on the scatter axes created above.
  abline_plot(model_results=result, ax=ax)

#plt.xlim(100,350) #600)
=======
# Hard-coded path to the tab-separated input sheet.
filename = 'C:\\Users\\jg.STATUSDK\\Downloads\\husudg - Sheet1.tsv'


# Read the whole sheet; the commented options limited or chunked the read.
df = pd.read_csv(
    filename,
    sep='\t',
    #nrows=2
    #chunksize=2,
    #iterator=True,
    )

#df = df.head(167)
示例#21
0
 def test_abline_model(self):
     """Create the abline figure from the model and overlay the observations."""
     fig = abline_plot(model_results=self.mod)
     fig.axes[0].scatter(self.X[:, 1], self.y)
     plt.close(fig)
示例#22
0
 def test_abline_ab(self):
     """Call ``abline_plot`` with intercept/slope only and close the figure."""
     intercept, slope = self.mod.params
     plt.close(abline_plot(intercept=intercept, slope=slope))
示例#23
0
# ii)
# line starts at y = 39; the slope is negative, magnitude 0.158

# iii)
# Confidence intervals for the fitted coefficients (sample output below).
fit.conf_int()
#                     0          1
# const       38.525212  41.346510
# horsepower  -0.170517  -0.145172

# iv)
fit.rsquared

# c)
# Scatter of the data with the fitted regression line drawn on top.
ax = df.plot(kind="scatter", x="horsepower", y="mpg")
abline_plot(model_results=fit, ax=ax, color="orange", linewidth=3)

# Exercise 11.2
# a)
df = pd.read_csv(
    r"C:\Users\freya\OneDrive\HSLU\6. Semester 2020FS\STAT\SW11\Übungen\Boston.csv",
    index_col=0)
df.head()
df.columns

# b)
# i)
# medv = bo + b1 * lstat

# ii)
y = df["medv"]
示例#24
0
def plot_dosage_by_rsID(gene_reference,
                        dos,
                        cov_mat,
                        counts,
                        title=None,
                        ax=None,
                        additional_covar=None,
                        adjx=True,
                        adjy=True):
    """
    Scatter covariate-adjusted dosages against adjusted counts for one SNP
    and draw a fitted line through them.

    Arguments:
    ---------
    gene_reference - object providing ``rsID`` and ``gene`` attributes
    dos - dosage table, indexed by ``rsID`` and the ``cov_mat`` index
    cov_mat - covariate matrix; must have a ``const`` column when ``adjy``
    counts - counts, indexed by ``gene`` and the ``cov_mat`` index
    title - optional prefix for the axes title
    ax - optional axes to draw on; a new figure is created when omitted
    additional_covar - accepted but not used by this function
    adjx - regress the covariates out of the dosages before plotting
    adjy - regress the covariates out of the counts before plotting

    Returns:
    -------
    ``(ax, test)`` when ``ax`` was supplied, otherwise ``(fig, test)``, where
    ``test`` is the OLS fit of the counts on covariates plus the SNP.

    NOTE(review): relies on pandas ``.ix`` and matplotlib ``axisbg``, which
    newer releases of those libraries no longer provide - confirm versions.
    """
    gr = gene_reference
    # Work on a copy so the SNP column is not added to the caller's frame.
    cov_mat_t = cov_mat.copy(deep=True)
    try:
        geno = dos.ix[gr.rsID, cov_mat_t.index]
    except pd.core.indexing.IndexingError:
        # Fallback: index ``dos`` by sample only.
        geno = dos.ix[cov_mat_t.index]
    cov_mat_t[gr.rsID] = geno
    # Keep only samples with a non-null dosage.
    cov_mat_t = cov_mat_t.ix[cov_mat_t[gr.rsID].notnull(), :]
    geno = geno[geno.notnull()]
    c = counts.ix[gr.gene, cov_mat_t.index]
    cov_mat = cov_mat.ix[cov_mat_t.index, :]
    if adjx:
        # Residual dosage after removing the covariate fit.
        results = sm.OLS(geno, cov_mat).fit()
        adj_dos_mat = geno - np.dot(results.params, cov_mat.T)
    else:
        adj_dos_mat = geno
    if adjy:
        # Residual counts; the constant is added back when plotting.
        results = sm.OLS(c, cov_mat).fit()
        adj_counts = c - np.dot(results.params, cov_mat.T)
        const = results.params.const
    else:
        adj_counts = c
        const = 0
    # Need to grab original genotypes
    # Make this into a function
    # Three colour values (0, 0.5, 1), one per dosage bin.
    color_dict = np.linspace(0, 1, 3)
    colors = []
    for dosage in geno:
        if dosage <= 0.5:
            colors.append(color_dict[0])
        elif dosage <= 1.5:
            colors.append(color_dict[1])
        else:
            colors.append(color_dict[2])
    ax_orig = ax is not None
    if not ax_orig:
        fig, ax = plt.subplots(nrows=1,
                               ncols=1,
                               sharey=False,
                               sharex=False,
                               subplot_kw=dict(axisbg='#FFFFFF'))
    ax.scatter(adj_dos_mat, adj_counts + const, s=50, c=colors)
    # Ticks are captured before thinning; the originals position the label.
    xticks = ax.get_xticks()
    yticks = ax.get_yticks()
    ax.set_xticks(xticks[1::2])
    ax.set_yticks(yticks[1::2])
    # No-intercept fit of adjusted counts on adjusted dosage gives the slope.
    fitted_line = sm.OLS(adj_counts, adj_dos_mat).fit()
    abline_plot(const, fitted_line.params[0], color='k', ax=ax)
    test = sm.OLS(c, cov_mat_t).fit()
    # Label goes near the top tick for a positive effect, else near the bottom.
    if test.params[gr.rsID] > 0:
        r2_text_pos = yticks[-1] - (yticks[-1] - yticks[-2]) / 5
    else:
        r2_text_pos = yticks[0] + (yticks[1] - yticks[0]) / 5
    # Pull the label back inside the visible range. The upper check used to
    # compare against the lower limit, which moved every label above ymin
    # to the top of the axes.
    ymin_, ymax_ = ax.get_ylim()
    if r2_text_pos < ymin_:
        r2_text_pos = ymin_ + (ymax_ - ymin_) / 12
    elif r2_text_pos > ymax_:
        r2_text_pos = ymax_ - (ymax_ - ymin_) / 12
    ax.text(xticks[0] + 0.025,
            r2_text_pos,
            '$r^{2}$=%s' % str(test.rsquared)[0:4],
            style='italic')
    ax.set_ylabel('$log_{2}$ CPM')
    ax.set_xlabel('Fitted Dosages')
    if title:
        ax.set_title('%s partial regression\non %s' % (title, gr.rsID))
    if ax_orig:
        return (ax, test)
    return (fig, test)
示例#25
0
    # OLS
    X = sm.add_constant(x)
    model = sm.OLS(y, X)
    results = model.fit()

    # Main
    sc = ax.scatter(x=x, y=y, fc='#3182bdcc', ec='#3182bd', s=8, zorder=5)
    ax.set_ylabel('Fertility rate')
    ax.set_xlabel(r'Log$_2$(FPKM+1)')
    ax.grid()

    ax.text(x=9,
            y=0.5,
            s=r"$R^2={rsquared:.2f}$".format(rsquared=results.rsquared))
    abline_plot(model_results=results, color='#d62728', ax=ax,
                zorder=6)  # regression line

    # Top
    xw = np.ones(shape=len(x)) / len(x)
    axt.hist(x, bins=22, color='#ff9896', weights=xw, zorder=5)
    #axt.axes.get_xaxis().set_visible(False)
    axt.set_xticklabels([])
    axt.yaxis.set_major_formatter(mtick.PercentFormatter(1, decimals=0))
    axt.set_ylabel('%')
    axt.grid()

    # Bottom
    yw = np.ones(shape=len(y)) / len(y)
    axb.hist(y,
             bins=22,
             color='#aec7e8',
 def test_abline_model_ax(self, close_figures):
     """Draw the model's fitted line onto axes that already show the data."""
     ax = plt.figure().add_subplot(111)
     ax.scatter(self.X[:, 1], self.y)
     returned_fig = abline_plot(model_results=self.mod, ax=ax)
     close_or_save(pdf, returned_fig)
示例#27
0
 def test_abline_model_ax(self):
     """Pass both a model and existing axes to ``abline_plot``."""
     ax = plt.figure().add_subplot(111)
     ax.scatter(self.X[:, 1], self.y)
     returned_fig = abline_plot(model_results=self.mod, ax=ax)
     plt.close(returned_fig)
 def test_abline_ab(self, close_figures):
     """Plot from intercept and slope only; no axes are supplied."""
     intercept, slope = self.mod.params
     close_or_save(pdf, abline_plot(intercept=intercept, slope=slope))
 def test_abline_model(self, close_figures):
     """Plot from the model alone, then scatter the data on the new axes."""
     fig = abline_plot(model_results=self.mod)
     fig.axes[0].scatter(self.X[:, 1], self.y)
     close_or_save(pdf, fig)
示例#30
0
    return df


if __name__ == "__main__":
    # Fit on one month of trips, then score the same model on another month.
    training_file = "green_tripdata_2017-01.csv"
    testing_file = "green_tripdata_2017-06.csv"

    # Training: regress the fare on every other preprocessed column.
    df_train = preprocess(training_file)
    train_features = df_train.drop('fare_amount', axis=1)
    lm = sm.OLS(df_train['fare_amount'], train_features).fit()
    y_train = lm.predict(train_features)
    rtrain = pearsonr(y_train, df_train['fare_amount'])

    # Testing: correlation between predicted and actual fares.
    df_test = preprocess(testing_file)
    y_test = lm.predict(df_test.drop('fare_amount', axis=1))
    rtest = pearsonr(y_test, df_test['fare_amount'])

    print(lm.summary())
    print("rtrain: {:.4f} , rtest: {:.4f}".format(rtrain[0], rtest[0]))

    # Fare against distance, with axes clipped to a fixed window.
    ax = df_train.plot(x='trip_distance', y='fare_amount', kind='scatter', s=1)
    ax.set_ylim(0, 250)
    ax.set_xlim(0, 80)

    # Overlay the regression line from the fitted model.
    abline_plot(model_results=lm, ax=ax, markersize=1)

    plt.show()
示例#31
0
]].fillna(0)

# Dependent variable (missing values are replaced with 0).
Y = high_thc["cannabinoid_d9_thca_percent"].fillna(0)

# Fit a regression model.
# Add the constant column so the model includes an intercept.
X = sm.add_constant(X)
model = sm.OLS(Y, X)
regression_results = model.fit()
print(regression_results.summary())

# Plot the regression: scatter of the two columns with the fitted line on top.
ax = high_thc.plot(x='cannabinoid_cbda_percent',
                   y='cannabinoid_d9_thca_percent',
                   kind='scatter')
abline_plot(model_results=regression_results, ax=ax)

#-----------------------------------------------------------------------------#
# Trend an analyte (butane) over time.
# https://stackoverflow.com/questions/36410075/select-rows-from-a-dataframe-based-on-multiple-values-in-a-column-in-pandas
# https://stackoverflow.com/questions/17706109/summing-the-number-of-occurrences-per-day-pandas
# https://apps.leg.wa.gov/wac/default.aspx?cite=314-55-102
# https://stackoverflow.com/questions/10998621/rotate-axis-text-in-python-matplotlib
# https://stackoverflow.com/questions/7917107/add-footnote-under-the-x-axis-using-matplotlib
#-----------------------------------------------------------------------------#
concentrate_types = [
    "hydrocarbon_concentrate",
    "concentrate_for_inhalation",
    "non-solvent_based_concentrate",
    "co2_concentrate",
    "food_grade_solvent_concentrate",
// Horsepower
//////////
""")

# Response vector as a plain array. ``Series.get_values()`` was removed in
# pandas 1.0; ``.values`` is available on old and new versions alike.
regY = data['mpg'].values

# Single-feature model: mpg on horsepower, with an intercept.
model = sm.OLS(regY, sm.add_constant(data['horsepower']))
results = model.fit()
p = results.params
print(results.summary())

# scatter-plot data
ax = data.plot(x='horsepower', y='mpg', kind='scatter')

# plot regression line
abline_plot(model_results=results, ax=ax)

#plt.show()

print("""
//////////
// All features
//////////
""")
df = data[values]

# Same response, now regressed on every column selected by ``values``.
model = sm.OLS(regY, sm.add_constant(df))
results = model.fit()
print(results.summary())

#plot_data()