def train(Theta, X_train, Y_train, learning_rate, noOfIter):
    train_size = X_train.shape[0]

    z = np.dot(X_train, Theta)
    h = Sigmoid.sigmoid(z)

    J = CostFunction.cost(h, Y_train, train_size)
    Jarr = []
    Jarr = np.hstack((Jarr, J))

    for i in range(noOfIter):
        # gradient of the logistic-regression cost with respect to Theta
        dTheta = 1 / train_size * np.dot((h - Y_train).T, X_train).T
        #print(dTheta.shape)
        Theta = Theta - learning_rate * dTheta
        z = np.dot(X_train, Theta)
        h = Sigmoid.sigmoid(z)
        J = CostFunction.cost(h, Y_train, train_size)
        Jarr = np.hstack((Jarr, J))
        print(J)

    plotGraph = False
    if plotGraph:
        PlotData.plotGraph(Jarr, noOfIter)

    return Theta, J
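A minimal sketch of how this train routine might be invoked, assuming the example's Sigmoid, CostFunction, and PlotData helpers are importable; the toy data, shapes, and hyper-parameters below are illustrative assumptions, not part of the original example.

import numpy as np

# Illustrative toy problem: 100 samples, 2 features, plus a leading bias column.
rng = np.random.RandomState(0)
features = rng.rand(100, 2)
X_train = np.hstack([np.ones((100, 1)), features])
Y_train = (features.sum(axis=1) > 1.0).astype(float).reshape(-1, 1)

Theta0 = np.zeros((X_train.shape[1], 1))
Theta, J = train(Theta0, X_train, Y_train, learning_rate=0.1, noOfIter=500)
print("final cost:", J)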
Example No. 2
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    vectorFileName = fn.create_prof_vect_name(sys.argv)
    corrFileName = fn.create_correlations_name(sys.argv)

    if not os.path.exists(corrFileName):
        tokenVects = read.word_vects(vectorFileName)
        if tokenVects is None:
            print("Specified vector file not found.")
            print("To create vectors use 'createProfVectors.py'")
            exit()
        ratingVect = read.overall_rating_vect(vectorFileName)
        vocabVect = read.vocab_from_vect_file(vectorFileName)
        corrTups = stat.find_correlations(tokenVects, ratingVect, vocabVect)
        write.token_correlations(corrTups, corrFileName)
    else:
        corrTups = read.token_correlations(corrFileName)

    corrPlotFileName = None
    if '-save' in sys.argv:
        corrPlotFileName = fn.create_correlations_plot_name(sys.argv)

    # Plot correlations
    plot.tuple_pair_score_correlation(
        corrTups,
        title=plot.create_token_pair_score_correlation_name(sys.argv),
        saveFile=corrPlotFileName)
Example No. 3
    def getCountRateHist(self):
        '''Plots a histogram of the count rate. The number of bins is used
        for the histogram, and the sample length is how long the count rate
        is averaged over (equivalent to "sample length" for the count rate
        vs. time graph).'''

        #check if data has been imported. if not, give a warning
        if self.dataSet:

            #ask for number of bins for histogram
            labelText = "Enter the number of bins"
            numBins = tksd.askinteger("Count Rate Histogram", labelText, parent=self.root, minvalue=1)
            if not numBins:
                return

            #ask for length of sample to calculate count rate
            labelText = "Enter the sample length (seconds)"
            sampleLength = tksd.askfloat("Sample Length", labelText, parent=self.root, minvalue=0)
            if not sampleLength:
                return

            #plot histogram in matplotlib
            pd.plotHistOfCountRates(self.dataSet, sampleLength, numBins)

        else:
            self.showImportAlert()
Example No. 4
def CompareDataPerformance(df, names, ptitle="", ModelIndex=0, Epochs=10):
    print("Run for: ", ptitle)
    Train, Test = pf.SplitData(df)
    XTrain, YTrain = pf.TrainandTarget(Train, names)
    XTest, YTest = pf.TrainandTarget(Test, names)
    p_model, p_history = tam.BuildTrainSeqModel(ModelIndex, Epochs, XTrain,
                                                YTrain, XTest, YTest)
    plots = [
        pld.PlotPerformance(p_history.history, ptitle + str(ModelIndex)),
        pld.ROCCurve(XTest, YTest, p_model, ptitle + str(ModelIndex))
    ]
    pld.SavePlots(plots, ptitle + "MI" + str(ModelIndex) + ".pdf")
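A hedged sketch of calling this helper; the CSV path and column names are purely illustrative assumptions, and pf, tam, and pld are the example's own modules.

import pandas as pd

# Hypothetical input: a DataFrame with a Label column plus the columns that
# pf.TrainandTarget expects (the names below are placeholders).
df = pd.read_csv("training_data.csv")
feature_names = ["feat_a", "feat_b", "feat_c"]
CompareDataPerformance(df, feature_names, ptitle="baseline", ModelIndex=0, Epochs=10)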
Example No. 5
def capital_gains_lift(source):
    """
    Computes capital gains lift in top income percentages over time chart
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        ("Top 10% income share-including capital gains",
         "Top 10% income share"),
        ("Top 5% income share-including capital gains", "Top 5% income share"),
        ("Top 1% income share-including capital gains", "Top 1% income share"),
        ("Top 0.5% income share-including capital gains",
         "Top 0.5% income share"),
        ("Top 0.1% income share-including capital gains",
         "Top 0.1% income share"),
        ("Top 0.05% income share-including capital gains",
         "Top 0.05% income share"),
    )
    source = list(dataset)
    series = [
        md.delta(gds.timeseries(source, a), gds.timeseries(source, b))
        for a, b in columns
    ]
    return pld.linechart(series,
                         labels=list(col[1] for col in columns),
                         title="U.S. Capital Gains Income Lift",
                         ylabel="Percentage Difference")
Example No. 6
    def showThresholdGraph(self):
        '''Shows a graph of the .wav amplitudes so that the user can figure out a good threshold'''
        #pass
        #import the first 5 seconds of the .wav file

        #prompt for file name
        filename = tkfd.askopenfilename(title="Pick your .wav file", initialdir=".", parent=self.root)
        if not filename:
            return

        rate, wavData = ad.DataSet.readWaveFile(filename)
        #get the first 5 seconds of data
        numEntriesFor5Sec = rate * 5
        first5Sec = wavData[1:numEntriesFor5Sec]

        #plot data
        pd.plot(first5Sec, "Calibrate the Threshold!")
Example No. 7
    def getIntervals(self):
        '''Plots a histogram of the time interval length between consecutive
        counts.'''

        #check if data has been imported. if not, give a warning
        if self.dataSet:

            #ask for the number of bins for the histogram
            labelText = "Enter the desired number of bins in the histogram"
            numBins = tksd.askinteger("Intervals Histogram", labelText, parent=self.root, minvalue=1)
            if not numBins:
                return

            #plot the histogram in matplotlib
            pd.plotHistOfIntervals(self.dataSet, numBins)

        else:
            self.showImportAlert()
Example No. 8
    def getCountRate(self):
        '''Plots the count rate as a function of time. The rates are calculated
        for each "bin" (i.e. independent sample)'''

        #Check if data has been imported. if not, give a warning
        if self.dataSet:

            #prompt for desired bin spacing
            labelText = "Enter the desired sample length in seconds"
            binSpacing = tksd.askfloat("Count Rate", labelText, parent=self.root, minvalue=0, initialvalue=1)
            if not binSpacing:
                return

            #plot the count rate in matplotlib
            pd.plotCountRate(self.dataSet, binSpacing)
            
        else:
            #say that you need to import data first
            self.showImportAlert()
Example No. 9
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    stmr = None
    stopwords = None
    if '-ss' in sys.argv:
        stmr = LancasterStemmer()
        stopwords = read.stopwords(stmr)

    countNames = fn.create_token_count_names(sys.argv)
    rawTokenCountName = countNames[0]
    revTokenCountName = countNames[1]
    profTokenCountName = countNames[2]

    rawTokens = read.token_count(rawTokenCountName, True)
    revTokens = read.token_count(revTokenCountName, True)
    profTokens = read.token_count(profTokenCountName, True)

    if rawTokens is None or revTokens is None or profTokens is None:
        profTokenDict = grab_prof_token_dict(stopwords, stmr)

        if rawTokens is None:
            rawTokens = grab_token_count(profTokenDict, count.num_tokens,
                                         rawTokenCountName)
        if revTokens is None:
            revTokens = grab_token_count(profTokenDict,
                                         count.num_reviews_with_token,
                                         revTokenCountName)

        if profTokens is None:
            profTokens = grab_token_count(profTokenDict,
                                          count.num_profs_with_token,
                                          profTokenCountName)

    plotName = create_plot_name()
    plotFileName = None
    if '-save' in sys.argv:
        plotFileName = fn.create_count_plot_name(sys.argv)

    plot.token_counts(rawTokens, revTokens, profTokens, plotFileName, plotName)
Example No. 10
    def _Flip_X_Axis_fired(self):
        global files_selected
        for ifile in files_selected.file_list:
            this_plot = jpl.Plotting(plot_info={'save_file': ifile})
            if not os.path.isfile(this_plot.PickleFile):
                print('FNF:', this_plot.PickleFile)
            this_plot.LoadPickle(DefWipe=False)
            this_plot.Flip_X_Axis()
            this_plot.PlotAll()
            this_plot.close_fig()
Example No. 11
def percent_income_share(source):
    """Create Income Share chart"""
    columns = (
        "Top 10% income share",
        "Top 5% income share",
        "Top 1% income share",
        "Top 0.5% income share",
        "Top 0.1% income share",
    )
    source = list(gds.getDataSetUsingCSV(source))
    return pld.linechart([gds.timeseries(source, col) for col in columns],
                         labels=columns,
                         title="U.S. Percentage Income Share",
                         ylabel="Percentage")
Example No. 12
def mean_normalized_percent_income_share(source):
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% income share",
        "Top 5% income share",
        "Top 1% income share",
        "Top 0.5% income share",
        "Top 0.1% income share",
    )
    source = list(dataset)
    return pld.linechart(
        [md.normalize(gds.timeseries(source, col)) for col in columns],
        labels=columns,
        title="Mean Normalized U.S. Percentage Income Share",
        ylabel="Percentage")
Example No. 13
def UpdateFileList(plot_info, rc_params, file_list, window_size=None):
    for ifile in file_list:
        this_plot = jpl.Plotting(plot_info={'save_file': ifile})
        if not os.path.isfile(this_plot.PickleFile):
            print('FNF:', this_plot.PickleFile)
            pass
        else:
            print('Updating:', this_plot.PickleFile)
        if window_size is not None:
            this_plot.LoadPickle(DefWipe=False, ForceWindowSize=window_size)
        else:
            this_plot.LoadPickle(DefWipe=False)

        this_plot.UpdateInfo(plot_info)
        pl.rcParams.update(rc_params)
        this_plot.PlotAll()
        this_plot.close_fig()
Example No. 14
def income_composition(source):
    """
    Compares income composition
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% income composition-Wages, salaries and pensions",
        "Top 10% income composition-Dividends",
        "Top 10% income composition-Interest Income",
        "Top 10% income composition-Rents",
        "Top 10% income composition-Entrepreneurial income",
    )
    source = list(dataset)
    labels = ("Salary", "Dividends", "Interest", "Rent", "Business")
    return pld.stackedarea([gds.timeseries(source, col) for col in columns],
                           labels=labels,
                           title="U.S. Top 10% Income Composition",
                           ylabel="Percentage")
Example No. 15
def average_incomes(source):
    """
    Compares percentage average incomes
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        "Top 10% average income",
        "Top 5% average income",
        "Top 1% average income",
        "Top 0.5% average income",
        "Top 0.1% average income",
        "Top 0.05% average income",
    )
    source = list(dataset)
    return pld.linechart([gds.timeseries(source, col) for col in columns],
                         labels=columns,
                         title="U.S. Average Income",
                         ylabel="2008 US Dollars")
Example No. 16
def average_top_income_lift(source):
    """
    Compares top percentage avg income over total avg
    """
    dataset = gds.getDataSetUsingCSV(source)
    columns = (
        ("Top 10% average income", "Top 0.1% average income"),
        ("Top 5% average income", "Top 0.1% average income"),
        ("Top 1% average income", "Top 0.1% average income"),
        ("Top 0.5% average income", "Top 0.1% average income"),
        ("Top 0.1% average income", "Top 0.1% average income"),
    )
    source = list(dataset)
    series = [
        md.delta(gds.timeseries(source, a), gds.timeseries(source, b))
        for a, b in columns
    ]
    return pld.linechart(series,
                         labels=list(col[0] for col in columns),
                         title="U.S. Income Disparity",
                         ylabel="2008 US Dollars")
Example No. 17
"""局部加权线性回归"""
import numpy as np
import PlotData as PD
import regression_lwlr as lw
from plot_lwlr import plot_lwlr
from hold_out import hold_out3
rng=np.random.RandomState(0)
X=10*rng.rand(120)
def model(x):
    y=2*x-5+rng.randn(120)+1.8*np.sin(3*x)
    return y
y=model(X)#随机产生120个数据
print(PD.plot1(X,y))#初步数据可视化
X1=X.copy()
y1=y.copy()
x1_test=X1[80:]#此处数据是作为数据可视化用的,数据可视化要用一维数组
y1_test=y1[80:]
x1_train=X1[:80]
y1_train=y1[:80]
#print(ya)
X=X.reshape(-1,1)
m=len(y)#获取原特征矩阵的行数
ones=np.ones(m).reshape(-1,1)
X=np.hstack([ones,X])#特征矩阵中合并一个x0矩阵,x0初始为1
print(X)#输出特征数组
y=y.reshape(-1,1)
"""对数据进行分割,暂时分三分之二为训练集,三分之一为测试集,设定多个k值,通过数据可视化查看拟合情况然后选取最后k值
   进行留出集验证评估模型各性能指标"""
X_train=X[:80,:]#训练集和测试集分割
y_train=y[:80,:]
X_test=X[80:,:]
Example No. 18
def graphdata(dataframe):
	datasignal, wsignal = pf.ProcessFrame(dataframe, dataframe.Label, 1)
	databack, wback = pf.ProcessFrame(dataframe, dataframe.Label, 0)
	figs = [pld.PlotData([datasignal, databack], feat, [wsignal, wback]) for feat in pf.GetVariables(datasignal)]
	pld.SavePlots(figs)
	pld.ClosePlots(figs)
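Example No. 19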
import os
import ProcessData
import EpochData
import EpochVideo
import PlotData

# Code initialization 
if __name__ == '__main__':
    # Parameters
    main_path = r'C:\Users\HeLab\Documents\Ruijia\Project\EyeTracking\Data\v1\624-1339450-OKR'
    
    info_dir = 'OUT_INFO'
    info_path = os.path.join(main_path, info_dir)
    
    video_dir = 'IN_VIDEO'
    video_name = 'video_2019-11-25-15-55-02.h264'
    video_path = os.path.join(main_path, video_dir, video_name)
    
    data_dir = 'OUT_VIDEO'
    data_file = 'RUN1_2020-01-19-15-16'
    data_path = os.path.join(main_path, data_dir , data_file)
     
    result_dir = 'RESULT'
    result_path = os.path.join(main_path, result_dir, data_file)
    
    # Script execution
    print('\n-------Processing data-------')
    ProcessData.main(main_path, info_path, data_path, result_path)
    print('\n-------Epoching data-------')
    EpochData.main(main_path, info_path, data_path, result_path)
    print('\n-------Plot data-------')
    PlotData.main(main_path, info_path, data_path, result_path)
    print('\n-------Epoching video-------')
    EpochVideo.main(main_path, info_path, video_path, data_path, result_path) 
Example No. 20

def make_data(N, err=1.0, rseed=1):
    """随机抽样数据"""
    rng = np.random.RandomState(rseed)
    X = rng.rand(N, 1)**2
    y = 10 - 1. / (X.ravel() + 0.1)
    if err > 0:
        y += err * rng.randn(N)
    return X, y


X, y = make_data(40)  # generate forty samples
X1 = X.copy()
#print(X)
print(PD.plot1(X, y))  # preliminary visualization, to help choose between a power-of-x and a root-of-x model
y = y.reshape(-1, 1)
"""Choose the degree: use cross-validation over a series of root/power features to get
training- and validation-set costs, compare them to pick a good degree, then evaluate
the model's performance."""
degree = np.arange(1, 7)
vc.linear(X, y, degree, 5)  # cross-validation; the last argument is the number of folds
X = vc.Degree2(4, X)  # keep the model suggested by the preliminary plot; here a root feature (use Degree for a power feature if that fits better)
theta, J = gd.grad(X, y, 0.003, 1500)  # compute theta and the cost J
# visualize the prediction curve
x_test1 = np.linspace(0, 1, 300).reshape(-1, 1)
x_test2 = vc.Degree2(4, x_test1)
y_pre1 = np.dot(x_test2, theta)  # predicted y from theta and x
print("Visualizing the fit")
print(PD.plot2(X1, y, x_test1, y_pre1))  # visualize the fit
y_pre = np.dot(X, theta)
# evaluate the better model's performance
MAE = MAE(y, y_pre)  # call the MAE function (note: this rebinds the name MAE to its result)
Example No. 21
\hoffset -1.5cm
\headsep 1.5cm
\parindent 1.2em
\baselineskip 16pt plus 2pt minus 2pt
\begin{document}
\tiny
'''

this_info = pa.Series()
mat_graph_folder = data_dir + 'MatHack/'
mkdir_p(mat_graph_folder)
this_info['save_file'] = mat_graph_folder + 'FitrComp.pdf'
this_info['title'] = r'fitr comp'
this_info['xlabel'] = r'$\sqrt{8t}/\sqrt{8t_{0}}$'
this_info['ylabel'] = r'ratio'
data_plot = jpl.Plotting(plot_info=this_info)
table_out = {}
for iens, ifile in zip(master_ens_list, this_filelist):
    for iblock in block_flags:
        this_file = ifile.replace('.py3p', iblock + '.py3p')
        if os.path.isfile(this_file):
            print('Reading: ', this_file)
            with open(this_file, 'rb') as f:
                fit_data, dump = pik.load(f)
            data_plot = fit_data.PlotVaryFitr(data_plot)
            fit_data.SortChi()
            table_out[fit_data.name] = fit_data.Get_Formatted_Table(
                fmt_latex=True).to_latex(escape=False).replace(
                    '{}', fit_data.name.replace('_', r'\_'))
        else:
            print('FNF: ', this_file)
Example No. 22
# Script used to launch the models
# Author: Cristian Di Pietrantonio


# required imports
from pymodelica import compile_fmu
from pyfmi import load_fmu

import sys
sys.path.append("../../../Utilities")
import os
import PlotData as plt
import numpy as N


model_name="ClosedSystem"

# all ".mo" files in the folder are loaded
model_files = [f for f in os.listdir(".") if ".mo" in f]

#input_object = (['noise', 'failures'], input_function)

model_comp = compile_fmu(model_name, model_files)
model = load_fmu(model_comp)
maxt = 1000
res = model.simulate(start_time=0, final_time=maxt, options={'ncp':1000})

plt.plotData(2, 2, ['s.x', 's.pOpen', 'm.y', 's.riverLoad'], res)
Example No. 23
def TestAuto():
    '''
    testing function for standard autocorrelation analysis
    '''
    def thisFun(*x):
        return x[0]
    def thisDer(*x):
        return [1]

    const = 100
    this_size = 20000
    values = np.random.uniform(size=this_size)

    values2 = np.arange(this_size)/this_size

    values3 = np.random.normal(loc=0.5,scale=0.25,size=this_size)
    val_df = pa.DataFrame()

    # tuple_list = []
    # for ii in range(100):
    #     tuple_list.append(('-1-',ii))
    # for ii in range(400):
    #     tuple_list.append(('-2-',ii))
    # for ii in range(1000):
    #     tuple_list.append(('-3-',ii))
    # for ii in range(500):
    #     tuple_list.append(('-4-',ii))

    tuple_list = []
    for ii in range(this_size//2):
        tuple_list.append(('-1-',ii))
    for ii in range(this_size//2):
        tuple_list.append(('-2-',ii))
    #
    # tuple_list = []
    # for ii in range(this_size):
    #     tuple_list.append(('-1-',ii))

    indicies = pa.MultiIndex.from_tuples(tuple_list,names=['stream','configs'])
    # indicies = range(this_size)
    # val_df = pa.DataFrame()
    # val_df['one'] = pa.Series(values,index=indicies)
    # val_df['two'] = pa.Series(values2,index=indicies)
    # val_df['three'] = pa.Series(values3,index=indicies)
    # def RatFun(one,two,three):
    #     return const*one/(two*three)
    #
    # def RatFunDer(one,two,three):
    #     return [const/(two*three),-const*one/(three*two**2),-const*one/(two*three**2)]

    val_df = pa.DataFrame()
    val_df['one'] = pa.Series(values,index=indicies)
    val_df['two'] = pa.Series(values2,index=indicies)
    val_df['three'] = pa.Series(values3,index=indicies)
    def RatFun(one,two):
        return const*one*two

    def RatFunDer(one,two):
        return [const*two,const*one]

    testdata = AutoCorrelate(Fun=[thisFun,thisDer],name='test_bootstrap_uniform',data=val_df[['one']])
    testdata2 = AutoCorrelate(Fun=[thisFun,thisDer],name='test_bootstrap_arange',data=val_df[['two']])
    testdata3 = AutoCorrelate(Fun=[thisFun,thisDer],name='test_bootstrap_normal',data=val_df[['three']])
    testdatarat = AutoCorrelate(Fun=[RatFun,RatFunDer],name='test_auto_ratio',data=val_df[['one','two']])

    this_info = pa.Series()
    this_info['save_file'] = this_dir+'/TestGraphs/test_Wopt.pdf'
    this_info['title'] = 'Test Auto Graph'
    # this_info['xlims'] = [0,10]
    # this_info['ylims'] = [0,15]
    import PlotData as jpl
    data_plot = jpl.Plotting(plot_info=this_info)
    data_plot = testdata.PlotWopt(data_plot)
    data_plot = testdata2.PlotWopt(data_plot)
    data_plot = testdata3.PlotWopt(data_plot)
    data_plot = testdatarat.PlotWopt(data_plot)
    # data_plot.LoadPickle(DefWipe=False)
    data_plot.PrintData()
    data_plot.PlotAll()

    this_info = pa.Series()
    this_info['save_file'] = this_dir+'/TestGraphs/test_Auto.pdf'
    this_info['title'] = 'Test Auto Graph'
    # this_info['xlims'] = [0,10]
    # this_info['ylims'] = [0,15]
    import PlotData as jpl
    data_plot = jpl.Plotting(plot_info=this_info)
    data_plot = testdata.PlotTauInt(data_plot)
    data_plot = testdata2.PlotTauInt(data_plot)
    data_plot = testdata3.PlotTauInt(data_plot)
    data_plot = testdatarat.PlotTauInt(data_plot)
    # data_plot.LoadPickle(DefWipe=False)
    data_plot.PrintData()
    data_plot.PlotAll()

    return testdata,testdata2,testdata3,testdatarat
Example No. 24
# Major library imports
import numpy as np
import PlotData as jpl

# Enthought library imports
from enable.api import Component, ComponentEditor, ColorTrait
from traits.api import HasTraits, Instance, Float, String, List, Enum, Bool, Int
from traitsui.api import Item, HSplit, VGroup, View

# Chaco imports
from chaco.api import ArrayPlotData, HPlotContainer, Plot
from chaco.api import marker_trait
from chaco.tools.api import PanTool, ZoomTool

data_plot = jpl.TestChaco()
simple_col = ['blue', 'red', 'green', 'purple', 'gold', 'black']


def hex_to_rgb(value):
    if not isinstance(value, str):
        return value
    value = value.lstrip('#')
    lv = len(value)
    return tuple(
        float(int(value[i:i + lv // 3], 16)) / 255.
        for i in range(0, lv, lv // 3))
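# Illustrative check (not part of the original): hex_to_rgb('#ff8000') returns
# (1.0, ~0.502, 0.0), and non-string inputs are passed through unchanged.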


class BeginFrame(HasTraits):
    """
Example No. 25
data = np.loadtxt('ex1data1.txt', delimiter=",")  # load the data file into a NumPy array
#print(data)  # show the data
X = data[:, 0:-1]  # extract the feature array
y = data[:, -1]  # extract the label array (this slice yields a 1-D array)
#print(X)
#print(y)
y = y.reshape(-1, 1)  # reshape so the dimensions match the feature array
#print(y_1)
#print(sum(X))
# use our own normalization library
h = ns.Nor()
#X=h.S_n_normalize(X,"standard")  # standardize the array (zero mean, unit variance)
X = h.S_n_normalize(X, "normalize")  # scale the array so all values lie between zero and one
print(X)
print("Preliminary data visualization")  # over/under-fitting is judged mainly from the plots
print(PD.plot1(X, y))  # visualize the data to check the normalization
"""Outliers were found; our own outlier-handling library could be used to treat them:
g=dl.Err()
X=g.S_errvaldeal(X,'mean')
print((PD.plot1(X,y)))  # visualize again to check the result"""
X1 = X
m = X.shape[0]  # number of rows in the feature matrix
n = X.shape[1]  # number of columns in the feature matrix
ones = np.ones(m).reshape(-1, 1)
X = np.hstack([ones, X])  # prepend a column of ones (x0 = 1) to the feature matrix
#print(X)
q = int(m * 0.3)
#print(q)
X_train = X[q:, :]  # 30% of the samples become the validation set, the rest the training set
y_train = y[q:, :]  # done with slicing
X_val = X[:q, :]
Example No. 26
# Script used to launch the models
# Author: Cristian Di Pietrantonio


# required imports
from pymodelica import compile_fmu
from pyfmi import load_fmu

import sys
sys.path.append("../../../Utilities")
import os
import PlotData as plt
import numpy as N


model_name="ClosedSystem"

# all ".mo" files in the folder are loaded
model_files = [f for f in os.listdir(".") if ".mo" in f]

#input_object = (['noise', 'failures'], input_function)

model_comp = compile_fmu(model_name, model_files)
model = load_fmu(model_comp)
maxt = 10
res = model.simulate(start_time=0, final_time=maxt)

plt.plotData(2, 2, ['process1.myState', 'process2.myState', 's.turn', 'm.y'], res)
Example No. 27
data = np.loadtxt('ex1data1.txt', delimiter=",")  # load the data file into a NumPy array
#print(data)  # show the data
X = data[:, 0:-1]  # extract the feature array
y = data[:, -1]  # extract the label array (this slice yields a 1-D array)
#print(X)
#print(y)
y = y.reshape(-1, 1)  # reshape so the dimensions match the feature array
#print(y_1)
#print(sum(X))
# use our own normalization library
h = ns.Nor()
#X=h.S_n_normalize(X,"standard")  # standardize the array (zero mean, unit variance)
X = h.S_n_normalize(X, "normalize")  # scale the array so all values lie between zero and one
print(X)
print("Preliminary data visualization")
print(PD.plot1(X, y))  # visualize the data to check the normalization

X1 = X.copy()
m = X.shape[0]  # number of rows in the feature matrix
n = X.shape[1]  # number of columns in the feature matrix
ones = np.ones(m).reshape(-1, 1)
X = np.hstack([ones, X])  # prepend a column of ones (x0 = 1) to the feature matrix
#print(X)
theta = np.zeros((n + 1, 1))  # initialize theta as an (n+1)-row zero vector
theta = ht.hold_out2(X, y, 0.8, 5)  # hold-out evaluation
print("Typically a large J_train indicates underfitting (high bias); if J_test is much larger than J_train, the model is overfitting")
#theta=normalEqu(X,y)  # build the model with the better value
#print('theta')
#print(theta)  # show theta

x_test1 = np.linspace(0, 1, 300).reshape(-1, 1)  # plot the fitted curve to inspect the fit
Example No. 28
    def ShowMacro(self):
        data = sd.load_obj('Macro')
        pd.PlotMacro(data['MoneySupply'], 'M2', '2016-1-1', '2017-11-1',
                     ['m2'])
Example No. 29
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    vectFileName = fn.create_prof_vect_name(sys.argv, True)
    simMatFileName = fn.create_sim_mat_name(sys.argv)
    predsFileName = fn.create_preds_name(sys.argv)

    print(vectFileName)
    print(simMatFileName)
    print(predsFileName)

    # Grab the ratings vector
    if '-d' in sys.argv:
        ratings = read.difficulty_rating_vect(vectFileName)
    else:
        ratings = read.overall_rating_vect(vectFileName)

    # Assign similarity metric
    sim_f = vp.inverse_euclidean_distance
    if '-cos' in sys.argv:
        sim_f = vp.cosine_similarity
    elif '-pear' in sys.argv:
        sim_f = vp.abs_pearson_correlation

    # Set if weighted or not
    weighted = True
    if '-unweighted' in sys.argv:
        weighted = False

    # Grab predictions or create them if not available
    predictions = read.knn_predictions(predsFileName)
    if predictions is None:

        simMat = read.similarity_matrix(simMatFileName)
        if simMat is None:
            wordVects = read.word_vects(vectFileName)
            if wordVects is None:
                print("Vector file " + vectFileName + " does not exist")
                exit()
            wordVects = vp.process_token_vectors(wordVects, sys.argv)
            simMat = knn.get_similarity_matrix(wordVects, sim_f)
            write.similarity_matrix(simMat, simMatFileName)

        predictions = knn.knn_dataset(ratings, MaxK, simMat, weighted)
        write.knn_predictions(predictions, predsFileName)

    idxToPlot = None

    if '-maxK' in sys.argv:
        maxK = int(sys.argv[sys.argv.index('-maxK') + 1])
        predictions = predictions[:, :maxK]

    pidVect = read.pid_vect(vectFileName)
    singleRevIdxs = vp.pids_to_idxs(pidVect,
                                    read.pids_file(fn.PidsSingleRevFile))
    smallLenIdxs = vp.pids_to_idxs(pidVect,
                                   read.pids_file(fn.PidsSmallRevLenFile))

    plotFileName = None
    if '-save' in sys.argv:
        plotFileName = fn.create_knn_accuracy_plot_name(sys.argv)

    # Output results of the run
    plot.knn_error(
        predictions,
        ratings,
        title=plot.create_knn_error_title(sys.argv),
        idxToPlot=[singleRevIdxs, smallLenIdxs],
        subTitles=[
            "Error with profs with one review",
            "Error with profs with aggrigate review " +
            "lengths one std div above the mean " + "review length or less"
        ],
        saveFile=plotFileName)
Example No. 30
# In[]
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import PlotData
import scipy.optimize as op

# In[]
df = pd.read_csv("ex2data1.txt", names=['Exam 1', 'Exam 2', 'Admitted'])
x = df.iloc[:, 0:2].values
y = df.iloc[:, 2:3].values

# In[]
PlotData.plotData(x, y)
theta = np.zeros([1, 3])
ones = np.ones([x.shape[0], 1])
X = np.concatenate((ones, x), axis=1)


# In[]
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def cost(x, y, theta):
    a = y * np.log(sigmoid(x @ theta.T))
    b = (1 - y) * np.log(1 - sigmoid(x @ theta.T))
    return np.sum(-(a + b) / len(x))


def gradient(x, y, theta, alpha, iters):
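    # The example is cut off here; the body below is only an illustrative sketch
    # (not the original author's code): plain batch gradient descent using the
    # sigmoid defined above.
    for _ in range(iters):
        error = sigmoid(x @ theta.T) - y                   # (m, 1) residuals
        theta = theta - (alpha / len(x)) * (error.T @ x)   # (1, 3) update
    return theta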
Example No. 31
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    profDicts = read.prof_dicts()

    revLens = st.rev_len_arr(profDicts)
    print("Number of Reviews:", revLens.shape[0])
    print("Mean review length:", revLens.mean())
    print("Std Dev review length:", revLens.std())
    print()

    numRevsProf = st.num_revs_profs(profDicts)
    print("Number of professors:", numRevsProf.shape[0])
    print("Mean num reviews per prof:", numRevsProf.mean())
    print("Std Dev num revews per prof:", numRevsProf.std())
    print()

    profRevLen = st.profs_revs_len(profDicts)
    print("Mean tokens per prof:", profRevLen.mean())
    print("Std Dev tokens per prof:", profRevLen.std())
    print()

    overRats = np.array([prof['rating_overall'] for prof in profDicts],
                        dtype=float)
    diffRats = np.array([prof['rating_difficulty'] for prof in profDicts],
                        dtype=float)

    overRatMean = overRats.mean()
    diffRatMean = diffRats.mean()

    print("Overall ratings mean:", overRatMean)
    print("Overall ratings std dev:", overRats.std())
    print("Difficulty ratings mean:", diffRatMean)
    print("Difficulty ratings std dev:", diffRats.std())
    print()

    overMeanDiff = overRats - overRatMean
    overMeanDiff = np.abs(overMeanDiff)
    diffMeanDiff = diffRats - diffRatMean
    diffMeanDiff = np.abs(diffMeanDiff)

    print("Nieve approach to prediction: Guessing the Mean")
    print("All profs")
    print("Overall absolute error mean:", overMeanDiff.mean())
    print("Overall absolute error std div:", overMeanDiff.std())
    print("Difficulty absolute error mean:", diffMeanDiff.mean())
    print("Difficulty absolute error std div:", diffMeanDiff.std())
    print()

    oneRevPids = set(read.pids_file(fn.PidsSingleRevFile))
    oneOverRats = np.array([
        prof['rating_overall']
        for prof in profDicts if prof['pid'] in oneRevPids
    ],
                           dtype=float)
    oneOverDiff = np.abs(oneOverRats - oneOverRats.mean())

    print("Profs with one review")
    print("One review absolute error mean:", oneOverDiff.mean())
    print("One review absolute error std div:", oneOverDiff.std())
    print()

    smallRevPids = set(read.pids_file(fn.PidsSmallRevLenFile))
    smallOverRats = np.array([
        prof['rating_overall']
        for prof in profDicts if prof['pid'] in smallRevPids
    ],
                             dtype=float)
    smallOverDiff = np.abs(smallOverRats - smallOverRats.mean())

    print("Profs with short reviews")
    print("Small review absolute error mean:", smallOverDiff.mean())
    print("small review absolute error std div:", smallOverDiff.std())
    print()

    save = False
    if '-save' in sys.argv:
        save = True

    plot.plot_word_review_count(revLens, profRevLen, numRevsProf, save=save)
Example No. 32
        logger.info("Getting metadata")
        nodeId, srcId = getRequestParameters("training")


        file = os.path.join(dir, "../run.sh")
        logger.info("creating training input file")
        gen.generate(args.reqId)

        logger.info("training begun")
        call([file, args.reqId, str(nodeId), srcId])

        file = os.path.join(dir, "../test.sh")
        logger.info("evaluation begun")
        call([file, args.reqId, str(nodeId), srcId, emergent_log_file])

        plt.setupTrainingOutput(args.reqId)
        logger.info("Storing output")
        dc.store_correlation(request_id=args.reqId)
        logger.info("Finished.")
        log_end_time("training")

    else:
        logger.info("Started prediction job")
        logger.info("Getting metadata")
        nodeId, srcId = getRequestParameters("prediction")

        file = os.path.join(dir, "../predict.sh")
        logger.info("creating prediction input file")
        genP.generate(args.reqId)
        logger.info("prediction begun")
        call([file, args.reqId, str(nodeId), str(srcId), emergent_log_file])
Example No. 33
# Script used to launch the models
# Author: Cristian Di Pietrantonio


# required imports
from pymodelica import compile_fmu
from pyfmi import load_fmu

import sys
sys.path.append("../../../model_checking/dfs-cristian/PyLib")
import os
import PlotData as plt
import numpy as N


model_name="ClosedSystem"

# all ".mo" files in the folder are loaded
model_files = [f for f in os.listdir(".") if ".mo" in f]

#input_object = (['noise', 'failures'], input_function)

model_comp = compile_fmu(model_name, model_files)
model = load_fmu(model_comp)
maxt = 1000
res = model.simulate(start_time=0, final_time=maxt, options={'ncp':1000})

plt.plotData(2, 2, ['ec.quantX', 's.pOpen', 'm.y', 's.riverLoad'], res)
Example No. 34
X2 = data[:, 1]  # 1-D array
#print(X2)
#print(y)
y = y.reshape(-1, 1)  # reshape to match the feature array; in practice this is the house price
#print(y)
#print(sum(X))
# use our own normalization library
h = ns.Nor()
#X=h.S_n_normalize(X,"standard")  # standardize the array (zero mean, unit variance)
X = h.S_n_normalize(X, "normalize")  # scale the array so all values lie between zero and one
#y=h.S_n_normalize(y,"normalize")  # scale the labels to [0, 1] as well
print(X)
X1 = X[:, 0]  # 1-D array: the house's area
X2 = X[:, 1]  # 1-D array: the house's height
#print("Preliminary data visualization")
print(PD.plot3(X1, X2, y.ravel()))  # visualize the data to check the normalization
X1 = X1.reshape(-1, 1)
X2 = X2.reshape(-1, 1)
X = X1 * X2  # total volume of the house
X = vc.Degree(4, X)  # polynomial feature selection
m = len(y)
#ones=np.ones(m).reshape(-1,1)
#X=np.hstack([ones,X])  # prepend a column of ones (x0 = 1) to the feature matrix
#print(X)
"""Use cross-validated cost values to choose the regularization penalty, keep the model
with the better penalty, then evaluate its performance."""
reg_choose(X, y, numval=10)  # 10-fold cross-validation: cost table for different penalties
print("The table shows that all J values are fairly large, which suggests underfitting")
theta = normalEqu_Reg(X, y, 0.0)  # pick the better theta; here the penalty is zero
x_test1 = np.linspace(0, 1, 300).reshape(-1, 1)  # plot the fitted curve to inspect the fit
x_test3 = x_test1 * x_test1
x_test3 = vc.Degree(4, x_test3)
Example No. 35
    def ShowFundamental(self, year, quarter, area=None, industry=None):
        data = sd.load_obj(str(year) + str(quarter) + 'Qfundamental')
        pd.PlotSingleFundamental(data['basic'], 'profit', area, industry)
        pd.PlotMultiFundamental(data['basic'], 'pe', 'rev', 'totals', 'profit',
                                area, industry)
Example No. 36
        logger.info("Started training job")
        logger.info("Getting metadata")
        nodeId, srcId = getRequestParameters("training")

        file = os.path.join(dir, "../run.sh")
        logger.info("creating training input file")
        gen.generate(args.reqId)

        logger.info("training begun")
        call([file, args.reqId, str(nodeId), srcId])

        file = os.path.join(dir, "../test.sh")
        logger.info("evaluation begun")
        call([file, args.reqId, str(nodeId), srcId, emergent_log_file])

        plt.setupTrainingOutput(args.reqId)
        logger.info("Storing output")
        dc.store_correlation(request_id=args.reqId)
        logger.info("Finished.")
        log_end_time("training")

    else:
        logger.info("Started prediction job")
        logger.info("Getting metadata")
        nodeId, srcId = getRequestParameters("prediction")

        file = os.path.join(dir, "../predict.sh")
        logger.info("creating prediction input file")
        genP.generate(args.reqId)
        logger.info("prediction begun")
        call([file, args.reqId, str(nodeId), str(srcId), emergent_log_file])
Example No. 37
    def ShowPrice(self, ticker):
        data = sd.load_obj(ticker + 'price')
        pd.PlotPrice(data['price'], ticker)
        pd.PlotTick(data['tick'], ticker)
Example No. 38
def main():

    if '-h' in sys.argv:
        print_usage_message()
        exit()

    vectorFileName = fn.create_prof_vect_name(sys.argv, True)
    tokenVects = read.word_vects(vectorFileName)
    if tokenVects is None:
        print("Could not find token vects")
        print("Use 'createProfVectors.py' to create vectors")
        exit()

    tokenVects = vp.process_token_vectors(tokenVects, sys.argv)

    if '-d' in sys.argv:
        ratings = read.difficulty_rating_vect(vectorFileName)
    else:
        ratings = read.overall_rating_vect(vectorFileName)

    # Create Training and validation sets
    pidVect = read.pid_vect(vectorFileName)

    nonSingleSmallIdxs = ffnn.non_single_small_idxs(pidVect)
    singleIdxs = vp.pids_to_idxs(pidVect, read.pids_file(fn.PidsSingleRevFile))
    smallIdxs = vp.pids_to_idxs(pidVect,
                                read.pids_file(fn.PidsSmallRevLenFile))
    singleSmallIdxs = list(set(singleIdxs).union(set(smallIdxs)))
    singleSmallIdxs.sort()
    singleSmallIdxs = np.array(singleSmallIdxs)

    trainingVects = tokenVects[nonSingleSmallIdxs, :]
    trainingRatings = ratings[nonSingleSmallIdxs]

    validVects = tokenVects[singleSmallIdxs, :]
    validRatings = ratings[singleSmallIdxs]

    print(trainingVects.shape, trainingRatings.shape, validVects.shape,
          validRatings.shape)
    """
  
   xTrain, xValid, yTrain, yValid = train_test_split(tokenVects, ratings,
                                                      test_size=0.3)
   """
    # Select and train model
    if '-deep' in sys.argv:
        model = ffnn.deep_model(tokenVects.shape[1])
    else:
        model = ffnn.shallow_model(tokenVects.shape[1])

    history = model.fit(trainingVects,
                        trainingRatings,
                        epochs=10,
                        batch_size=5,
                        validation_data=(validVects, validRatings))

    plotTitle = plot.ffnn_error_title(sys.argv)
    outfile = None
    if '-save' in sys.argv:
        outfile = fn.create_ffnn_plot_name(sys.argv)

    plot.ffnn_error(history, title=plotTitle, filename=outfile)