Example #1
File: mvr.py Project: AngeloK/cs584-hws
def select_mvar_model_from_cross_validation(datafile, max_degree, folder=10):
    data = read_data(datafile)
    minimum_error = np.inf
    minimum_training_error = np.inf
    global_minimum_error = np.inf
    optimal_theta = 0
    global_optimal_theta = 0
    global_optimal_degree = 0
    testing_error_collection = {}
    training_error_collection = {}
    time_collection = {}
    for degree_index in range(1, max_degree+1):
        time_start = time.time()
        # Reset the per-degree bookkeeping so each degree is evaluated independently.
        minimum_error = np.inf
        minimum_training_error = np.inf
        optimal_theta = 0
        for testing_data_index in range(folder):
            testing_data, testing_size, training_data, training_size = cross_validation(data, testing_data_index)
            theta, training_error, testing_error = mvar_regression(testing_data, testing_size, training_data, training_size, degree_index)
            if testing_error < minimum_error:
                minimum_error = testing_error
                minimum_training_error = training_error
                optimal_theta = theta
        print "Min error: %s" %str(minimum_error)
        testing_error_collection[degree_index] = minimum_error
        training_error_collection[degree_index] = minimum_training_error
        if minimum_error < global_minimum_error:
            global_minimum_error = minimum_error
            global_optimal_theta = optimal_theta
            global_optimal_degree = degree_index
        time_end = time.time()
        time_cost = time_end - time_start
        time_collection[degree_index] = time_cost
    print "Global Optimal Theta: %s" % global_optimal_theta
    print "Global Optimal Degree: %s" % global_optimal_degree
    return testing_error_collection, training_error_collection, time_collection
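Note: the examples on this page depend on project-specific helpers such as read_data and cross_validation whose definitions are not shown here. As a rough, hypothetical sketch (not the original implementation), a version consistent with how Example #1 calls them could look like this:

import numpy as np

def read_data(datafile):
    # Assumption: the data file is a plain-text numeric table, one sample per row.
    return np.loadtxt(datafile)

def cross_validation(data, testing_index, k=10):
    # Assumption: split the rows into k folds; fold `testing_index` is the test
    # set and the remaining folds are stacked into the training set.
    folds = np.array_split(data, k)
    testing_data = folds[testing_index]
    training_data = np.vstack([f for i, f in enumerate(folds) if i != testing_index])
    return testing_data, len(testing_data), training_data, len(training_data)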
Example #2
def run(N):
    """ Runs N episodes of a given length and then runs a demo with greedy policy
	"""
    agent = Agent()

    data = read_data('./data/q.dat')
    if data is not None:
        agent.Q = data

    for i in range(N):
        bot = Bot()
        run_episode(bot, agent, None, draw=False, policy='eps_greedy')
    # if bot.center[1] > 7: print "robot moved on: %i steps" % bot.center[1]

    pg.init()
    pg.display.init()
    surf = pg.display.set_mode((800, 600))
    surf.fill((0, 0, 0))
    pg.display.flip()
    print "Surf1:", surf

    bot = Bot()
    bot.info()
    run_episode(bot, agent, surf, draw=True, policy='eps_greedy', episode_len=60)
    print "Robot's moves:\n", bot.path
    print "Robot walked %i m" % bot.center[1]
    print "Last state value=%.1f" % agent.get_state_value(bot.get_state())
    write_data(agent.Q, "data/q.dat")
    write_path(agent.Q_values, "data/path.csv")
def convert(folder_name):
    os.chdir(folder_name)
    las_filename = ''
    try:
        las_filename = [x for x in os.listdir() if x[-3:] == 'las'][0]
    except IndexError:
        return

    if os.path.exists('failed.txt') or not os.path.exists(
            'cropped_size.txt'
    ):  # This probably indicates the crop failed; remove the las file so it doesn't take up space.
        os.remove(las_filename)
        return

    out_filename = las_filename.split('.')[0] + '.data'
    with open('cropped_size.txt', 'r') as f:
        [desired_rows, desired_cols,
         channels] = [int(x) for x in f.readline().split(',')]
    success = convert_las_to_matrix_and_store(las_filename, desired_rows,
                                              desired_cols, out_filename)
    os.remove(las_filename)
    if success:
        # open up the file and count the 0's in the matrix
        # If there are too many, reject it
        m = read_data(out_filename)
        zeros = m.size - numpy.count_nonzero(m)
        if zeros < 125000:
            with open('pickled', 'w') as f:
                f.write('')  # This file indicates success to the pipeline
        else:
            with open('failed.txt', 'a') as f:
                f.write("Found " + str(zeros) + " zeros")
Example #4
def DrawPIVPlot(files, bg_a, points):
    # reading saved data and creating vector labels
    x, y, u, v, mask = tools.read_data(files)
    label = []
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            label.append(f'Ux:{u[i, j]:6.4f} , Uy:{v[i, j]:6.4f} (mm/s)')
    # plotting the results
    fig, ax = plt.subplots()  # ax is used below and fig is returned
    #ax.imshow(np.flipud(bg_a), cmap='gray')
    #plt.draw()
    q = ax.quiver(x,
                  y,
                  u,
                  v,
                  color='b',
                  units='xy',
                  minlength=0.1,
                  minshaft=1.2)
    ax.set_title('Velocity Field', size=16)
    patches = [Polygon(points, closed=True)]
    p = PatchCollection(patches, alpha=1.0)
    ax.add_collection(p)
    ax.axis([0, 780, 0, 580])
    plt.xlabel('x (mm)', size=14, labelpad=2)
    plt.ylabel('y (mm)', size=14, labelpad=-10)
    return fig, q, label
def initialize_clustering(data_path, train_percent, clustering_algorithm,
                          number_of_clusters):
    df_pre, df_raw = read_data(data_path=data_path)
    records = make_records(df_pre=df_pre, df_raw=df_raw)
    train_records, test_records = divide_train_test(
        records=records, train_percent=train_percent)
    return get_clusters(clustering_algorithm, train_records,
                        number_of_clusters)
Example #6
File: run.py Project: tsybulkin/dor
def run(nbr_episodes, mode):
    if mode == "forward": Q = read_data("data/forward.dat")
    elif mode == "right": Q = read_data("data/right.dat")
    elif mode == "left": Q = read_data("data/left.dat")
    else:
        print("Unknown mode:", mode)
        return None

    seed()

    for i in range(nbr_episodes):
        D = Dor()
        run_episodes(mode, episode_len, D, Q)
        print("Dor coordinates:", D.CoM, "orientation:%i%%" % degrees(D.orientation))

    for action in D.actions:
        print(action)

    #clean_q(Q)
    write_data(Q, "data/" + mode + ".dat")
def make_front_and_back_examples(in_dir, out_dir):
    """生成每个样本的正面和背面两个角度的图像矩阵
    """
    for f in os.listdir(in_dir):
        fp = os.path.join(in_dir, f)
        fn = f.split('.')[0]

        data = tools.read_data(fp)
        np.save(os.path.join(out_dir, fn + '_front.npy'),
                np.rot90(data[:, :, 0]))
        np.save(os.path.join(out_dir, fn + '_back.npy'),
                np.rot90(data[:, :, 31]))
def select_linear_model(datafile, reduce_training_data=False):
    data = read_data(datafile)
    one_vector = np.ones((len(data), 1), dtype=float)
    Z = np.column_stack((one_vector, data))
    k = 10
    plt.scatter(data[:, 0], data[:, 1], color="red")
    minimum_error = np.inf
    optimal_parameter = 0
    for testing_index in range(k):
        testing_data, testing_size, training_data, training_size = cross_validation(
            data, testing_index)
        print "Training size = %s" %str(training_size)
        if reduce_training_data:
            print "Data reduced..."
            reduced_training_data_count = int(training_size * 0.75)
            training_size = reduced_training_data_count
            print "Reduced training data size = %s" %str(training_size)
            training_data = np.copy(
                training_data[
                    :reduced_training_data_count,
                    :])
        parameters = normal_equation(training_data)
        predicted_by_training_data = create_polynomial_regression_function(
            training_data[:, 0],
            parameters)
        training_error = np.dot(
            (predicted_by_training_data - training_data[:, 1]).T,
            (predicted_by_training_data - training_data[:, 1])) / training_size
        testing_data_x = testing_data[:, 0]
        testing_data_y = testing_data[:, 1]
        predicted_testing_data = create_polynomial_regression_function(
            testing_data_x,
            parameters)
        testing_error = np.dot(
            (predicted_testing_data - testing_data_y).T,
            (predicted_testing_data - testing_data_y)) / testing_size
        if testing_error < minimum_error:
            minimum_error = testing_error
            optimal_parameter = parameters
        print "Training Error %s" % training_error
        print "Testing Error %s" % testing_error
        print "Parameter %s " % parameters
        print "\n"
    x_max = int(max(data[:, 0]))+2
    x_min = int(min(data[:, 0]))-2
    x = np.arange(x_min, x_max)
    y = create_polynomial_regression_function(x, optimal_parameter)
    plt.plot(x, y)
    print "The minimum testing error we've got from this training is: \n %s" % minimum_error
    print "Model parameters got from this training data is: \n %s" % optimal_parameter
    print "\n"
Example #9
File: mvr.py Project: AngeloK/cs584-hws
def analyze_dual_regression(datafile, lambda_, sigma):
    data = read_data(datafile)
    minimum_error = np.inf
    time_cost = 0
    time_start = time.time()
    for folder_index in range(10):
        testing_data, testing_size, training_data, training_size = cross_validation(data, folder_index)
        testing_error = dual_regression(training_data, testing_data, lambda_=lambda_, sigma=sigma)
        if testing_error < minimum_error:
            minimum_error = testing_error
    time_end = time.time()
    print "time_costing %s" % str(time_end - time_start)
    print "Minimum Testing Error %s" %str(testing_error)
def update_tables():
    if Path(data_cleaned_dir).is_dir():
        check()

        print('Updating tables...')
        excel_file_paths = sorted(data_raw_dir.glob('**/*.xlsx'),
                                  reverse=True)[:MAX_NUMBER_OF_MONTHS]

        latest_excel_file_path = excel_file_paths[0]

        max_date = latest_excel_file_path.name.split('.')[0]

        date_str_list = [p.name.split('.')[0] for p in excel_file_paths[1:]]

        df_product_price, df_compound = tools.read_data(latest_excel_file_path)

        df_compound = (
            df_compound.drop_duplicates().loc[lambda x: x.Date == max_date])

        selected_columns = [
            '투여', '식약분류', '주성분코드', '제품코드', '제품명', '업체명', '규격', '단위', '전문/일반',
            '비고'
        ]

        df_product = (df_product_price.loc[lambda x: (x.비고.isna()) &
                                           (x.Date == max_date)]
                      [selected_columns].drop_duplicates().reset_index().drop(
                          'index', axis='columns').set_index('제품코드'))

        df_price_new = (df_product_price[[
            '제품코드', 'Date', '상한금액'
        ]].drop_duplicates().reset_index().drop(
            'index', axis='columns').assign(상한금액=lambda x: pd.to_numeric(
                x.상한금액.apply(lambda v: None if type(v) is not int else v),
                errors='coerce')))

        df_price_old = (
            pd.read_pickle(f'{data_cleaned_dir}/price.pickle').reset_index(
            ).loc[lambda x: x.Date.isin(date_str_list)])

        df_price = (pd.concat([df_price_new, df_price_old]).set_index('제품코드'))

        df_compound.to_pickle(data_compound_path)
        df_product.to_pickle(data_product_path)
        df_price.to_pickle(data_price_path)

        print('Tables have been updated.')
    else:
        tools.check_data_raw()
def draw_scatter_graph(files, save=False):
    '''
    Plot a scatter graph for each dataset. If save is True, the resulting figure
    is saved as "data_scatter_plot.png".
    '''
    graph_index = 1
    for f in files:
        data = read_data(f)
        # draw sub graph by graph index.
        plt.subplot(2, 2, graph_index)
        plt.title(f)
        plt.scatter(data[:, 0], data[:, 1], color="red")
        graph_index += 1
    if save:
        plt.savefig("data_scatter_plot.png")
    plt.show()
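A typical call, using hypothetical file names, would be:

draw_scatter_graph(["dataset-1.dat", "dataset-2.dat", "dataset-3.dat", "dataset-4.dat"], save=True)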
Example #12
class Test_Login(unittest.TestCase):
    def setUp(self):
        self.login = Login()
        self.log = GetLog.get_logger()

    @parameterized.expand(read_data("login.yaml"))
    def test_login(self, mobile, password):
        result = self.login.login_inter(mobile, password)
        self.log.info("登录结果:{}".format(result.json()))
        print("登录结果:", result.json())
        assert_common(self, result)
        # 获取 token 值,并追加到信息头
        token = result.json()["data"]
        self.log.info("token值:{}".format(token))
        api.headers["Authorization"] = "Bearer " + token
        print("追加token后的信息头:", api.headers)
def select_polynomial_models(data_file, max_degree=6, save=False, reduce_training_data=False):
    '''
    Use 10-fold cross-validation to select a polynomial model.
    '''
    data = read_data(data_file)

    # Define the range of x to draw model graph.
    if max_degree < 1:
        raise ValueError("max_degree must be at least 1")
    # if max_degree == 1, then this function only draws one graph, otherwise,
    # it will draw multiple graphs.
    if max_degree == 1:
        optimal_parameters, testing_error, training_error = select_polynomial_model_from_cross_validation(
            data, 1, reduce_training_data=reduce_training_data)
        optimal_degree = 1
        error_collections = {1: testing_error}
        training_error_collections = {1: training_error}
    else:
        optimal_parameters = 0
        minimum_testing_error = np.inf
        optimal_degree = 0

        # Collect the smallest errors for each degree for plotting
        # Degree-Error line graph.
        error_collections = {}
        training_error_collections = {}
        for i in range(1, max_degree+1):
            parameters, testing_error, training_error = select_polynomial_model_from_cross_validation(
                data, i, reduce_training_data=reduce_training_data
            )

            # Save degree and error in a dict.
            error_collections[i] = testing_error
            training_error_collections[i] = training_error
            if testing_error < minimum_testing_error:
                optimal_degree = i
                minimum_testing_error = testing_error
                optimal_parameters = parameters
    plt.scatter(data[:, 0], data[:, 1], color="red")
    x_max = int(max(data[:, 0])) + 2
    x_min = int(min(data[:, 0])) - 2
    x = np.arange(x_min, x_max)
    y = create_polynomial_regression_function(x, optimal_parameters)
    plt.plot(x, y)
    # Save only after the scatter points and the fitted curve have been drawn.
    if save:
        plt.savefig("polynomial_model_graph.png")
    # return optimal_parameters, optimal_degree
    print "Optimal Degree %s" %str(optimal_degree)
    print "Optimal Paremeters %s" %str(optimal_parameters)
    return error_collections, training_error_collections
def given_linear_regression(datafile):
    '''
    Compare performances of my function with the given regression function.
    '''

    # Import data
    data = read_data(datafile)
    col, row = data.shape

    testing_data, testing_size, training_data, training_size = cross_validation(
        data, 1)

    # Define the range of x
    x_max = int(max(data[:, 0]))+2
    x_min = int(min(data[:, 0]))-2
    x = np.arange(x_min, x_max)
    x_length = len(x)
    one_vector_ = np.ones((x_length, 1), dtype=float)
    X = np.column_stack((one_vector_, x.reshape(x_length, 1)))

    # Draw my function's fitting result graph.
    # plt.subplot(2, 1, 1)
    # plot_regression_model(
        # "My Method",
        # training_data,
        # training_size,
        # testing_data,
        # testing_size,
        # 1)

    # Draw given function's fitting result graph
    one_vector = np.ones((training_size, 1), dtype=float)
    one_vector_test = np.ones((testing_size, 1), dtype=float)
    Z = np.column_stack((one_vector, training_data[:, 0]))
    Z_test = np.column_stack((one_vector_test, testing_data[:, 0]))
    # print Z
    clf = linear_model.LinearRegression()
    clf.fit(Z, training_data[:, 1])
    print("Residual sum of squares: %.2f"
          % np.mean((clf.predict(Z_test) - testing_data[:, 1]) ** 2))
    y = clf.predict(X)
    y_training = clf.predict(Z)
    plt.scatter(data[:, 0], data[:, 1], color="red")
    plt.plot(x, y)
    x_test = testing_data[:, 0]
    y_test = testing_data[:, 1]
Example #16
def test_movement():
    data = tools.read_data('data/sensor.dat')
    x_p = 0
    y_p = 0
    z_p = 0
    for (step, reading) in enumerate(data):
        print(step)
        # if step == 10:
        # break
        r1 = reading['odometry']['r1']
        r2 = reading['odometry']['r2']
        t = reading['odometry']['t']
        x = x_p + t * math.cos(z_p + r1)
        y = y_p + t * math.sin(z_p + r1)
        z = z_p + r1 + r2
        x_p = x
        y_p = y
        z_p = z
        drawing.draw_state_for_me(step, x, y, z, '/testm/')
Example #17
File: mvr.py Project: AngeloK/cs584-hws
def analyze_stochastic_gradient_descent(datafile):
    '''
    Run 10-fold cross-validation with gradient-descent regression and print the
    time cost, the minimum testing error, and the corresponding parameters.
    '''
    time_start = time.time()
    minimum_error = np.inf
    optimal_parameter = 0
    data = read_data(datafile)
    for testing_data_index in range(10):
        testing_data, testing_size, training_data, training_size = cross_validation(data, testing_data_index)
        parameters = iterative_compute_gd(training_data, 2)
        y = predicted_value_of_dual_regression(parameters, testing_data[:, :-1], 2)
        testing_error = compute_testing_error(y, testing_data[:, -1])
        if testing_error < minimum_error:
            minimum_error = testing_error
            optimal_parameter = parameters
    time_end = time.time()
    time_cost = time_end - time_start
    print(time_cost, minimum_error, optimal_parameter)
Example #19
import tools
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

if __name__ == '__main__':
    args = tools.parse_args_visualize()
    model = tools.load_model(args['load_path'])
    data, target = tools.read_data(args['data_path'])
    sns.set_style('darkgrid')
    sns.scatterplot(x=data, y=target, label='Data')
    reg_x = np.arange(min(data), max(data), 5)
    reg_y = [model.predict(i) for i in reg_x]
    plt.plot(reg_x, reg_y, color='red', label='Regression')
    plt.legend(loc='best')
    plt.savefig('plot.png')
    print('Plot saved to plot.png')
    def get_landmarks(self):
        pass

    def add_landmarks(self, numLand=1):
        pass

    def predict(self, numLand, pose, noise):
        pass

    def correct(self, numPose, numLand, pose, noise):
        pass


if __name__ == "__main__":
    data = tools.read_data('data/sensor.dat')
    world = tools.read_world('data/world.dat')
    f = plt.Figure(figsize=(5, 5), dpi=100, )
    f.set_visible(False)
    plt.axes()

    # Accommodate landmarks to plot...
    lx = []
    ly = []
    for landmark in world:
        lx.append(landmark['x'])
        ly.append(landmark['y'])
    landmarks = [lx, ly]

    print "Beginning EKFSlam Test"
    # Keep track of the observed landmarks
Example #21
def initialize():
    if not Path(data_cleaned_dir).is_dir():
        Path(data_cleaned_dir).mkdir(parents=True, exist_ok=True)

        excel_file_paths = sorted(
            list(data_raw_dir.glob('**/*.xlsx')),
            reverse=True
        )[:MAX_NUMBER_OF_MONTHS]

        df_list = [
            tools.read_data(data_path)
            for data_path in tqdm(excel_file_paths)
        ]

        df_product_price, df_compound = map(pd.concat, list(zip(*df_list)))

        max_date = df_product_price.Date.max()

        df_compound = (
            df_compound
                .drop_duplicates()
                .loc[lambda x: x.Date == max_date]
        )

        selected_columns = [
            '투여',
            '식약분류',
            '주성분코드',
            '제품코드',
            '제품명',
            '업체명',
            '규격',
            '단위',
            '전문/일반',
            '비고'
        ]

        df_product = (
            df_product_price
                .loc[lambda x:
                    (x.비고.isna()) & (x.Date == max_date)
                ]
                [selected_columns]
                .drop_duplicates()
                .reset_index()
                .drop('index', axis='columns')
                .set_index('제품코드')
        )

        df_price = (
            df_product_price
                [['제품코드', 'Date', '상한금액']]
                .drop_duplicates()
                .reset_index()
                .drop('index', axis='columns')
                .assign(상한금액=lambda x:
                    pd.to_numeric(
                        x.상한금액.apply(lambda v:
                            None if type(v) is not int else v
                        ),
                        errors='coerce'
                    )
                )
                .set_index('제품코드')
        )

        df_compound.to_pickle(data_compound_path)
        df_product.to_pickle(data_product_path)
        df_price.to_pickle(data_price_path)
from algorithms.hierarchical import hierarchical
from algorithms.incremental import incremental
from algorithms.lda.lda import get_lda
from clustering_test import preform_test
from similarity_algorithms import euclidean_distance
from tools import get_clusters, make_records, read_data, divide_train_test

if __name__ == '__main__':
    data_path = "QA-samples.xlsx"
    train_percent = 0.8
    number_of_clusters = 900
    clustering_algorithm = get_lda(False)

    df_pre, df_raw = read_data(data_path=data_path)
    records = make_records(df_pre=df_pre, df_raw=df_raw)[:100]
    train_records, test_records = divide_train_test(
        records=records, train_percent=train_percent)
    clusters = get_clusters(clustering_algorithm, train_records,
                            number_of_clusters)
    is_lda = clustering_algorithm == get_lda(
        True) or clustering_algorithm == get_lda(False)
    preform_test(clusters,
                 test_records,
                 euclidean_distance,
                 clustering_algorithm_name=clustering_algorithm.__name__,
                 is_lda=is_lda,
                 number_of_clusters=number_of_clusters)
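Here read_data takes an Excel path and returns a preprocessed frame together with the raw frame. A hypothetical stand-in (the project's real preprocessing is not shown on this page) could be as simple as:

import pandas as pd

def read_data(data_path):
    # Assumption: load the workbook once and return a lightly cleaned copy
    # alongside the untouched raw frame.
    df_raw = pd.read_excel(data_path)
    df_pre = df_raw.drop_duplicates().reset_index(drop=True)
    return df_pre, df_raw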
Example #23
File: demo.py Project: mcvidomi/MFI
import pickle
import pdb
import os
import numpy as np
import tools
import view
from sklearn.svm import SVC

from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.svm.libsvm import decision_function

# CREATE RESULT FOLDER
if not os.path.exists("results"):
    os.makedirs("results")

# READ DATA
print("LOAD/READ DATA")
xtrain, ytrain, xtest, ytest = tools.read_data()
numbers = [4, 9]
x, y = tools.choose_numbers(xtrain, ytrain, numbers)
xt, yt = tools.choose_numbers(xtest, ytest, numbers)
print("LOAD/READ DATA --- DONE!")

# TRAIN SVM
clf = SVC()
clf.fit(x, y)

# GENERATE RANDOM SAMPLES
samplesize = 5000
samples = np.random.uniform(
    -1., 1.,
    (samplesize, len(x[0])))  #np.random.uniform(0.,1.,(samplesize,len(x[0])))
Example #24
File: demo.py Project: mcvidomi/ML2Motif
import numpy as np
import pickle
import pdb
import os
import tools
import view
from SVM import *
from POIM import *
from Motif import *

# CREATE RESULT FOLDER
if not os.path.exists("results"):
    os.makedirs("results")

# READ DATA
print("LOAD/READ DATA")
xtrain, xtest, y_train, ytest = tools.read_data(job="read", lines=10000)
print("LOAD/READ DATA --- DONE!")

# TRAIN SVM
print("TRAIN SVM")

Cobj = SVM(xtrain, y_train)
Cobj.train(C=1.)
Cobj.svm_save("results/svm_trained.pkl")

# COMPUTE gPOIM
print("COMPUTE gPOIM")
small_k = 2
Pobj = gPOIM()
Pobj.set_up(Cobj, samplesize=100, small_k=small_k)
Pobj.save("results/gPOIM.pkl")