示例#1
0
def fit(X, y, theta, alpha, num_iters, auto_stop=False):
    """
    Fit a single-feature linear regression with batch gradient descent.

    ``theta`` is updated in place on every iteration and is also returned,
    so the caller's object is modified.

    Parameters:
        X: feature values; must expose ``shape[0]`` (used as the sample
            count) and support element-wise multiplication with the
            prediction error.
        y: target values, subtracted from ``predict(X, theta)``.
        theta: two-item mutable sequence; index 0 is updated with the plain
            error sum, index 1 with the error weighted by X.
        alpha: learning rate.
        num_iters: maximum number of gradient steps.
        auto_stop: when true, stop as soon as the previous cost is not
            greater than the latest cost plus ``const.STOP_THRESHOLD * alpha``.

    Returns:
        (theta, cost_history): cost_history holds ``cost(X, y, theta)`` after
        each completed iteration.
    """
    m = X.shape[0]
    J_history = []

    for iter_ in range(num_iters):
        # Both updates must use the error of the *current* theta, so compute
        # it once before touching either parameter (simultaneous update).
        error = predict(X, theta) - y
        new_theta0 = theta[0] - alpha / m * sum(error)
        new_theta1 = theta[1] - alpha / m * sum(error * X)
        theta[0] = new_theta0
        theta[1] = new_theta1

        J_history.append(cost(X, y, theta))
        if len(J_history) < 2:
            continue

        # Divergence test: the latest cost exceeds the first recorded cost by
        # more than the allowed threshold.
        if J_history[-1] - const.INCREASE_THRESHOLD > J_history[0]:
            logerr(
                'wrong value of alpha (%f), the cost inscrease -> stop fit'
                % (alpha))
            return theta, J_history

        # Convergence test: the cost did not drop by more than the
        # alpha-scaled stop threshold since the previous iteration.
        if auto_stop and J_history[-2] <= J_history[-1] + (
                const.STOP_THRESHOLD * alpha):
            loginfo('auto stopped at %d iterations' % (iter_))
            return theta, J_history

    return theta, J_history
示例#2
0
def export_theta(filename, theta):
    """
    Write theta to ``filename`` as JSON.

    A failure to open or write the file is reported through ``logerr`` and
    the function returns without raising. The success message is logged
    only when the dump actually completed.

    Parameters:
        filename: path of the file to (over)write.
        theta: JSON-serializable value to dump.
    """
    try:
        with open(filename, 'w') as f:
            json.dump(theta, f)
    except OSError:
        # FileNotFoundError / NotADirectoryError are builtins (the os module
        # does not expose them); OSError covers them and the other
        # file-system failures such as permission errors.
        logerr('unable to write theta in', filename)
        return
    loginfo('export theta in %s' % (filename))
def start_predict(all_args):
    """
    Print the estimated price for every requested mileage.

    Theta comes from ``get_theta_in_args``; the process exits with status 1
    when none is available. When ``all_args['data_predict']['value']`` is
    None, a single integer mileage is read from standard input and stored
    there as a one-item list (exit status 1 if it is not an int).
    """
    theta = get_theta_in_args(all_args)
    if theta is None:
        exit(1)
    loginfo('using theta ->', theta)

    predict_arg = all_args['data_predict']
    if predict_arg['value'] is None:
        # Nothing was given on the command line: ask the user interactively.
        try:
            predict_arg['value'] = [int(input('car km (int): '))]
        except ValueError:
            logerr('km should be an int')
            exit(1)

    for km in predict_arg['value']:
        estimated = predict(km, theta)
        print('for %8dkm -> estimated price: %d' % (km, estimated))
示例#4
0
def setarg(all_args, arg):
    """
    Parse one ``name=value`` argument and store the converted value.

    The argument must contain exactly one ``=``. The name is looked up in
    the ``'argnames'`` entry of every descriptor in ``all_args``; for the
    first match the value is converted with ``convert_from_str`` to the
    descriptor's ``'type'`` and stored under ``'value'``.

    Parameters:
        all_args: mapping of argument descriptors (dicts with at least
            ``'argnames'``, ``'type'`` and ``'value'``).
        arg: the raw command-line argument.

    Returns:
        True when the value was stored, False (after logging an error) when
        the argument is malformed, unknown, or cannot be converted.
    """
    parts = arg.split('=')
    if len(parts) == 2:
        arg_name, arg_value = parts
        for arg_dict in all_args.values():
            if arg_name not in arg_dict['argnames']:
                continue
            # Convert once and reuse both the success flag (index 0) and the
            # converted value (index 1).
            conversion = convert_from_str(arg_value, arg_dict['type'])
            if conversion[0]:
                arg_dict['value'] = conversion[1]
                return True
            logerr('in arg %s -> unable to convert %s to %s' %
                   (arg, arg_value, str(arg_dict['type'])))
            return False
    logerr('invalid argument ->', arg)
    return False
示例#5
0
def import_theta(filename):
    """
    Load theta from a JSON file.

    Returns the decoded value when it passes ``check_theta``. Returns None
    when the path is not a regular file, when the content cannot be decoded
    (an error is logged in both cases), or when ``check_theta`` rejects it.
    """
    if not os.path.isfile(filename):
        logerr('cannot import theta: %s is not a file' % (filename))
        return None

    with open(filename, 'r') as theta_file:
        try:
            loaded = json.load(theta_file)
        except ValueError:
            # Raised by json.load when the content cannot be decoded.
            logerr('cannot import theta')
            return None

    if check_theta(loaded):
        loginfo('import theta from %s' % (filename))
        return loaded
    return None
示例#6
0
def check_theta(theta):
    """
    Validate theta and normalize its two items to float, in place.

    Parameters:
        theta: candidate value; expected to be a list of exactly two items
            that ``float()`` accepts.

    Returns:
        True when theta is valid (its items are then floats), False after
        logging the reason otherwise.
    """
    if not isinstance(theta, list):
        logerr('theta is not a list %s' % (theta))
        return False
    # Compare by value: identity comparison against an int literal is not a
    # reliable equality test.
    if len(theta) != 2:
        logerr('invalid theta size (%d: excpected 2) -> %s' %
               (len(theta), theta))
        return False
    try:
        theta[0] = float(theta[0])
        theta[1] = float(theta[1])
    except (ValueError, TypeError):
        logerr('cannot convert theta values to float %s' % (theta))
        return False
    return True
示例#7
0
def import_data(all_args):
    """
    Read the CSV named in the arguments and extract the two data columns.

    Parameters:
        all_args: mapping whose ``'data_filename'``, ``'data_km'`` and
            ``'data_price'`` entries each hold a ``'value'``: the CSV path
            and the names of the feature and target columns.

    Returns:
        (data, X, y): the DataFrame and the two columns as numpy arrays, or
        (None, None, None) after logging an error when the file cannot be
        read or parsed, or when a requested column is missing.
    """
    filename = all_args['data_filename']['value']
    try:
        data = pd.read_csv(filename)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, OSError):
        # OSError covers a missing or unreadable file, which would otherwise
        # escape as an uncaught exception.
        logerr('unable to read the csv:', filename)
        return None, None, None

    columns = []
    # Same lookup for both columns, km first, then price.
    for arg_name in ('data_km', 'data_price'):
        column_name = all_args[arg_name]['value']
        try:
            columns.append(np.array(data[column_name]))
        except KeyError:
            logerr('invalid column %s in the csv: %s' %
                   (column_name, filename))
            return None, None, None

    X, y = columns
    return data, X, y
            logerr('km should be an int')
            exit(1)

    for i in all_args['data_predict']['value']:
        print('for %8dkm -> estimated price: %d' % (i, predict(i, theta)))


if __name__ == '__main__':
    # Feed every command-line argument to the argument table; '--usage'
    # prints the help and exits successfully, an argument that setarg
    # rejects ends the program with status 1.
    for arg in sys.argv[1:]:
        if arg == '--usage':
            args.print_usage(all_args, sys.argv[0])
            exit(0)
        if not args.setarg(all_args, arg):
            exit(1)

    # A theta given on the command line must pass check_theta.
    theta_value = all_args['theta']['value']
    if theta_value is not None and not check_theta(theta_value):
        exit(1)

    predict_arg = all_args['data_predict']
    if predict_arg['value'] is not None:
        # A lone int is wrapped in a list so the rest of the program can
        # always iterate over the values.
        if type(predict_arg['value']) is int:
            predict_arg['value'] = [predict_arg['value']]
        # Only exact ints are accepted as prediction inputs.
        if any(type(data) is not int for data in predict_arg['value']):
            logerr('we can predict only with int data')
            exit(1)

    start_predict(all_args)