Exemplo n.º 1
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for this program.

    argv is the invocation vector: the program name, optionally followed
    by --test. usage() is called when the argument count is wrong.
    The random module is seeded here as a side effect.
    """
    print(argv)
    if not (1 <= len(argv) <= 2):
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    program_stem = argv[0].split('.')[0]  # text before the first '.'
    arg = Bunch(
        base_name=program_stem,
        test=command_line.has_arg('--test'),
    )

    seed = 123456
    random.seed(seed)

    path = Path()  # use the default dir_input

    control = Bunch(
        arg=arg,
        debug=False,
        path=path,
        path_out=path.dir_working() + (arg.base_name + '-derived.csv'),
        random_seed=seed,
        test=arg.test,
    )
    return control
Exemplo n.º 2
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for this program.

    Recognized options: --geo {census_tract|zip5} (required) and --test.

    usage() is called when the argument count is wrong or when --geo is
    missing or has an unsupported value. The random module is seeded here
    as a side effect.
    """
    print(argv)
    if len(argv) not in (3, 4):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name=argv[0].split('.')[0],  # text before the first '.'
        geo=pcl.get_arg('--geo'),
        test=pcl.has_arg('--test'),
    )
    if arg.geo is None:
        usage('missing --geo')
    if arg.geo not in ('census_tract', 'zip5'):
        # %-format a 1-tuple so the message is built whatever type the
        # parsed value has
        usage('invalid GEO value: %s' % (arg.geo,))

    random_seed = 123456
    random.seed(random_seed)

    path = Path()  # use the default dir_input

    debug = False

    return Bunch(
        arg=arg,
        debug=debug,
        max_sale_price=85e6,  # according to Wall Street Journal
        path=path,
        path_out_csv=path.dir_working() + arg.base_name + '-' + arg.geo + '.csv',
        path_out_occurs=path.dir_working() + arg.base_name + '-' + arg.geo + '-occurs.pickle',
        random_seed=random_seed,
        test=arg.test,
    )
Exemplo n.º 3
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for the valgbr program.

    argv[1] is the YYYYMM value and must parse as an integer; --test is
    optional. usage() is called on a bad argument count or a non-integer
    YYYYMM.

    Side effects: seeds the random module and creates the output
    directory <working>/valgbr/ when it does not exist.
    """
    print(argv)
    if not (2 <= len(argv) <= 3):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name='valgbr',
        yyyymm=argv[1],
        test=pcl.has_arg('--test'),
    )

    try:
        arg.yyyymm = int(arg.yyyymm)
    except ValueError:
        # only a failed conversion is a usage error; anything else
        # (e.g. KeyboardInterrupt) must propagate
        usage('YYYYMM not an integer')

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()

    debug = False

    out_file_name = (
        ('test-' if arg.test else '') +
        '%s.pickle' % arg.yyyymm
    )

    # assure output directory exists; tolerate it being created between
    # a check and the call (for example by another concurrent run)
    dir_path = dir_working + arg.base_name + '/'
    try:
        os.makedirs(dir_path)
    except OSError:
        if not os.path.isdir(dir_path):
            raise

    fixed_hps = Bunch(
        loss='quantile',
        alpha=0.5,
        n_estimators=1000,
        max_depth=3,
        max_features=None)

    return Bunch(
        arg=arg,
        debug=debug,
        fixed_hps=fixed_hps,
        path_in=dir_working + 'samples-train.csv',
        path_out=dir_path + out_file_name,
        random_seed=random_seed,
        test=arg.test,
    )
Exemplo n.º 4
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for the ege program.

    Recognized options: --folds INT (required),
    --rfbound {max_depth|max_features} YYYYMM, and --test.

    usage() is called for a bad argument count, a missing or non-integer
    --folds value, or a malformed --rfbound option. When --rfbound is
    given, arg.hp and arg.yyyymm are set from its two values.
    The random module is seeded here as a side effect.
    """
    print(argv)
    if not (4 <= len(argv) <= 7):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name='ege',
        folds=pcl.get_arg('--folds'),
        rfbound=pcl.get_arg('--rfbound'),   # arg.rfbound is None or a string or a list of strings
        test=pcl.has_arg('--test'),
    )

    if arg.rfbound is not None:
        if len(arg.rfbound) != 2:
            usage('correct is --rfbound HP YYYYMM')
        arg.hp = arg.rfbound[0]
        arg.yyyymm = arg.rfbound[1]
        if arg.hp not in ('max_depth', 'max_features'):
            usage('--rfbound {max_depth|max_features} YYYYMM')

    if arg.folds is None:
        usage('--folds is required')
    else:
        try:
            arg.folds = int(arg.folds)
        except (TypeError, ValueError):
            # report a bad value through usage() instead of a traceback
            usage('--folds value is not an integer: %s' % (arg.folds,))

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()

    debug = False

    out_file_name_base = (
        ('test-' if arg.test else '') +
        arg.base_name +
        ('' if arg.rfbound is None else '-rfbound-%s-%s' % (arg.hp, arg.yyyymm)) +
        ('-folds-%02d' % arg.folds)
    )

    return Bunch(
        arg=arg,
        debug=debug,
        path_in=dir_working + 'samples-train-validate.csv',
        path_out=dir_working + out_file_name_base + '.pickle',
        random_seed=random_seed,
        test=arg.test,
    )
Exemplo n.º 5
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for the rfbound program.

    Positional arguments: argv[1] is the hyperparameter name (hp), argv[2]
    is YYYYMM and argv[3] is the number of folds, which must parse as an
    integer; --test is optional. usage() is called on a bad argument count
    or a non-integer folds value.

    Side effects: seeds the random module and creates the output
    directory <working>/rfbound when it does not exist.
    """
    print(argv)
    if not (4 <= len(argv) <= 5):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name='rfbound',
        hp=argv[1],
        yyyymm=argv[2],
        folds=argv[3],
        test=pcl.has_arg('--test'),
    )

    try:
        arg.folds = int(arg.folds)
    except ValueError:
        # only a failed conversion is a usage error; anything else
        # (e.g. KeyboardInterrupt) must propagate
        usage('INT not an integer; ' + str(arg.folds))

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()

    debug = False

    out_file_name = (
        '%s/%s%s-%s-folds-%02d.pickle' % (
            arg.base_name,
            ('test-' if arg.test else ''),
            arg.hp,
            arg.yyyymm,
            arg.folds)
    )

    # assure the output directory exists; tolerate it being created between
    # a check and the call (for example by another concurrent run)
    dir_path = dir_working + arg.base_name
    try:
        os.makedirs(dir_path)
    except OSError:
        if not os.path.isdir(dir_path):
            raise

    return Bunch(
        arg=arg,
        debug=debug,
        path_in=dir_working + 'samples-train-validate.csv',
        path_out=dir_working + out_file_name,
        random_seed=random_seed,
        test=arg.test,
    )
Exemplo n.º 6
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for the ege program.

    Recognized options: --folds INT (required),
    --rfbound {max_depth|max_features} YYYYMM, and --test.

    usage() is called for a bad argument count, a missing or non-integer
    --folds value, or a malformed --rfbound option. When --rfbound is
    given, arg.hp and arg.yyyymm are set from its two values.
    The random module is seeded here as a side effect.
    """
    print(argv)
    if not (4 <= len(argv) <= 7):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name='ege',
        folds=pcl.get_arg('--folds'),
        rfbound=pcl.get_arg(
            '--rfbound'
        ),  # arg.rfbound is None or a string or a list of strings
        test=pcl.has_arg('--test'),
    )

    if arg.rfbound is not None:
        if len(arg.rfbound) != 2:
            usage('correct is --rfbound HP YYYYMM')
        arg.hp = arg.rfbound[0]
        arg.yyyymm = arg.rfbound[1]
        if arg.hp not in ('max_depth', 'max_features'):
            usage('--rfbound {max_depth|max_features} YYYYMM')

    if arg.folds is None:
        usage('--folds is required')
    else:
        try:
            arg.folds = int(arg.folds)
        except (TypeError, ValueError):
            # report a bad value through usage() instead of a traceback
            usage('--folds value is not an integer: %s' % (arg.folds,))

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()

    debug = False

    out_file_name_base = (('test-' if arg.test else '') + arg.base_name +
                          ('' if arg.rfbound is None else '-rfbound-%s-%s' %
                           (arg.hp, arg.yyyymm)) + ('-folds-%02d' % arg.folds))

    return Bunch(
        arg=arg,
        debug=debug,
        path_in=dir_working + 'samples-train-validate.csv',
        path_out=dir_working + out_file_name_base + '.pickle',
        random_seed=random_seed,
        test=arg.test,
    )
Exemplo n.º 7
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for the linval program.

    argv[1] is the YYYYMM value and must parse as an integer; --test is
    optional. usage() is called on a bad argument count or a non-integer
    YYYYMM.

    Side effects: seeds the random module and creates the output
    directory <working>/linval/ when it does not exist.
    """
    print(argv)
    if not (2 <= len(argv) <= 3):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name='linval',
        yyyymm=argv[1],
        test=pcl.has_arg('--test'),
    )

    try:
        arg.yyyymm = int(arg.yyyymm)
    except ValueError:
        # only a failed conversion is a usage error; anything else
        # (e.g. KeyboardInterrupt) must propagate
        usage('YYYYMM not an integer')

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()

    debug = False

    out_file_name = (
        ('test-' if arg.test else '') +
        '%s.pickle' % arg.yyyymm
    )

    # assure output directory exists; tolerate it being created between
    # a check and the call (for example by another concurrent run)
    dir_path = dir_working + arg.base_name + '/'
    try:
        os.makedirs(dir_path)
    except OSError:
        if not os.path.isdir(dir_path):
            raise

    return Bunch(
        arg=arg,
        debug=debug,
        path_in=dir_working + 'samples-train-validate.csv',
        path_out=dir_path + out_file_name,
        random_seed=random_seed,
        test=arg.test,
    )
Exemplo n.º 8
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for chart-05.

    Recognized options are --data and --test. usage() is called on a bad
    argument count, and also when three arguments are given but they are
    not both of those options.

    Side effects: seeds the random module and creates the output
    directory <working>/chart-05/ when it does not exist.
    """
    print(argv)
    n_args = len(argv)
    if not (1 <= n_args <= 3):
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    arg = Bunch(
        base_name='chart-05',
        data=command_line.has_arg('--data'),
        test=command_line.has_arg('--test'),
    )

    # with two options present, they must be exactly --data and --test
    if n_args == 3 and not (arg.data and arg.test):
        usage('there is an extra invocation option')

    seed = 123
    random.seed(seed)

    dir_working = Path().dir_working()

    test_prefix = 'test-' if arg.test else ''
    reduced_file_name = test_prefix + 'data.pickle'

    # assure output directory exists
    dir_path = dir_working + arg.base_name + '/'
    already_there = os.path.exists(dir_path)
    if not already_there:
        os.makedirs(dir_path)

    control = Bunch(
        arg=arg,
        debug=False,
        path_in_ege=dir_working + 'valgbr/*.pickle',
        path_reduction=dir_path + reduced_file_name,
        path_chart_base=dir_path,
        random_seed=seed,
        test=arg.test,
    )
    return control
Exemplo n.º 9
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for this program.

    Recognized options are --help (calls usage()) and --test. usage() is
    also called when the argument count is wrong. The random module is
    seeded here as a side effect.
    """
    print(argv)
    if not (1 <= len(argv) <= 2):
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    if command_line.has_arg('--help'):
        usage()
    arg = Bunch(
        base_name=argv[0].split('.')[0],  # text before the first '.'
        test=command_line.has_arg('--test'),
    )

    seed = 123456
    random.seed(seed)

    path = Path()  # use the default dir_input

    def in_working(file_name):
        # every file named below sits directly in the working directory
        return path.dir_working() + file_name

    test_prefix = 'testing-' if arg.test else ''
    file_out_transactions = test_prefix + arg.base_name + '-al-g-sfr' + '.csv'

    return Bunch(
        arg=arg,
        debug=False,
        max_sale_price=85e6,  # according to Wall Street Journal
        path=path,
        path_in_census_features=in_working('census-features-derived.csv'),
        path_in_parcels_features_census_tract=in_working(
            'parcels-features-census_tract.csv'),
        path_in_parcels_features_zip5=in_working('parcels-features-zip5.csv'),
        path_out_transactions=in_working(file_out_transactions),
        random_seed=seed,
        test=arg.test,
    )
Exemplo n.º 10
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for this program.

    Recognized options are --help (calls usage()) and --test. usage() is
    also called when the argument count is wrong. The random module is
    seeded here as a side effect.
    """
    print(argv)
    if not (1 <= len(argv) <= 2):
        usage('invalid number of arguments')

    parser = ParseCommandLine(argv)
    if parser.has_arg('--help'):
        usage()
    arg = Bunch(
        base_name=argv[0].split('.')[0],  # text before the first '.'
        test=parser.has_arg('--test'),
    )

    seed = 123456
    random.seed(seed)

    path = Path()  # use the default dir_input

    # test runs write to a separately named transactions file
    if arg.test:
        file_out_transactions = 'testing-' + arg.base_name + '-al-g-sfr' + '.csv'
    else:
        file_out_transactions = arg.base_name + '-al-g-sfr' + '.csv'

    census_features = path.dir_working() + 'census-features-derived.csv'
    parcels_census_tract = path.dir_working() + 'parcels-features-census_tract.csv'
    parcels_zip5 = path.dir_working() + 'parcels-features-zip5.csv'
    transactions = path.dir_working() + file_out_transactions

    control = Bunch(
        arg=arg,
        debug=False,
        max_sale_price=85e6,  # according to Wall Street Journal
        path=path,
        path_in_census_features=census_features,
        path_in_parcels_features_census_tract=parcels_census_tract,
        path_in_parcels_features_zip5=parcels_zip5,
        path_out_transactions=transactions,
        random_seed=seed,
        test=arg.test,
    )
    return control
Exemplo n.º 11
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for this program.

    The only recognized option is --test, which prefixes every output
    file name with 'testing-'. usage() is called when the argument count
    is wrong. The random module is seeded here as a side effect.
    """
    print(argv)
    if not (1 <= len(argv) <= 2):
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    arg = Bunch(
        base_name=argv[0].split('.')[0],  # text before the first '.'
        test=command_line.has_arg('--test'),
    )

    seed = 123
    random.seed(seed)

    dir_working = Path().dir_working()

    test_prefix = 'testing-' if arg.test else ''
    out_file_name_base = test_prefix + arg.base_name

    def out_path(suffix):
        # all outputs share one directory and one base name
        return dir_working + out_file_name_base + suffix

    return Bunch(
        arg=arg,
        debug=False,
        fraction_test=0.2,
        max_sale_price=85e6,  # according to Wall Street Journal
        path_in=dir_working + 'transactions-al-g-sfr.csv',
        path_out_info_reasonable=out_path('-info-reasonable.pickle'),
        path_out_test=out_path('-test.csv'),
        path_out_train=out_path('-train.csv'),
        path_out_train_validate=out_path('-train-validate.csv'),
        path_out_validate=out_path('-validate.csv'),
        random_seed=seed,
        test=arg.test,
    )
Exemplo n.º 12
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for chart-07.

    Recognized options are --data and --test. usage() is called when the
    argument count is wrong.

    Side effects: seeds the random module and creates the output
    directory <working>/chart-07/ when it does not exist.
    """
    print(argv)
    if not (1 <= len(argv) <= 2):
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    arg = Bunch(
        base_name='chart-07',
        data=command_line.has_arg('--data'),
        test=command_line.has_arg('--test'),
    )

    seed = 123
    random.seed(seed)

    dir_working = Path().dir_working()

    test_prefix = 'test-' if arg.test else ''
    reduced_file_name = test_prefix + 'data.pickle'

    # assure output directory exists
    dir_path = dir_working + arg.base_name + '/'
    already_there = os.path.exists(dir_path)
    if not already_there:
        os.makedirs(dir_path)

    control = Bunch(
        arg=arg,
        debug=False,
        path_in_best=dir_working + 'chart-06/best.pickle',
        # NOTE: this one is a fixed relative path, not built from dir_working
        path_in_samples='../data/working/samples-train.csv',
        path_reduction=dir_path + reduced_file_name,
        path_chart_base=dir_path,
        random_seed=seed,
        test=arg.test,
    )
    return control
Exemplo n.º 13
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for this program.

    Recognized options: --geo {census_tract|zip5} (required) and --test.

    usage() is called when the argument count is wrong or when --geo is
    missing or has an unsupported value. The random module is seeded here
    as a side effect.
    """
    print(argv)
    if len(argv) not in (3, 4):
        usage('invalid number of arguments')

    pcl = ParseCommandLine(argv)
    arg = Bunch(
        base_name=argv[0].split('.')[0],  # text before the first '.'
        geo=pcl.get_arg('--geo'),
        test=pcl.has_arg('--test'),
    )
    if arg.geo is None:
        usage('missing --geo')
    if arg.geo not in ('census_tract', 'zip5'):
        # %-format a 1-tuple so the message is built whatever type the
        # parsed value has
        usage('invalid GEO value: %s' % (arg.geo,))

    random_seed = 123456
    random.seed(random_seed)

    path = Path()  # use the default dir_input

    debug = False

    return Bunch(
        arg=arg,
        debug=debug,
        max_sale_price=85e6,  # according to Wall Street Journal
        path=path,
        path_out_csv=path.dir_working() + arg.base_name + '-' + arg.geo +
        '.csv',
        path_out_occurs=path.dir_working() + arg.base_name + '-' + arg.geo +
        '-occurs.pickle',
        random_seed=random_seed,
        test=arg.test,
    )
Exemplo n.º 14
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for chart-02.

    argv[1] is the hyperparameter name (hp); --data and --test are
    optional. usage() is called on a bad argument count and when no
    argument follows the program name.
    The random module is seeded here as a side effect.
    """
    print(argv)
    n_args = len(argv)
    if not (1 <= n_args <= 3):
        usage('invalid number of arguments')

    # the hp positional argument is mandatory
    if n_args == 1:
        usage()

    command_line = ParseCommandLine(argv)
    arg = Bunch(
        base_name='chart-02',
        hp=argv[1],
        data=command_line.has_arg('--data'),
        test=command_line.has_arg('--test'),
    )

    seed = 123
    random.seed(seed)

    dir_working = Path().dir_working()

    name_with_hp = arg.base_name + '-' + arg.hp
    test_prefix = 'test-' if arg.test else ''
    out_file_name_base = test_prefix + name_with_hp

    ege_pattern = 'ege-rfbound-%s-*-folds-10.pickle' % arg.hp

    control = Bunch(
        arg=arg,
        debug=False,
        path_in_ege=dir_working + ege_pattern,
        path_out_base=dir_working + out_file_name_base,
        path_data=dir_working + name_with_hp + '.data.pickle',
        random_seed=seed,
        test=arg.test,
    )
    return control
Exemplo n.º 15
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for the valrf program.

    argv[1] is the YYYYMM value and must parse as an integer; --test is
    optional. usage() is called on a bad argument count or a non-integer
    YYYYMM.

    Side effects: seeds the random module and creates the output
    directory <working>/valrf/ when it does not exist.
    """
    print(argv)
    if not (2 <= len(argv) <= 3):
        usage("invalid number of arguments")

    pcl = ParseCommandLine(argv)
    arg = Bunch(base_name="valrf", yyyymm=argv[1], test=pcl.has_arg("--test"))

    try:
        arg.yyyymm = int(arg.yyyymm)
    except ValueError:
        # only a failed conversion is a usage error; anything else
        # (e.g. KeyboardInterrupt) must propagate
        usage("YYYYMM not an integer")

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()

    debug = False

    out_file_name = ("test-" if arg.test else "") + "%s.pickle" % arg.yyyymm

    # assure output directory exists; tolerate it being created between
    # a check and the call (for example by another concurrent run)
    dir_path = dir_working + arg.base_name + "/"
    try:
        os.makedirs(dir_path)
    except OSError:
        if not os.path.isdir(dir_path):
            raise

    return Bunch(
        arg=arg,
        debug=debug,
        path_in=dir_working + "samples-train.csv",
        path_out=dir_path + out_file_name,
        random_seed=random_seed,
        test=arg.test,
    )
Exemplo n.º 16
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for chart-02.

    argv[1] is the hyperparameter name (hp); --data and --test are
    optional. usage() is called on a bad argument count and when no
    argument follows the program name.
    The random module is seeded here as a side effect.
    """
    print(argv)
    arg_count = len(argv)
    if arg_count not in (1, 2, 3):
        usage('invalid number of arguments')
    if arg_count == 1:
        # nothing after the program name: hp is missing
        usage()

    parser = ParseCommandLine(argv)
    hp = argv[1]
    arg = Bunch(
        base_name='chart-02',
        hp=hp,
        data=parser.has_arg('--data'),
        test=parser.has_arg('--test'),
    )

    seed = 123
    random.seed(seed)

    dir_working = Path().dir_working()

    if arg.test:
        out_file_name_base = 'test-' + arg.base_name + '-' + arg.hp
    else:
        out_file_name_base = arg.base_name + '-' + arg.hp

    path_in_ege = dir_working + 'ege-rfbound-%s-*-folds-10.pickle' % arg.hp
    path_data = dir_working + arg.base_name + '-' + arg.hp + '.data.pickle'

    return Bunch(
        arg=arg,
        debug=False,
        path_in_ege=path_in_ege,
        path_out_base=dir_working + out_file_name_base,
        path_data=path_data,
        random_seed=seed,
        test=arg.test,
    )
Exemplo n.º 17
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for this program.

    The only recognized option is --test, which prefixes every output
    file name with 'testing-'. usage() is called when the argument count
    is wrong. The random module is seeded here as a side effect.
    """
    print(argv)
    if not (1 <= len(argv) <= 2):
        usage('invalid number of arguments')

    parser = ParseCommandLine(argv)
    arg = Bunch(
        base_name=argv[0].split('.')[0],  # text before the first '.'
        test=parser.has_arg('--test'),
    )

    seed = 123
    random.seed(seed)

    dir_working = Path().dir_working()

    if arg.test:
        out_file_name_base = 'testing-' + arg.base_name
    else:
        out_file_name_base = arg.base_name

    # common prefix (directory plus base name) of every output path
    out_stem = dir_working + out_file_name_base

    control = Bunch(
        arg=arg,
        debug=False,
        fraction_test=0.2,
        max_sale_price=85e6,  # according to Wall Street Journal
        path_in=dir_working + 'transactions-al-g-sfr.csv',
        path_out_info_reasonable=out_stem + '-info-reasonable.pickle',
        path_out_test=out_stem + '-test.csv',
        path_out_train=out_stem + '-train.csv',
        path_out_train_validate=out_stem + '-train-validate.csv',
        path_out_validate=out_stem + '-validate.csv',
        random_seed=seed,
        test=arg.test,
    )
    return control
Exemplo n.º 18
0
def make_control(argv):
    """Return a Bunch holding the run-time configuration for testbest.

    The only recognized option is --test. usage() is called when the
    argument count is wrong.

    Side effects: seeds the random module and creates the output
    directory <working>/testbest/ when it does not exist.
    """
    print(argv)
    if len(argv) not in (1, 2):
        usage('invalid number of arguments')

    command_line = ParseCommandLine(argv)
    arg = Bunch(
        base_name='testbest',
        test=command_line.has_arg('--test'),
    )

    seed = 123
    random.seed(seed)

    dir_working = Path().dir_working()

    test_prefix = 'test-' if arg.test else ''
    result_stem = 'results'
    out_file_name = test_prefix + '%s.pickle' % result_stem

    # assure output directory exists
    dir_path = dir_working + arg.base_name + '/'
    already_there = os.path.exists(dir_path)
    if not already_there:
        os.makedirs(dir_path)

    control = Bunch(
        arg=arg,
        debug=False,
        path_in_data=dir_working + 'samples-train.csv',
        path_in_best=dir_working + 'chart-06/best.pickle',
        path_out=dir_path + out_file_name,
        random_seed=seed,
        test=arg.test,
    )
    return control