Пример #1
0
def regression2_glm_poisson_train(save_folder_name, datagen_dir, train_dir):
    """Write one GLM training config per intercept setting (0, 1 and 2).

    For every intercept value ``i`` a config dict is handed to
    ``config_writer`` with the target
    ``<train_dir>/<save_folder_name>.<i>.json``.

    Args:
        save_folder_name: Base name of the per-intercept train folders.
        datagen_dir: Folder holding the ``X.data`` / ``Y.data`` inputs.
        train_dir: Parent folder of the train folders.

    Returns:
        List with the three train folder paths (no ``.json`` suffix), in
        intercept order.
    """
    # The inputs do not depend on the intercept, so build them once.
    X = join(datagen_dir, 'X.data')
    Y = join(datagen_dir, 'Y.data')

    data_folders = []
    for i in [0, 1, 2]:
        icpt = str(i)

        # Each intercept gets its own '<name>.<icpt>' folder. Without the
        # suffix all three iterations target the same path, so every config
        # overwrites the previous one and the returned list contains the
        # same folder three times. The other intercept loops in this file
        # already use this naming scheme.
        full_path_train = join(train_dir, save_folder_name + '.' + icpt)
        data_folders.append(full_path_train)

        config = dict(
            X=X,
            Y=Y,
            B=join(full_path_train, 'B.data'),
            icpt=icpt,
            fmt=DATA_FORMAT,
            moi='200',
            mii='5',
            dfam='1',
            # Key was previously misspelt 'vpov'; the other GLM configs in
            # this file use 'vpow'.
            vpow='1',
            link='1',
            lpow='0',
            tol='0.0001',
            reg='0.01',
        )
        config_writer(full_path_train + '.json', config)

    return data_folders
Пример #2
0
def clustering_datagen(matrix_dim, matrix_type, datagen_dir):
    """Write the data-generation config for the clustering family.

    The config is passed to ``config_writer`` with the target
    ``<datagen_dir>/clustering.<matrix_type>.<matrix_dim>.json``.

    Returns:
        The data-generation folder path (the target without ``.json``).
    """
    n_rows, n_cols = split_rowcol(matrix_dim)
    folder = '.'.join(['clustering', matrix_type, str(matrix_dim)])
    out_dir = join(datagen_dir, folder)

    settings = {
        'nr': n_rows,
        'nf': n_cols,
        'nc': '50',
        'dc': '10.0',
        'dr': '1.0',
        'fbf': '100.0',
        'cbf': '100.0',
        'X': join(out_dir, 'X.data'),
        'C': join(out_dir, 'C.data'),
        'Y': join(out_dir, 'Y.data'),
        'YbyC': join(out_dir, 'YbyC.data'),
        'fmt': DATA_FORMAT,
    }

    config_writer(out_dir + '.json', settings)
    return out_dir
Пример #3
0
def glm_gamma_predict(save_file_name, datagen_dir, train_dir, predict_dir):
    """Write the GLM prediction config for one trained model.

    Test inputs come from ``datagen_dir``, the coefficient file ``B.data``
    from ``train_dir``, and the ``M.data`` / ``O.data`` paths are placed
    under ``<predict_dir>/<save_file_name>``.

    Returns:
        The prediction folder path (config target without ``.json``).
    """
    out_dir = join(predict_dir, save_file_name)

    settings = {
        'dfam': '1',
        'link': '1',
        'vpow': '2',
        'lpow': '0',
        'fmt': DATA_FORMAT,
        'X': join(datagen_dir, 'X_test.data'),
        'B': join(train_dir, 'B.data'),
        'Y': join(datagen_dir, 'Y_test.data'),
        'M': join(out_dir, 'M.data'),
        'O': join(out_dir, 'O.data'),
    }
    config_writer(out_dir + '.json', settings)

    return out_dir
Пример #4
0
def regression1_datagen(matrix_dim, matrix_type, datagen_dir):
    """Write the data-generation config for the regression1 family.

    Unlike the dict-based configs, this one is a positional list; the
    element order below is what gets handed to ``config_writer``.

    Returns:
        The data-generation folder path (config target without ``.json``).
    """
    n_rows, n_cols = split_rowcol(matrix_dim)
    folder = '.'.join(['regression1', matrix_type, str(matrix_dim)])
    out_dir = join(datagen_dir, folder)

    positional_args = [
        n_rows,                            # numSamples
        n_cols,                            # numFeatures
        '5',                               # maxFeatureValue
        '5',                               # maxWeight
        join(out_dir, 'weight.data'),      # weights location
        join(out_dir, 'X.data'),           # data location
        join(out_dir, 'Y.data'),           # labels location
        '1',                               # noise
        '0',                               # intercept
        MATRIX_TYPE_DICT[matrix_type],     # sparsity
        DATA_FORMAT,                       # fmt
        '1',                               # transform labels
    ]
    config_writer(out_dir + '.json', positional_args)

    return out_dir
Пример #5
0
def stats1_univar_stats_train(save_folder_name, datagen_dir, train_dir):
    """Write the univariate-statistics config.

    Reads ``X.data`` and ``types`` from ``datagen_dir`` and points the
    ``STATS`` entry at ``<train_dir>/<save_folder_name>/STATS.data``.

    Returns:
        Single-element list with the train folder path.
    """
    out_dir = join(train_dir, save_folder_name)

    settings = {
        'X': join(datagen_dir, 'X.data'),
        'TYPES': join(datagen_dir, 'types'),
        'STATS': join(out_dir, 'STATS.data'),
    }
    config_writer(out_dir + '.json', settings)

    return [out_dir]
Пример #6
0
def kmeans_predict(save_file_name, datagen_dir, train_dir, predict_dir):
    """Write the k-means prediction config.

    Both ``X_test.data`` and ``C.data`` are taken from ``datagen_dir``;
    ``train_dir`` is accepted but not used. The ``prY`` entry points into
    ``<predict_dir>/<save_file_name>``.

    Returns:
        The prediction folder path (config target without ``.json``).
    """
    out_dir = join(predict_dir, save_file_name)

    settings = {
        'X': join(datagen_dir, 'X_test.data'),
        'C': join(datagen_dir, 'C.data'),
        'prY': join(out_dir, 'prY.data'),
    }
    config_writer(out_dir + '.json', settings)

    return out_dir
Пример #7
0
def stats2_stratstats_train(save_folder_name, datagen_dir, train_dir):
    """Write the stratified-statistics config.

    Inputs ``X.data``, ``Xcid.data`` and ``Ycid.data`` come from
    ``datagen_dir``; the ``O`` entry points into
    ``<train_dir>/<save_folder_name>``.

    Returns:
        Single-element list with the train folder path.
    """
    out_dir = join(train_dir, save_folder_name)

    settings = {
        'X': join(datagen_dir, 'X.data'),
        'Xcid': join(datagen_dir, 'Xcid.data'),
        'Ycid': join(datagen_dir, 'Ycid.data'),
        'O': join(out_dir, 'O.data'),
        'fmt': DATA_FORMAT,
    }
    config_writer(out_dir + '.json', settings)

    return [out_dir]
Пример #8
0
def stats1_bivar_stats_train(save_folder_name, datagen_dir, train_dir):
    """Write the bivariate-statistics config.

    The index and type files for both sets are read from ``datagen_dir``;
    ``OUTDIR`` is the train folder ``<train_dir>/<save_folder_name>``.

    Returns:
        Single-element list with the train folder path.
    """
    out_dir = join(train_dir, save_folder_name)

    settings = {
        'X': join(datagen_dir, 'X.data'),
        'index1': join(datagen_dir, 'set1.indices'),
        'index2': join(datagen_dir, 'set2.indices'),
        'types1': join(datagen_dir, 'set1.types'),
        'types2': join(datagen_dir, 'set2.types'),
        'OUTDIR': out_dir,
    }
    config_writer(out_dir + '.json', settings)
    return [out_dir]
Пример #9
0
def clustering_kmeans_train(save_folder_name, datagen_dir, train_dir):
    """Write the k-means training config.

    Uses ``X.data`` from ``datagen_dir`` and points the ``C`` entry at
    ``<train_dir>/<save_folder_name>/C.data``.

    Returns:
        Single-element list with the train folder path.
    """
    out_dir = join(train_dir, save_folder_name)

    settings = {
        'X': join(datagen_dir, 'X.data'),
        'k': '50',
        'maxi': '50',
        'tol': '0.0001',
        'C': join(out_dir, 'C.data'),
    }
    config_writer(out_dir + '.json', settings)

    return [out_dir]
Пример #10
0
def l2_svm_predict(save_file_name, datagen_dir, train_dir, predict_dir):
    """Write the L2-SVM prediction config.

    The intercept setting is the text after the last ``.`` in
    ``save_file_name`` (the whole name if it has no dot).

    Returns:
        The prediction folder path (config target without ``.json``).
    """
    out_dir = join(predict_dir, save_file_name)

    settings = {
        'X': join(datagen_dir, 'X_test.data'),
        'Y': join(datagen_dir, 'Y_test.data'),
        # Only the last dot-separated component is needed.
        'icpt': save_file_name.rsplit('.', 1)[-1],
        'model': join(train_dir, 'model.data'),
        'fmt': DATA_FORMAT,
    }
    config_writer(out_dir + '.json', settings)

    return out_dir
Пример #11
0
def multilogreg_predict(save_file_name, datagen_dir, train_dir, predict_dir):
    """Write the multinomial logistic regression prediction config.

    Test inputs come from ``datagen_dir``; ``B.data`` and ``M.data`` are
    both located in ``train_dir``.

    Returns:
        The prediction folder path (config target without ``.json``).
    """
    settings = {
        'dfam': '3',
        'vpow': '-1',
        'link': '2',
        'fmt': DATA_FORMAT,
        'X': join(datagen_dir, 'X_test.data'),
        'B': join(train_dir, 'B.data'),
        'Y': join(datagen_dir, 'Y_test.data'),
        'M': join(train_dir, 'M.data'),
    }

    out_dir = join(predict_dir, save_file_name)
    config_writer(out_dir + '.json', settings)

    return out_dir
Пример #12
0
def regression1_linearregds_train(save_folder_name, datagen_dir, train_dir):
    """Write one LinearRegDS training config per intercept setting.

    For each intercept in 0, 1, 2 a config is written for the folder
    ``<train_dir>/<save_folder_name>.<intercept>``.

    Returns:
        List of the three train folder paths, in intercept order.
    """
    features = join(datagen_dir, 'X.data')
    labels = join(datagen_dir, 'Y.data')

    train_folders = []
    for intercept in ('0', '1', '2'):
        out_dir = join(train_dir, save_folder_name + '.' + intercept)
        train_folders.append(out_dir)

        settings = {
            'X': features,
            'Y': labels,
            'B': join(out_dir, 'B.data'),
            'icpt': intercept,
            'fmt': DATA_FORMAT,
            'reg': '0.01',
        }
        config_writer(out_dir + '.json', settings)

    return train_folders
Пример #13
0
def multinomial_naive_bayes_train(save_folder_name, datagen_dir, train_dir):
    """Write the naive Bayes training config.

    Inputs come from ``datagen_dir``; the ``prior``, ``conditionals``,
    ``accuracy`` and ``probabilities`` entries all point into
    ``<train_dir>/<save_folder_name>``.

    Returns:
        Single-element list with the train folder path.
    """
    out_dir = join(train_dir, save_folder_name)

    settings = {
        'X': join(datagen_dir, 'X.data'),
        'Y': join(datagen_dir, 'Y.data'),
        'classes': '150',
        'prior': join(out_dir, 'prior'),
        'conditionals': join(out_dir, 'conditionals'),
        'accuracy': join(out_dir, 'accuracy'),
        'fmt': DATA_FORMAT,
        'probabilities': join(out_dir, 'probabilities'),
    }
    config_writer(out_dir + '.json', settings)

    return [out_dir]
Пример #14
0
def multinomial_multilogreg_train(save_folder_name, datagen_dir, train_dir):
    """Write one MultiLogReg training config per intercept setting.

    For each intercept in 0, 1, 2 a config is written for the folder
    ``<train_dir>/<save_folder_name>.<intercept>``.

    Returns:
        List of the three train folder paths, in intercept order.
    """
    features = join(datagen_dir, 'X.data')
    labels = join(datagen_dir, 'Y.data')

    train_folders = []
    for intercept in ('0', '1', '2'):
        out_dir = join(train_dir, save_folder_name + '.' + intercept)
        train_folders.append(out_dir)

        settings = {
            'X': features,
            'Y': labels,
            'B': join(out_dir, 'B.data'),
            'icpt': intercept,
            'reg': '0.01',
            'tol': '0.0001',
            'moi': '100',
            'mii': '0',
            'fmt': DATA_FORMAT,
        }
        config_writer(out_dir + '.json', settings)

    return train_folders
Пример #15
0
def naive_bayes_predict(save_file_name, datagen_dir, train_dir, predict_dir):
    """Write the naive Bayes prediction config.

    Test inputs come from ``datagen_dir``; ``prior``, ``conditionals`` and
    ``probabilities`` are all located in ``train_dir``.

    Returns:
        The prediction folder path (config target without ``.json``).
    """
    settings = {
        'X': join(datagen_dir, 'X_test.data'),
        'Y': join(datagen_dir, 'Y_test.data'),
        'prior': join(train_dir, 'prior'),
        'conditionals': join(train_dir, 'conditionals'),
        'fmt': DATA_FORMAT,
        'probabilities': join(train_dir, 'probabilities'),
    }

    out_dir = join(predict_dir, save_file_name)
    config_writer(out_dir + '.json', settings)

    return out_dir
Пример #16
0
def multinomial_m_svm_train(save_folder_name, datagen_dir, train_dir):
    """Write one multi-class SVM training config per intercept setting.

    For each intercept in 0, 1 a config is written for the folder
    ``<train_dir>/<save_folder_name>.<intercept>``.

    Returns:
        List of the two train folder paths, in intercept order.
    """
    features = join(datagen_dir, 'X.data')
    labels = join(datagen_dir, 'Y.data')

    train_folders = []
    for intercept in ('0', '1'):
        out_dir = join(train_dir, save_folder_name + '.' + intercept)

        settings = {
            'X': features,
            'Y': labels,
            'icpt': intercept,
            # Stored as an int here, unlike the other (string) values.
            'classes': 150,
            'reg': '0.01',
            'tol': '0.0001',
            'maxiter': '20',
            'model': join(out_dir, 'model.data'),
            'Log': join(out_dir, 'Log.data'),
            'fmt': DATA_FORMAT,
        }
        config_writer(out_dir + '.json', settings)
        train_folders.append(out_dir)

    return train_folders
Пример #17
0
def stats2_datagen(matrix_dim, matrix_type, datagen_dir):
    """Write the data-generation config for the stats2 family.

    The config is passed to ``config_writer`` with the target
    ``<datagen_dir>/stats2.<matrix_type>.<matrix_dim>.json``.

    Returns:
        The data-generation folder path (the target without ``.json``).
    """
    n_rows, n_cols = split_rowcol(matrix_dim)
    folder = '.'.join(['stats2', matrix_type, str(matrix_dim)])
    out_dir = join(datagen_dir, folder)

    settings = {
        'nr': n_rows,
        'nf': n_cols,
        'D': join(out_dir, 'X.data'),
        'Xcid': join(out_dir, 'Xcid.data'),
        'Ycid': join(out_dir, 'Ycid.data'),
        'A': join(out_dir, 'A.data'),
        'fmt': DATA_FORMAT,
    }

    config_writer(out_dir + '.json', settings)
    return out_dir
Пример #18
0
def stats1_datagen(matrix_dim, matrix_type, datagen_dir):
    """Write the data-generation config for the stats1 family.

    The config is passed to ``config_writer`` with the target
    ``<datagen_dir>/stats1.<matrix_type>.<matrix_dim>.json``.

    Returns:
        The data-generation folder path (the target without ``.json``).
    """
    n_rows, n_cols = split_rowcol(matrix_dim)
    folder = '.'.join(['stats1', matrix_type, str(matrix_dim)])
    out_dir = join(datagen_dir, folder)

    # NC must satisfy num0 < NC < C, where num0 = NC / 2.
    # It used to be fixed at 10; it is now half the column count.
    half_cols = int(int(n_cols) / 2)

    settings = {
        'R': n_rows,
        'C': n_cols,
        'NC': half_cols,
        'MAXDOMAIN': '1100',
        'DATA': join(out_dir, 'X.data'),
        'TYPES': join(out_dir, 'types'),
        'SETSIZE': '20',
        'LABELSETSIZE': '10',
        'TYPES1': join(out_dir, 'set1.types'),
        'TYPES2': join(out_dir, 'set2.types'),
        'INDEX1': join(out_dir, 'set1.indices'),
        'INDEX2': join(out_dir, 'set2.indices'),
        'fmt': DATA_FORMAT,
    }

    config_writer(out_dir + '.json', settings)

    return out_dir
Пример #19
0
def multinomial_datagen(matrix_dim, matrix_type, datagen_dir):
    """Write the data-generation config for the multinomial family.

    This config is a positional list; the element order below is what gets
    handed to ``config_writer``.

    Returns:
        The data-generation folder path (config target without ``.json``).
    """
    n_rows, n_cols = split_rowcol(matrix_dim)
    folder = '.'.join(['multinomial', matrix_type, str(matrix_dim)])
    out_dir = join(datagen_dir, folder)

    positional_args = [
        n_rows,                            # numSamples
        n_cols,                            # numFeatures
        MATRIX_TYPE_DICT[matrix_type],     # sparsity
        '150',                             # number of categories
        '0',                               # intercept
        join(out_dir, 'X.data'),           # X
        join(out_dir, 'Y.data'),           # Y
        DATA_FORMAT,                       # fmt
        '1',
    ]

    config_writer(out_dir + '.json', positional_args)

    return out_dir