예제 #1
0
def glm_gamma_predict(save_folder_name, datagen_dir, train_dir, predict_dir,
                      config_dir):
    """
    Assemble the prediction config for the gamma GLM run and pass it to
    ``config_writer``.

    save_folder_name: name used for the config file and for the prediction
        output sub-folder
    datagen_dir: folder under which the 'X_test.data' / 'Y_test.data' paths
        are built
    train_dir: folder under which the 'B.data' path is built
    predict_dir: parent folder of the 'M.data' / 'O.data' output paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    config_base = join(config_dir, save_folder_name)
    output_dir = join(predict_dir, save_folder_name)

    # Every option is passed as a string; key order matches the original.
    config = {
        'dfam': '1',
        'link': '1',
        'vpow': '2',
        'lpow': '0',
        'fmt': DATA_FORMAT,
        'X': join(datagen_dir, 'X_test.data'),
        'B': join(train_dir, 'B.data'),
        'Y': join(datagen_dir, 'Y_test.data'),
        'M': join(output_dir, 'M.data'),
        'O': join(output_dir, 'O.data'),
    }
    config_writer(config_base + '.json', config)

    return config_base
예제 #2
0
def stats1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """
    Assemble the data-generation config for the 'stats1' family and pass it
    to ``config_writer``.

    matrix_dim: dimension spec, split into (row, col) by ``split_rowcol``
    matrix_type: matrix type label, used only in the run name here
    datagen_dir: parent folder of the generated-data paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    run_name = '.'.join(['stats1', matrix_type, str(matrix_dim)])
    out_dir = join(datagen_dir, run_name)
    config_base = join(config_dir, run_name)

    row, col = split_rowcol(matrix_dim)

    # NC has to lie strictly between num0 and C (num0 < NC < C, where
    # num0 = NC/2). It used to be a fixed 10; it is now half of C.
    half_cols = int(int(col) / 2)

    config = {
        'R': row,
        'C': col,
        'NC': half_cols,
        'MAXDOMAIN': '1100',
        'DATA': join(out_dir, 'X.data'),
        'TYPES': join(out_dir, 'types'),
        'SETSIZE': '20',
        'LABELSETSIZE': '10',
        'TYPES1': join(out_dir, 'set1.types'),
        'TYPES2': join(out_dir, 'set2.types'),
        'INDEX1': join(out_dir, 'set1.indices'),
        'INDEX2': join(out_dir, 'set2.indices'),
        'fmt': DATA_FORMAT,
    }

    config_writer(config_base + '.json', config)

    return config_base
예제 #3
0
def binomial_multilogreg_train(save_folder_name, datagen_dir, train_dir,
                               config_dir):
    """
    Assemble one training config per intercept setting (0, 1, 2) for the
    binomial MultiLogReg run and pass each to ``config_writer``.

    save_folder_name: base name of the config files and model sub-folders
    datagen_dir: folder under which the 'X.data' / 'Y.data' paths are built
    train_dir: parent folder of the per-intercept 'B.data' paths
    config_dir: folder under which the '.json' config paths are built

    return: list of config paths (without '.json'), one per intercept value
    """
    config_base = join(config_dir, save_folder_name)
    model_base = join(train_dir, save_folder_name)

    # The input paths are the same for every intercept setting.
    features = join(datagen_dir, 'X.data')
    labels = join(datagen_dir, 'Y.data')

    config_paths = []
    for intercept in range(3):
        suffix = str(intercept)
        run_config = config_base + '.' + suffix
        config = {
            'X': features,
            'Y': labels,
            'icpt': suffix,
            'reg': '0.01',
            'tol': '0.0001',
            'moi': '100',
            'mii': '5',
            'B': join(model_base + '.' + suffix, 'B.data'),
        }
        config_writer(run_config + '.json', config)
        config_paths.append(run_config)

    return config_paths
예제 #4
0
def regression1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """
    Assemble the positional data-generation config for the 'regression1'
    family and pass it to ``config_writer``.

    matrix_dim: dimension spec, split into (row, col) by ``split_rowcol``
    matrix_type: key into ``MATRIX_TYPE_DICT`` giving the sparsity value
    datagen_dir: parent folder of the generated-data paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    run_name = '.'.join(['regression1', matrix_type, str(matrix_dim)])
    out_dir = join(datagen_dir, run_name)
    config_base = join(config_dir, run_name)

    row, col = split_rowcol(matrix_dim)
    sparsity = MATRIX_TYPE_DICT[matrix_type]

    # The config is a list, so the position of every entry matters.
    config = [
        row,                            # numSamples
        col,                            # numFeatures
        '5',                            # maxFeatureValue
        '5',                            # maxWeight
        join(out_dir, 'weight.data'),   # loc_weights
        join(out_dir, 'X.data'),        # loc_data
        join(out_dir, 'Y.data'),        # loc_labels
        '1',                            # noise
        '0',                            # intercept
        sparsity,
        DATA_FORMAT,                    # fmt
        '1',                            # transform_labels
    ]
    config_writer(config_base + '.json', config)

    return config_base
예제 #5
0
def regression1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """
    Build the ordered argument list for 'regression1' data generation and
    pass it to ``config_writer``.

    matrix_dim: dimension spec, split into (row, col) by ``split_rowcol``
    matrix_type: key into ``MATRIX_TYPE_DICT`` giving the sparsity value
    datagen_dir: parent folder of the generated-data paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    run_name = '.'.join(['regression1', matrix_type, str(matrix_dim)])
    target_dir = join(datagen_dir, run_name)
    config_base = join(config_dir, run_name)

    num_samples, num_features = split_rowcol(matrix_dim)

    max_feature_value = '5'
    max_weight = '5'
    weights_path = join(target_dir, 'weight.data')
    data_path = join(target_dir, 'X.data')
    labels_path = join(target_dir, 'Y.data')
    noise = '1'
    intercept = '0'
    sparsity = MATRIX_TYPE_DICT[matrix_type]
    transform_labels = '1'

    # Positional arguments: the order below must not change.
    config = [num_samples, num_features, max_feature_value, max_weight]
    config += [weights_path, data_path, labels_path]
    config += [noise, intercept, sparsity, DATA_FORMAT, transform_labels]
    config_writer(config_base + '.json', config)

    return config_base
예제 #6
0
def clustering_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """
    Assemble the data-generation config for the 'clustering' family and
    pass it to ``config_writer``.

    matrix_dim: dimension spec, split into (row, col) by ``split_rowcol``
    matrix_type: matrix type label, used only in the run name here
    datagen_dir: parent folder of the generated-data paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    run_name = '.'.join(['clustering', matrix_type, str(matrix_dim)])
    out_dir = join(datagen_dir, run_name)
    config_base = join(config_dir, run_name)
    row, col = split_rowcol(matrix_dim)

    config = {
        'nr': row,
        'nf': col,
        'nc': '50',
        'dc': '10.0',
        'dr': '1.0',
        'fbf': '100.0',
        'cbf': '100.0',
        'X': join(out_dir, 'X.data'),
        'C': join(out_dir, 'C.data'),
        'Y': join(out_dir, 'Y.data'),
        'YbyC': join(out_dir, 'YbyC.data'),
        'fmt': DATA_FORMAT,
    }

    config_writer(config_base + '.json', config)
    return config_base
예제 #7
0
def binomial_l2_svm_train(save_folder_name, datagen_dir, train_dir,
                          config_dir):
    """
    Assemble one training config per intercept setting (0, 1) for the
    binomial L2-SVM run and pass each to ``config_writer``.

    save_folder_name: base name of the config files and model sub-folders
    datagen_dir: folder under which the 'X.data' / 'Y.data' paths are built
    train_dir: parent folder of the per-intercept model/log paths
    config_dir: folder under which the '.json' config paths are built

    return: list of config paths (without '.json'), one per intercept value
    """
    config_base = join(config_dir, save_folder_name)
    model_base = join(train_dir, save_folder_name)

    # The input paths are the same for every intercept setting.
    features = join(datagen_dir, 'X.data')
    labels = join(datagen_dir, 'Y.data')

    config_paths = []
    for intercept in range(2):
        suffix = str(intercept)
        run_config = config_base + '.' + suffix
        run_output = model_base + '.' + suffix
        config = {
            'X': features,
            'Y': labels,
            'icpt': suffix,
            'reg': '0.01',
            'tol': '0.0001',
            'maxiter': '100',
            'model': join(run_output, 'model.data'),
            'Log': join(run_output, 'Log.data'),
            'fmt': DATA_FORMAT,
        }
        config_writer(run_config + '.json', config)
        config_paths.append(run_config)

    return config_paths
예제 #8
0
def stats1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """
    Build the 'stats1' data-generation config and pass it to
    ``config_writer``.

    matrix_dim: dimension spec, split into (row, col) by ``split_rowcol``
    matrix_type: matrix type label, used only in the run name here
    datagen_dir: parent folder of the generated-data paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    run_name = '.'.join(['stats1', matrix_type, str(matrix_dim)])
    target_dir = join(datagen_dir, run_name)
    config_base = join(config_dir, run_name)

    row, col = split_rowcol(matrix_dim)

    config = {}
    config['R'] = row
    config['C'] = col
    # Constraint: num0 < NC < C with num0 = NC/2. The old fixed value was
    # 10; half of the column count is used instead.
    config['NC'] = int(int(col) / 2)
    config['MAXDOMAIN'] = '1100'
    config['DATA'] = join(target_dir, 'X.data')
    config['TYPES'] = join(target_dir, 'types')
    config['SETSIZE'] = '20'
    config['LABELSETSIZE'] = '10'
    config['TYPES1'] = join(target_dir, 'set1.types')
    config['TYPES2'] = join(target_dir, 'set2.types')
    config['INDEX1'] = join(target_dir, 'set1.indices')
    config['INDEX2'] = join(target_dir, 'set2.indices')
    config['fmt'] = DATA_FORMAT

    config_writer(config_base + '.json', config)

    return config_base
예제 #9
0
def regression1_linearregcg_train(save_folder_name, datagen_dir, train_dir,
                                  config_dir):
    """
    Assemble one training config per intercept setting (0, 1, 2) for the
    LinearRegCG run and pass each to ``config_writer``.

    save_folder_name: base name of the config files and model sub-folders
    datagen_dir: folder under which the 'X.data' / 'Y.data' paths are built
    train_dir: parent folder of the per-intercept 'B.data' paths
    config_dir: folder under which the '.json' config paths are built

    return: list of config paths (without '.json'), one per intercept value
    """
    config_base = join(config_dir, save_folder_name)
    model_base = join(train_dir, save_folder_name)

    # The input paths are the same for every intercept setting.
    features = join(datagen_dir, 'X.data')
    labels = join(datagen_dir, 'Y.data')

    config_paths = []
    for intercept in range(3):
        suffix = str(intercept)
        run_config = config_base + '.' + suffix
        config = {
            'X': features,
            'Y': labels,
            'B': join(model_base + '.' + suffix, 'B.data'),
            'icpt': suffix,
            'fmt': DATA_FORMAT,
            'maxi': '20',
            'tol': '0.0001',
            'reg': '0.01',
        }
        config_writer(run_config + '.json', config)
        config_paths.append(run_config)

    return config_paths
예제 #10
0
def regression2_glm_poisson_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """
    Assemble one training config per intercept setting (0, 1, 2) for the
    Poisson GLM run and pass each to ``config_writer``.

    save_folder_name: base name of the config files and model sub-folders
    datagen_dir: folder under which the 'X.data' / 'Y.data' paths are built
    train_dir: parent folder of the per-intercept 'B.data' paths
    config_dir: folder under which the '.json' config paths are built

    return: list of config paths (without '.json'), one per intercept value
    """
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)

    # The input paths are the same for every intercept setting.
    X = join(datagen_dir, 'X.data')
    Y = join(datagen_dir, 'Y.data')

    data_folders = []

    for i in [0, 1, 2]:
        icpt = str(i)
        run_path = save_path + '.' + icpt
        # The variance-power option is spelled 'vpow', as in the other GLM
        # configs; the former key 'vpov' was a typo, so the value was never
        # passed under the expected name.
        config = dict(X=X, Y=Y, B=join(train_write + '.' + icpt, 'B.data'),
                      icpt=icpt, fmt=DATA_FORMAT, moi='200', mii='5',
                      dfam='1', vpow='1', link='1', lpow='0',
                      tol='0.0001', reg='0.01')
        config_writer(run_path + '.json', config)
        data_folders.append(run_path)

    return data_folders
예제 #11
0
def l2_svm_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """
    Assemble the prediction config for the L2-SVM run and pass it to
    ``config_writer``.

    save_folder_name: config file name; the text after its last '.' is
        used as the 'icpt' value
    datagen_dir: folder under which the 'X_test.data' / 'Y_test.data' paths
        are built
    train_dir: folder under which the 'model.data' path is built
    predict_dir: not used by this function
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    config_base = join(config_dir, save_folder_name)

    config = {
        'X': join(datagen_dir, 'X_test.data'),
        'Y': join(datagen_dir, 'Y_test.data'),
        # Last dot-separated component of the folder name.
        'icpt': save_folder_name.rsplit('.', 1)[-1],
        'model': join(train_dir, 'model.data'),
        'fmt': DATA_FORMAT,
    }
    config_writer(config_base + '.json', config)

    return config_base
예제 #12
0
def l2_svm_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """
    Build the L2-SVM prediction config and pass it to ``config_writer``.

    save_folder_name: config file name; its trailing dot-separated
        component becomes the 'icpt' value
    datagen_dir: folder under which the test data paths are built
    train_dir: folder under which the 'model.data' path is built
    predict_dir: not used by this function
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    config_base = join(config_dir, save_folder_name)

    test_features = join(datagen_dir, 'X_test.data')
    test_labels = join(datagen_dir, 'Y_test.data')
    # rpartition yields the whole name when it contains no '.'.
    intercept = save_folder_name.rpartition('.')[2]
    model_path = join(train_dir, 'model.data')

    config = {}
    config['X'] = test_features
    config['Y'] = test_labels
    config['icpt'] = intercept
    config['model'] = model_path
    config['fmt'] = DATA_FORMAT
    config_writer(config_base + '.json', config)

    return config_base
예제 #13
0
def stats1_univar_stats_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """
    Assemble the config for the Univar-Stats run and pass it to
    ``config_writer``.

    save_folder_name: name of the config file and of the output sub-folder
    datagen_dir: folder under which the 'X.data' / 'types' paths are built
    train_dir: parent folder of the 'STATS.data' output path
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    output_dir = join(train_dir, save_folder_name)

    config = {
        'X': join(datagen_dir, 'X.data'),
        'TYPES': join(datagen_dir, 'types'),
        'STATS': join(output_dir, 'STATS.data'),
    }
    config_writer(config_base + '.json', config)

    return [config_base]
예제 #14
0
def stats2_stratstats_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """
    Assemble the config for the stratstats run and pass it to
    ``config_writer``.

    save_folder_name: name of the config file and of the output sub-folder
    datagen_dir: folder under which the 'X.data', 'Xcid.data' and
        'Ycid.data' paths are built
    train_dir: parent folder of the 'O.data' output path
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    output_dir = join(train_dir, save_folder_name)

    config = {
        'X': join(datagen_dir, 'X.data'),
        'Xcid': join(datagen_dir, 'Xcid.data'),
        'Ycid': join(datagen_dir, 'Ycid.data'),
        'O': join(output_dir, 'O.data'),
        'fmt': DATA_FORMAT,
    }
    config_writer(config_base + '.json', config)

    return [config_base]
예제 #15
0
def stats1_univar_stats_train(save_folder_name, datagen_dir, train_dir,
                              config_dir):
    """
    Build the Univar-Stats config and pass it to ``config_writer``.

    save_folder_name: name of the config file and of the output sub-folder
    datagen_dir: folder under which the 'X.data' / 'types' paths are built
    train_dir: parent folder of the 'STATS.data' output path
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    stats_dir = join(train_dir, save_folder_name)

    config = {}
    config['X'] = join(datagen_dir, 'X.data')
    config['TYPES'] = join(datagen_dir, 'types')
    config['STATS'] = join(stats_dir, 'STATS.data')
    config_writer(config_base + '.json', config)

    return [config_base]
예제 #16
0
def naive_bayes_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """
    Assemble the prediction config for the naive-bayes run and pass it to
    ``config_writer``.

    save_folder_name: name of the config file
    datagen_dir: folder under which the 'X_test.data' / 'Y_test.data' paths
        are built
    train_dir: folder under which the 'prior', 'conditionals' and
        'probabilities' paths are built
    predict_dir: not used by this function
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    config_base = join(config_dir, save_folder_name)

    config = {
        'X': join(datagen_dir, 'X_test.data'),
        'Y': join(datagen_dir, 'Y_test.data'),
        'prior': join(train_dir, 'prior'),
        'conditionals': join(train_dir, 'conditionals'),
        'fmt': DATA_FORMAT,
        'probabilities': join(train_dir, 'probabilities'),
    }
    config_writer(config_base + '.json', config)

    return config_base
예제 #17
0
def stats1_bivar_stats_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """
    Assemble the config for the bivar-stats run and pass it to
    ``config_writer``.

    save_folder_name: name of the config file and of the output folder
    datagen_dir: folder under which the data, index and type paths are built
    train_dir: parent folder of the 'OUTDIR' output folder
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    output_dir = join(train_dir, save_folder_name)

    config = {
        'X': join(datagen_dir, 'X.data'),
        'index1': join(datagen_dir, 'set1.indices'),
        'index2': join(datagen_dir, 'set2.indices'),
        'types1': join(datagen_dir, 'set1.types'),
        'types2': join(datagen_dir, 'set2.types'),
        'OUTDIR': output_dir,
    }
    config_writer(config_base + '.json', config)

    return [config_base]
예제 #18
0
def naive_bayes_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """
    Build the naive-bayes prediction config and pass it to
    ``config_writer``.

    save_folder_name: name of the config file
    datagen_dir: folder under which the test data paths are built
    train_dir: folder under which the 'prior', 'conditionals' and
        'probabilities' paths are built
    predict_dir: not used by this function
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    config_base = join(config_dir, save_folder_name)

    test_features = join(datagen_dir, 'X_test.data')
    test_labels = join(datagen_dir, 'Y_test.data')

    config = {}
    config['X'] = test_features
    config['Y'] = test_labels
    config['prior'] = join(train_dir, 'prior')
    config['conditionals'] = join(train_dir, 'conditionals')
    config['fmt'] = DATA_FORMAT
    config['probabilities'] = join(train_dir, 'probabilities')
    config_writer(config_base + '.json', config)

    return config_base
예제 #19
0
def dimreduction_pca_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """
    Assemble the config for the PCA run and pass it to ``config_writer``.

    save_folder_name: name of the config file and of the output sub-folder
    datagen_dir: folder under which the 'X.data' input path is built
    train_dir: parent folder of the 'Output.data' path
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    output_dir = join(train_dir, save_folder_name)

    config = {
        'INPUT': join(datagen_dir, 'X.data'),
        'SCALE': '1',
        'PROJDATA': '1',
        'OUTPUT': join(output_dir, 'Output.data'),
        'OFMT': DATA_FORMAT,
    }
    config_writer(config_base + '.json', config)

    return [config_base]
예제 #20
0
def stats2_stratstats_train(save_folder_name, datagen_dir, train_dir,
                            config_dir):
    """
    Build the stratstats config and pass it to ``config_writer``.

    save_folder_name: name of the config file and of the output sub-folder
    datagen_dir: folder under which the 'X.data', 'Xcid.data' and
        'Ycid.data' paths are built
    train_dir: parent folder of the 'O.data' output path
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    result_dir = join(train_dir, save_folder_name)

    config = {}
    config['X'] = join(datagen_dir, 'X.data')
    config['Xcid'] = join(datagen_dir, 'Xcid.data')
    config['Ycid'] = join(datagen_dir, 'Ycid.data')
    config['O'] = join(result_dir, 'O.data')
    config['fmt'] = DATA_FORMAT
    config_writer(config_base + '.json', config)

    return [config_base]
예제 #21
0
def clustering_kmeans_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """
    Assemble the training config for the Kmeans run and pass it to
    ``config_writer``.

    save_folder_name: name of the config file and of the output sub-folder
    datagen_dir: folder under which the 'X.data' input path is built
    train_dir: parent folder of the 'C.data' output path
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    output_dir = join(train_dir, save_folder_name)

    config = {
        'X': join(datagen_dir, 'X.data'),
        'k': '50',
        'maxi': '50',
        'tol': '0.0001',
        'C': join(output_dir, 'C.data'),
    }
    config_writer(config_base + '.json', config)

    return [config_base]
예제 #22
0
def kmeans_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """
    Assemble the prediction config for the Kmeans run and pass it to
    ``config_writer``.

    save_folder_name: name of the config file and of the prediction
        output sub-folder
    datagen_dir: folder under which the 'X_test.data' and 'C.data' paths
        are built
    train_dir: not used by this function
    predict_dir: parent folder of the 'prY.data' output path
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    config_base = join(config_dir, save_folder_name)
    output_dir = join(predict_dir, save_folder_name)

    # NOTE(review): 'C.data' is taken from datagen_dir rather than
    # train_dir — confirm this is intended.
    config = {
        'X': join(datagen_dir, 'X_test.data'),
        'C': join(datagen_dir, 'C.data'),
        'prY': join(output_dir, 'prY.data'),
    }
    config_writer(config_base + '.json', config)

    return config_base
예제 #23
0
def kmeans_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """
    Build the Kmeans prediction config and pass it to ``config_writer``.

    save_folder_name: name of the config file and of the prediction
        output sub-folder
    datagen_dir: folder under which the 'X_test.data' and 'C.data' paths
        are built
    train_dir: not used by this function
    predict_dir: parent folder of the 'prY.data' output path
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    config_base = join(config_dir, save_folder_name)
    result_dir = join(predict_dir, save_folder_name)

    test_features = join(datagen_dir, 'X_test.data')
    # NOTE(review): the 'C.data' path points into datagen_dir, not
    # train_dir — verify against the caller.
    centroids = join(datagen_dir, 'C.data')
    predictions = join(result_dir, 'prY.data')

    config = {'X': test_features, 'C': centroids, 'prY': predictions}
    config_writer(config_base + '.json', config)

    return config_base
예제 #24
0
def clustering_kmeans_train(save_folder_name, datagen_dir, train_dir,
                            config_dir):
    """
    Build the Kmeans training config and pass it to ``config_writer``.

    save_folder_name: name of the config file and of the output sub-folder
    datagen_dir: folder under which the 'X.data' input path is built
    train_dir: parent folder of the 'C.data' output path
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    result_dir = join(train_dir, save_folder_name)

    config = {}
    config['X'] = join(datagen_dir, 'X.data')
    config['k'] = '50'
    config['maxi'] = '50'
    config['tol'] = '0.0001'
    config['C'] = join(result_dir, 'C.data')
    config_writer(config_base + '.json', config)

    return [config_base]
예제 #25
0
def dimreduction_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """
    Assemble the data-generation config for the 'dimreduction' family and
    pass it to ``config_writer``.

    matrix_dim: dimension spec, split into (row, col) by ``split_rowcol``
    matrix_type: matrix type label, used only in the run name here
    datagen_dir: parent folder of the 'X.data' output path
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    run_name = '.'.join(['dimreduction', matrix_type, str(matrix_dim)])
    out_dir = join(datagen_dir, run_name)
    config_base = join(config_dir, run_name)
    row, col = split_rowcol(matrix_dim)

    config = {
        'R': row,
        'C': col,
        'OUT': join(out_dir, 'X.data'),
        'FMT': DATA_FORMAT,
    }

    config_writer(config_base + '.json', config)
    return config_base
예제 #26
0
def multilogreg_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """
    Assemble the prediction config for the MultiLogReg run and pass it to
    ``config_writer``.

    save_folder_name: name of the config file
    datagen_dir: folder under which the 'X_test.data' / 'Y_test.data' paths
        are built
    train_dir: folder under which the 'B.data' and 'M.data' paths are built
    predict_dir: not used by this function
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    config_base = join(config_dir, save_folder_name)

    # NOTE(review): 'M.data' is placed under train_dir while predict_dir is
    # unused — confirm this is intended.
    config = {
        'dfam': '3',
        'vpow': '-1',
        'link': '2',
        'fmt': DATA_FORMAT,
        'X': join(datagen_dir, 'X_test.data'),
        'B': join(train_dir, 'B.data'),
        'Y': join(datagen_dir, 'Y_test.data'),
        'M': join(train_dir, 'M.data'),
    }

    config_writer(config_base + '.json', config)

    return config_base
예제 #27
0
def multinomial_naive_bayes_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """
    Assemble the training config for the multinomial naive-bayes run and
    pass it to ``config_writer``.

    save_folder_name: name of the config file and of the output sub-folder
    datagen_dir: folder under which the 'X.data' / 'Y.data' paths are built
    train_dir: parent folder of the model output paths
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    output_dir = join(train_dir, save_folder_name)

    config = {
        'X': join(datagen_dir, 'X.data'),
        'Y': join(datagen_dir, 'Y.data'),
        'classes': '150',
        'prior': join(output_dir, 'prior'),
        'conditionals': join(output_dir, 'conditionals'),
        'accuracy': join(output_dir, 'accuracy'),
        'fmt': DATA_FORMAT,
        'probabilities': join(output_dir, 'probabilities'),
    }
    config_writer(config_base + '.json', config)

    return [config_base]
예제 #28
0
def regression1_linearregds_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """
    Assemble one training config per intercept setting (0, 1, 2) for the
    LinearRegDS run and pass each to ``config_writer``.

    save_folder_name: base name of the config files and model sub-folders
    datagen_dir: folder under which the 'X.data' / 'Y.data' paths are built
    train_dir: parent folder of the per-intercept 'B.data' paths
    config_dir: folder under which the '.json' config paths are built

    return: list of config paths (without '.json'), one per intercept value
    """
    config_base = join(config_dir, save_folder_name)
    model_base = join(train_dir, save_folder_name)

    # The input paths are the same for every intercept setting.
    features = join(datagen_dir, 'X.data')
    labels = join(datagen_dir, 'Y.data')

    config_paths = []
    for intercept in range(3):
        suffix = str(intercept)
        run_config = config_base + '.' + suffix
        config = {
            'X': features,
            'Y': labels,
            'B': join(model_base + '.' + suffix, 'B.data'),
            'icpt': suffix,
            'fmt': DATA_FORMAT,
            'reg': '0.01',
        }
        config_writer(run_config + '.json', config)
        config_paths.append(run_config)

    return config_paths
예제 #29
0
def multilogreg_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """
    Build the MultiLogReg prediction config and pass it to
    ``config_writer``.

    save_folder_name: name of the config file
    datagen_dir: folder under which the test data paths are built
    train_dir: folder under which the 'B.data' and 'M.data' paths are built
    predict_dir: not used by this function
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    config_base = join(config_dir, save_folder_name)

    test_features = join(datagen_dir, 'X_test.data')
    test_labels = join(datagen_dir, 'Y_test.data')
    betas = join(train_dir, 'B.data')
    # NOTE(review): the 'M.data' path lives under train_dir, not
    # predict_dir — verify against the caller.
    means = join(train_dir, 'M.data')

    config = {}
    config['dfam'] = '3'
    config['vpow'] = '-1'
    config['link'] = '2'
    config['fmt'] = DATA_FORMAT
    config['X'] = test_features
    config['B'] = betas
    config['Y'] = test_labels
    config['M'] = means

    config_writer(config_base + '.json', config)

    return config_base
예제 #30
0
def stats2_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """
    Assemble the data-generation config for the 'stats2' family and pass
    it to ``config_writer``.

    matrix_dim: dimension spec, split into (row, col) by ``split_rowcol``
    matrix_type: matrix type label, used only in the run name here
    datagen_dir: parent folder of the generated-data paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    run_name = '.'.join(['stats2', matrix_type, str(matrix_dim)])
    out_dir = join(datagen_dir, run_name)
    config_base = join(config_dir, run_name)
    row, col = split_rowcol(matrix_dim)

    config = {
        'nr': row,
        'nf': col,
        'D': join(out_dir, 'X.data'),
        'Xcid': join(out_dir, 'Xcid.data'),
        'Ycid': join(out_dir, 'Ycid.data'),
        'A': join(out_dir, 'A.data'),
        'fmt': DATA_FORMAT,
    }

    config_writer(config_base + '.json', config)
    return config_base
예제 #31
0
def stats2_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """
    Build the 'stats2' data-generation config and pass it to
    ``config_writer``.

    matrix_dim: dimension spec, split into (row, col) by ``split_rowcol``
    matrix_type: matrix type label, used only in the run name here
    datagen_dir: parent folder of the generated-data paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    run_name = '.'.join(['stats2', matrix_type, str(matrix_dim)])
    target_dir = join(datagen_dir, run_name)
    config_base = join(config_dir, run_name)
    num_rows, num_cols = split_rowcol(matrix_dim)

    config = {}
    config['nr'] = num_rows
    config['nf'] = num_cols
    config['D'] = join(target_dir, 'X.data')
    config['Xcid'] = join(target_dir, 'Xcid.data')
    config['Ycid'] = join(target_dir, 'Ycid.data')
    config['A'] = join(target_dir, 'A.data')
    config['fmt'] = DATA_FORMAT

    config_writer(config_base + '.json', config)
    return config_base
예제 #32
0
def glm_binomial_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """
    Assemble the prediction config for the binomial GLM run and pass it to
    ``config_writer``.

    save_folder_name: name used for the config file and for the prediction
        output sub-folder
    datagen_dir: folder under which the 'X_test.data' / 'Y_test.data' paths
        are built
    train_dir: folder under which the 'B.data' path is built
    predict_dir: parent folder of the 'M.data' / 'O.data' output paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    config_base = join(config_dir, save_folder_name)
    output_dir = join(predict_dir, save_folder_name)

    config = {
        'dfam': '2',
        'link': '3',
        'fmt': DATA_FORMAT,
        'X': join(datagen_dir, 'X_test.data'),
        'B': join(train_dir, 'B.data'),
        'Y': join(datagen_dir, 'Y_test.data'),
        'M': join(output_dir, 'M.data'),
        'O': join(output_dir, 'O.data'),
    }
    config_writer(config_base + '.json', config)

    return config_base
예제 #33
0
def linearregds_predict(save_folder_name, datagen_dir, train_dir, predict_dir, config_dir):
    """
    Assemble the prediction config for the LinearRegDS run and pass it to
    ``config_writer``.

    save_folder_name: name used for the config file and for the prediction
        output sub-folder
    datagen_dir: folder under which the 'X_test.data' / 'Y_test.data' paths
        are built
    train_dir: folder under which the 'B.data' path is built
    predict_dir: parent folder of the 'M.data' / 'O.data' output paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    config_base = join(config_dir, save_folder_name)
    output_dir = join(predict_dir, save_folder_name)

    config = {
        'dfam': '1',
        'link': '1',
        'vpow': '0.0',
        'lpow': '1.0',
        'fmt': DATA_FORMAT,
        'X': join(datagen_dir, 'X_test.data'),
        'B': join(train_dir, 'B.data'),
        'Y': join(datagen_dir, 'Y_test.data'),
        'M': join(output_dir, 'M.data'),
        'O': join(output_dir, 'O.data'),
    }
    config_writer(config_base + '.json', config)

    return config_base
예제 #34
0
def dimreduction_pca_train(save_folder_name, datagen_dir, train_dir,
                           config_dir):
    """
    Build the PCA config and pass it to ``config_writer``.

    save_folder_name: name of the config file and of the output sub-folder
    datagen_dir: folder under which the 'X.data' input path is built
    train_dir: parent folder of the 'Output.data' path
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    result_dir = join(train_dir, save_folder_name)

    config = {}
    config['INPUT'] = join(datagen_dir, 'X.data')
    config['SCALE'] = '1'
    config['PROJDATA'] = '1'
    config['OUTPUT'] = join(result_dir, 'Output.data')
    config['OFMT'] = DATA_FORMAT
    config_writer(config_base + '.json', config)

    return [config_base]
예제 #35
0
def multinomial_multilogreg_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """
    Assemble one training config per intercept setting (0, 1, 2) for the
    multinomial MultiLogReg run and pass each to ``config_writer``.

    save_folder_name: base name of the config files and model sub-folders
    datagen_dir: folder under which the 'X.data' / 'Y.data' paths are built
    train_dir: parent folder of the per-intercept 'B.data' paths
    config_dir: folder under which the '.json' config paths are built

    return: list of config paths (without '.json'), one per intercept value
    """
    config_base = join(config_dir, save_folder_name)
    model_base = join(train_dir, save_folder_name)

    # The input paths are the same for every intercept setting.
    features = join(datagen_dir, 'X.data')
    labels = join(datagen_dir, 'Y.data')

    config_paths = []
    for intercept in range(3):
        suffix = str(intercept)
        run_config = config_base + '.' + suffix
        config = {
            'X': features,
            'Y': labels,
            'B': join(model_base + '.' + suffix, 'B.data'),
            'icpt': suffix,
            'reg': '0.01',
            'tol': '0.0001',
            'moi': '100',
            'mii': '0',
            'fmt': DATA_FORMAT,
        }
        config_writer(run_config + '.json', config)
        config_paths.append(run_config)

    return config_paths
예제 #36
0
def stats1_bivar_stats_train(save_folder_name, datagen_dir, train_dir,
                             config_dir):
    """
    Build the bivar-stats config and pass it to ``config_writer``.

    save_folder_name: name of the config file and of the output folder
    datagen_dir: folder under which the data, index and type paths are built
    train_dir: parent folder of the 'OUTDIR' output folder
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    result_dir = join(train_dir, save_folder_name)

    config = {}
    config['X'] = join(datagen_dir, 'X.data')
    config['index1'] = join(datagen_dir, 'set1.indices')
    config['index2'] = join(datagen_dir, 'set2.indices')
    config['types1'] = join(datagen_dir, 'set1.types')
    config['types2'] = join(datagen_dir, 'set2.types')
    config['OUTDIR'] = result_dir
    config_writer(config_base + '.json', config)

    return [config_base]
예제 #37
0
def binomial_l2_svm_train(save_folder_name, datagen_dir, train_dir, config_dir):
    """
    Build one L2-SVM training config per intercept setting (0, 1) and pass
    each to ``config_writer``.

    save_folder_name: base name of the config files and model sub-folders
    datagen_dir: folder under which the 'X.data' / 'Y.data' paths are built
    train_dir: parent folder of the per-intercept model/log paths
    config_dir: folder under which the '.json' config paths are built

    return: list of config paths (without '.json'), one per intercept value
    """
    config_base = join(config_dir, save_folder_name)
    model_base = join(train_dir, save_folder_name)

    # Shared by both runs, so built once up front.
    train_features = join(datagen_dir, 'X.data')
    train_labels = join(datagen_dir, 'Y.data')

    written = []
    for setting in (0, 1):
        tag = str(setting)
        run_config = config_base + '.' + tag
        run_output = model_base + '.' + tag

        config = {}
        config['X'] = train_features
        config['Y'] = train_labels
        config['icpt'] = tag
        config['reg'] = '0.01'
        config['tol'] = '0.0001'
        config['maxiter'] = '100'
        config['model'] = join(run_output, 'model.data')
        config['Log'] = join(run_output, 'Log.data')
        config['fmt'] = DATA_FORMAT

        config_writer(run_config + '.json', config)
        written.append(run_config)

    return written
예제 #38
0
def multinomial_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """
    Assemble the positional data-generation config for the 'multinomial'
    family and pass it to ``config_writer``.

    matrix_dim: dimension spec, split into (row, col) by ``split_rowcol``
    matrix_type: key into ``MATRIX_TYPE_DICT`` giving the sparsity value
    datagen_dir: parent folder of the generated-data paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    run_name = '.'.join(['multinomial', matrix_type, str(matrix_dim)])
    out_dir = join(datagen_dir, run_name)
    config_base = join(config_dir, run_name)

    row, col = split_rowcol(matrix_dim)
    sparsity = MATRIX_TYPE_DICT[matrix_type]

    # The config is a list, so the position of every entry matters.
    config = [
        row,                        # numSamples
        col,                        # numFeatures
        sparsity,
        '150',                      # num_categories
        '0',                        # intercept
        join(out_dir, 'X.data'),    # X
        join(out_dir, 'Y.data'),    # Y
        DATA_FORMAT,                # fmt
        '1',
    ]

    config_writer(config_base + '.json', config)

    return config_base
예제 #39
0
def clustering_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """
    Build the 'clustering' data-generation config and pass it to
    ``config_writer``.

    matrix_dim: dimension spec, split into (row, col) by ``split_rowcol``
    matrix_type: matrix type label, used only in the run name here
    datagen_dir: parent folder of the generated-data paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    run_name = '.'.join(['clustering', matrix_type, str(matrix_dim)])
    target_dir = join(datagen_dir, run_name)
    config_base = join(config_dir, run_name)
    num_rows, num_cols = split_rowcol(matrix_dim)

    config = {}
    config['nr'] = num_rows
    config['nf'] = num_cols
    config['nc'] = '50'
    config['dc'] = '10.0'
    config['dr'] = '1.0'
    config['fbf'] = '100.0'
    config['cbf'] = '100.0'
    config['X'] = join(target_dir, 'X.data')
    config['C'] = join(target_dir, 'C.data')
    config['Y'] = join(target_dir, 'Y.data')
    config['YbyC'] = join(target_dir, 'YbyC.data')
    config['fmt'] = DATA_FORMAT

    config_writer(config_base + '.json', config)
    return config_base
예제 #40
0
def multinomial_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """
    Build the ordered argument list for 'multinomial' data generation and
    pass it to ``config_writer``.

    matrix_dim: dimension spec, split into (row, col) by ``split_rowcol``
    matrix_type: key into ``MATRIX_TYPE_DICT`` giving the sparsity value
    datagen_dir: parent folder of the generated-data paths
    config_dir: folder under which the '.json' config path is built

    return: the config path without its '.json' extension
    """
    run_name = '.'.join(['multinomial', matrix_type, str(matrix_dim)])
    target_dir = join(datagen_dir, run_name)
    config_base = join(config_dir, run_name)

    num_samples, num_features = split_rowcol(matrix_dim)

    sparsity = MATRIX_TYPE_DICT[matrix_type]
    num_categories = '150'
    intercept = '0'
    features_path = join(target_dir, 'X.data')
    labels_path = join(target_dir, 'Y.data')

    # Positional arguments: the order below must not change.
    config = [num_samples, num_features, sparsity, num_categories, intercept]
    config += [features_path, labels_path, DATA_FORMAT, '1']

    config_writer(config_base + '.json', config)

    return config_base
예제 #41
0
def regression2_glm_poisson_train(save_folder_name, datagen_dir, train_dir,
                                  config_dir):
    """
    Assemble one training config per intercept setting (0, 1, 2) for the
    Poisson GLM run and pass each to ``config_writer``.

    save_folder_name: base name of the config files and model sub-folders
    datagen_dir: folder under which the 'X.data' / 'Y.data' paths are built
    train_dir: parent folder of the per-intercept 'B.data' paths
    config_dir: folder under which the '.json' config paths are built

    return: list of config paths (without '.json'), one per intercept value
    """
    save_path = join(config_dir, save_folder_name)
    train_write = join(train_dir, save_folder_name)

    # The input paths are the same for every intercept setting.
    X = join(datagen_dir, 'X.data')
    Y = join(datagen_dir, 'Y.data')

    data_folders = []

    for i in [0, 1, 2]:
        icpt = str(i)
        run_path = save_path + '.' + icpt
        config = dict(X=X,
                      Y=Y,
                      B=join(train_write + '.' + icpt, 'B.data'),
                      icpt=icpt,
                      fmt=DATA_FORMAT,
                      moi='200',
                      mii='5',
                      dfam='1',
                      # Spelled 'vpow' as in the other GLM configs; the
                      # former key 'vpov' was a typo, so the value was
                      # never passed under the expected name.
                      vpow='1',
                      link='1',
                      lpow='0',
                      tol='0.0001',
                      reg='0.01')
        config_writer(run_path + '.json', config)
        data_folders.append(run_path)

    return data_folders
예제 #42
0
def multinomial_naive_bayes_train(save_folder_name, datagen_dir, train_dir,
                                  config_dir):
    """
    Build the multinomial naive-bayes training config and pass it to
    ``config_writer``.

    save_folder_name: name of the config file and of the output sub-folder
    datagen_dir: folder under which the 'X.data' / 'Y.data' paths are built
    train_dir: parent folder of the model output paths
    config_dir: folder under which the '.json' config path is built

    return: single-element list with the config path (without '.json')
    """
    config_base = join(config_dir, save_folder_name)
    result_dir = join(train_dir, save_folder_name)

    config = {}
    config['X'] = join(datagen_dir, 'X.data')
    config['Y'] = join(datagen_dir, 'Y.data')
    config['classes'] = '150'
    config['prior'] = join(result_dir, 'prior')
    config['conditionals'] = join(result_dir, 'conditionals')
    config['accuracy'] = join(result_dir, 'accuracy')
    config['fmt'] = DATA_FORMAT
    config['probabilities'] = join(result_dir, 'probabilities')
    config_writer(config_base + '.json', config)

    return [config_base]