Exemplo n.º 1
0
def clustering_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the clustering data-generation config and hand it to config_writer.

    The config is named ``clustering.<matrix_type>.<matrix_dim>``. Output file
    locations are placed under that name inside ``datagen_dir``; the config is
    passed to ``config_writer`` with the target ``<config_dir>/<name>.json``.

    Returns the config path without the ``.json`` suffix.
    """
    name = '.'.join(('clustering', matrix_type, str(matrix_dim)))
    out_dir = join(datagen_dir, name)
    save_path = join(config_dir, name)
    n_rows, n_cols = split_rowcol(matrix_dim)

    # Key order matches the original keyword order.
    config = {
        'nr': n_rows,
        'nf': n_cols,
        'nc': '50',
        'dc': '10.0',
        'dr': '1.0',
        'fbf': '100.0',
        'cbf': '100.0',
        'X': join(out_dir, 'X.data'),
        'C': join(out_dir, 'C.data'),
        'Y': join(out_dir, 'Y.data'),
        'YbyC': join(out_dir, 'YbyC.data'),
        'fmt': DATA_FORMAT,
    }

    config_writer(save_path + '.json', config)
    return save_path
Exemplo n.º 2
0
def regression1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the positional regression1 data-generation config.

    The config is named ``regression1.<matrix_type>.<matrix_dim>``. Output file
    locations are placed under that name inside ``datagen_dir``; the argument
    list is passed to ``config_writer`` with the target
    ``<config_dir>/<name>.json``.

    Returns the config path without the ``.json`` suffix.
    """
    name = '.'.join(('regression1', matrix_type, str(matrix_dim)))
    out_dir = join(datagen_dir, name)
    save_path = join(config_dir, name)

    num_samples, num_features = split_rowcol(matrix_dim)
    sparsity = MATRIX_TYPE_DICT[matrix_type]

    # The config is a positional list, so the element order must be preserved.
    config = [
        num_samples,
        num_features,
        '5',                           # maxFeatureValue
        '5',                           # maxWeight
        join(out_dir, 'weight.data'),  # weights location
        join(out_dir, 'X.data'),       # data location
        join(out_dir, 'Y.data'),       # labels location
        '1',                           # noise
        '0',                           # intercept
        sparsity,
        DATA_FORMAT,
        '1',                           # transform labels
    ]
    config_writer(save_path + '.json', config)

    return save_path
Exemplo n.º 3
0
def stats1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the stats1 data-generation config and hand it to config_writer.

    The config is named ``stats1.<matrix_type>.<matrix_dim>``. Output file
    locations are placed under that name inside ``datagen_dir``; the config is
    passed to ``config_writer`` with the target ``<config_dir>/<name>.json``.

    Returns the config path without the ``.json`` suffix.
    """
    name = '.'.join(('stats1', matrix_type, str(matrix_dim)))
    out_dir = join(datagen_dir, name)
    save_path = join(config_dir, name)

    n_rows, n_cols = split_rowcol(matrix_dim)

    # NC has to satisfy num0 < NC < C with num0 = NC/2, so half the column
    # count is used (the old value was a fixed 10).
    half_cols = int(int(n_cols)/2)

    # Key order matches the original keyword order.
    config = {
        'R': n_rows,
        'C': n_cols,
        'NC': half_cols,
        'MAXDOMAIN': '1100',
        'DATA': join(out_dir, 'X.data'),
        'TYPES': join(out_dir, 'types'),
        'SETSIZE': '20',
        'LABELSETSIZE': '10',
        'TYPES1': join(out_dir, 'set1.types'),
        'TYPES2': join(out_dir, 'set2.types'),
        'INDEX1': join(out_dir, 'set1.indices'),
        'INDEX2': join(out_dir, 'set2.indices'),
        'fmt': DATA_FORMAT,
    }

    config_writer(save_path + '.json', config)

    return save_path
Exemplo n.º 4
0
def regression1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Assemble the regression1 generator arguments and pass them to config_writer.

    Files produced by the generator are located under
    ``<datagen_dir>/regression1.<matrix_type>.<matrix_dim>``; the argument
    list is written via ``config_writer`` to the same name under
    ``config_dir`` with a ``.json`` suffix.

    Returns the config path without the ``.json`` suffix.
    """
    path_name = '.'.join(('regression1', matrix_type, str(matrix_dim)))
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)

    def data_file(filename):
        # Location of one generated file inside this config's output folder.
        return join(datagen_write, filename)

    rows, cols = split_rowcol(matrix_dim)

    # Leading positional arguments: numSamples, numFeatures, maxFeatureValue,
    # maxWeight.
    config = [rows, cols, '5', '5']
    # Weights, data and labels locations, in that order.
    config += [data_file('weight.data'), data_file('X.data'), data_file('Y.data')]
    # noise, intercept, sparsity, format, transform-labels flag.
    config += ['1', '0', MATRIX_TYPE_DICT[matrix_type], DATA_FORMAT, '1']

    config_writer(save_path + '.json', config)

    return save_path
Exemplo n.º 5
0
def stats1_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Assemble the stats1 generator config and pass it to config_writer.

    Files produced by the generator are located under
    ``<datagen_dir>/stats1.<matrix_type>.<matrix_dim>``; the config is written
    via ``config_writer`` to the same name under ``config_dir`` with a
    ``.json`` suffix.

    Returns the config path without the ``.json`` suffix.
    """
    path_name = '.'.join(('stats1', matrix_type, str(matrix_dim)))
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)

    def data_file(filename):
        # Location of one generated file inside this config's output folder.
        return join(datagen_write, filename)

    rows, cols = split_rowcol(matrix_dim)

    # Constraint: num0 < NC < C, where num0 = NC/2. NC used to be a fixed 10;
    # it is now half of the column count.
    nc = int(int(cols)/2)

    # Keys are inserted in the same order as the original keyword arguments.
    config = {}
    config['R'] = rows
    config['C'] = cols
    config['NC'] = nc
    config['MAXDOMAIN'] = '1100'
    config['DATA'] = data_file('X.data')
    config['TYPES'] = data_file('types')
    config['SETSIZE'] = '20'
    config['LABELSETSIZE'] = '10'
    config['TYPES1'] = data_file('set1.types')
    config['TYPES2'] = data_file('set2.types')
    config['INDEX1'] = data_file('set1.indices')
    config['INDEX2'] = data_file('set2.indices')
    config['fmt'] = DATA_FORMAT

    config_writer(save_path + '.json', config)

    return save_path
Exemplo n.º 6
0
def dimreduction_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the dimreduction data-generation config and hand it to config_writer.

    The config is named ``dimreduction.<matrix_type>.<matrix_dim>``. The single
    output location ``X.data`` is placed under that name inside
    ``datagen_dir``; the config is passed to ``config_writer`` with the target
    ``<config_dir>/<name>.json``.

    Returns the config path without the ``.json`` suffix.
    """
    name = '.'.join(('dimreduction', matrix_type, str(matrix_dim)))
    out_dir = join(datagen_dir, name)
    save_path = join(config_dir, name)
    n_rows, n_cols = split_rowcol(matrix_dim)

    # Note the upper-case 'FMT' key used by this config.
    config = {
        'R': n_rows,
        'C': n_cols,
        'OUT': join(out_dir, 'X.data'),
        'FMT': DATA_FORMAT,
    }

    config_writer(save_path + '.json', config)
    return save_path
Exemplo n.º 7
0
def stats2_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the stats2 data-generation config and hand it to config_writer.

    The config is named ``stats2.<matrix_type>.<matrix_dim>``. Output file
    locations are placed under that name inside ``datagen_dir``; the config is
    passed to ``config_writer`` with the target ``<config_dir>/<name>.json``.

    Returns the config path without the ``.json`` suffix.
    """
    name = '.'.join(('stats2', matrix_type, str(matrix_dim)))
    out_dir = join(datagen_dir, name)
    save_path = join(config_dir, name)
    n_rows, n_cols = split_rowcol(matrix_dim)

    # Key order matches the original keyword order.
    config = {
        'nr': n_rows,
        'nf': n_cols,
        'D': join(out_dir, 'X.data'),
        'Xcid': join(out_dir, 'Xcid.data'),
        'Ycid': join(out_dir, 'Ycid.data'),
        'A': join(out_dir, 'A.data'),
        'fmt': DATA_FORMAT,
    }

    config_writer(save_path + '.json', config)
    return save_path
Exemplo n.º 8
0
def stats2_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Assemble the stats2 generator config and pass it to config_writer.

    Files produced by the generator are located under
    ``<datagen_dir>/stats2.<matrix_type>.<matrix_dim>``; the config is written
    via ``config_writer`` to the same name under ``config_dir`` with a
    ``.json`` suffix.

    Returns the config path without the ``.json`` suffix.
    """
    path_name = '.'.join(('stats2', matrix_type, str(matrix_dim)))
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    rows, cols = split_rowcol(matrix_dim)

    def data_file(filename):
        # Location of one generated file inside this config's output folder.
        return join(datagen_write, filename)

    # Keys are inserted in the same order as the original keyword arguments.
    config = {}
    config['nr'] = rows
    config['nf'] = cols
    config['D'] = data_file('X.data')
    config['Xcid'] = data_file('Xcid.data')
    config['Ycid'] = data_file('Ycid.data')
    config['A'] = data_file('A.data')
    config['fmt'] = DATA_FORMAT

    config_writer(save_path + '.json', config)
    return save_path
Exemplo n.º 9
0
def multinomial_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Build the positional multinomial data-generation config.

    The config is named ``multinomial.<matrix_type>.<matrix_dim>``. Output file
    locations are placed under that name inside ``datagen_dir``; the argument
    list is passed to ``config_writer`` with the target
    ``<config_dir>/<name>.json``.

    Returns the config path without the ``.json`` suffix.
    """
    name = '.'.join(('multinomial', matrix_type, str(matrix_dim)))
    out_dir = join(datagen_dir, name)
    save_path = join(config_dir, name)

    num_samples, num_features = split_rowcol(matrix_dim)
    sparsity = MATRIX_TYPE_DICT[matrix_type]

    # The config is a positional list, so the element order must be preserved.
    config = [
        num_samples,
        num_features,
        sparsity,
        '150',                    # num_categories
        '0',                      # intercept
        join(out_dir, 'X.data'),
        join(out_dir, 'Y.data'),
        DATA_FORMAT,
        '1',                      # unnamed trailing argument, passed through as-is
    ]

    config_writer(save_path + '.json', config)

    return save_path
Exemplo n.º 10
0
def clustering_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Assemble the clustering generator config and pass it to config_writer.

    Files produced by the generator are located under
    ``<datagen_dir>/clustering.<matrix_type>.<matrix_dim>``; the config is
    written via ``config_writer`` to the same name under ``config_dir`` with a
    ``.json`` suffix.

    Returns the config path without the ``.json`` suffix.
    """
    path_name = '.'.join(('clustering', matrix_type, str(matrix_dim)))
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)
    rows, cols = split_rowcol(matrix_dim)

    def data_file(filename):
        # Location of one generated file inside this config's output folder.
        return join(datagen_write, filename)

    # Keys are inserted in the same order as the original keyword arguments.
    config = {}
    config['nr'] = rows
    config['nf'] = cols
    config['nc'] = '50'
    config['dc'] = '10.0'
    config['dr'] = '1.0'
    config['fbf'] = '100.0'
    config['cbf'] = '100.0'
    config['X'] = data_file('X.data')
    config['C'] = data_file('C.data')
    config['Y'] = data_file('Y.data')
    config['YbyC'] = data_file('YbyC.data')
    config['fmt'] = DATA_FORMAT

    config_writer(save_path + '.json', config)
    return save_path
Exemplo n.º 11
0
def multinomial_datagen(matrix_dim, matrix_type, datagen_dir, config_dir):
    """Assemble the multinomial generator arguments and pass them to config_writer.

    Files produced by the generator are located under
    ``<datagen_dir>/multinomial.<matrix_type>.<matrix_dim>``; the argument
    list is written via ``config_writer`` to the same name under
    ``config_dir`` with a ``.json`` suffix.

    Returns the config path without the ``.json`` suffix.
    """
    path_name = '.'.join(('multinomial', matrix_type, str(matrix_dim)))
    datagen_write = join(datagen_dir, path_name)
    save_path = join(config_dir, path_name)

    rows, cols = split_rowcol(matrix_dim)

    # Leading positional arguments: numSamples, numFeatures, sparsity,
    # num_categories, intercept.
    config = [rows, cols, MATRIX_TYPE_DICT[matrix_type], '150', '0']
    # X and Y locations, the data format, then a trailing '1' that is passed
    # through unchanged.
    config += [join(datagen_write, 'X.data'), join(datagen_write, 'Y.data'),
               DATA_FORMAT, '1']

    config_writer(save_path + '.json', config)

    return save_path