Python arff_file示例，el_nino_io.arff_file Python示例

示例#1

0

显示文件

文件： el_nino_manip.py 项目： pdnooteboom/hybrid_model_ENSO

def training_test_sets(dic,
                       p_total=100,
                       p_train=70,
                       p_test=30,
                       name_train='train_set',
                       name_test='test_set',
                       dir='',
                       pop=[],
                       typ='arff'):
    """
    Split a dictionary of prepared instances into an ordered training set and
    test set and write both to disk (arff for weka, csv for other methods).

    The rows are not shuffled: the training set is taken from the start of
    the data and the test set from the end of the used portion, with an
    unused "void" gap in between when ``p_train + p_test < 100``.

    Row boundaries are inclusive, exactly as in the historical implementation:
    the training set holds rows ``0 .. int(length * p_total% * p_train%)`` and
    the test set holds rows
    ``int(length * p_total% * (100 - p_test)%) + 1 .. int(length * p_total%) - 1``.
    Boundaries that fall past the end of the data are clamped to the last row.

    :param dic: dictionary mapping column name to an indexable sequence of
        values; the length is read from the first column
    :param p_total: the total percentage of data in the dictionary to use
    :param p_train: the percentage going in the training set (temporally ordered)
    :param p_test: the percentage going in the test set
    :param name_train: name of the produced training file
    :param name_test: name of the produced test file
    :param dir: the directory where the files are saved
    :param pop: keys to exclude from the written files (only read, never
        modified)
    :param typ: 'arff', 'csv', or 'all' to write both formats
    :return: 1-based position of the 't0' column in the written column order,
        or 0 when there is no 't0' column
    :raises AssertionError: if ``p_train + p_test`` exceeds 100
    :raises ValueError: if ``dic`` has no columns
    :raises SystemExit: with code 1 if ``typ`` is not a supported format
    """
    assert p_train + p_test <= 100, 'p_train + p_test must not exceed 100'

    # Reject an unsupported format before doing any work.
    if typ not in ('csv', 'arff', 'all'):
        print('Not allowed file format. Exiting!')
        raise SystemExit(1)

    if not dic:
        raise ValueError('dic must contain at least one column')

    # next(iter(...)) instead of keys()[0]: dict views are not indexable on
    # Python 3.
    length = len(dic[next(iter(dic))])

    # dividing the domain into train, void and test parts
    used = length * float(p_total) / 100.0
    fin_train = int(used * float(p_train) / 100.0)
    fin_void = int(used * float(100.0 - p_test) / 100.0)
    fin_test = int(used) - 1

    # Inclusive upper bounds, clamped so that e.g. p_train=100 does not index
    # one element past the end of the data.
    train_rows = range(0, min(fin_train + 1, length))
    test_rows = range(fin_void + 1, min(fin_test + 1, length))

    # eliminating some of the features
    new_dic = dic.copy()
    for k in pop:
        new_dic.pop(k, None)

    # Bring the target column to the end of the column order. When both
    # targets are present only 'ElNino_tau' is moved (historical behaviour);
    # when neither is present the dictionary order is used unchanged.
    keys = list(new_dic.keys())
    for target in ('ElNino_tau', 'Event'):
        if is_in_list(target, keys):
            keys.remove(target)
            keys.append(target)
            break

    # writing the attributes: 'Event' is nominal (yes/no), the rest are REAL
    attr = [[k, ['yes', 'no'] if k == 'Event' else 'REAL'] for k in keys]
    p = keys.index('t0') + 1 if 't0' in keys else 0

    # One np.append per column instead of one per element; starting from an
    # empty float array keeps the resulting dtype the same as before for
    # homogeneous columns.
    dic_train = {}
    dic_test = {}
    for k in new_dic:
        column = new_dic[k]
        dic_train[k] = np.append(np.array([]), [column[i] for i in train_rows])
        dic_test[k] = np.append(np.array([]), [column[i] for i in test_rows])

    if typ in ('csv', 'all'):
        io.csv_file(dic_train, dir, name_train, order=keys)
        io.csv_file(dic_test, dir, name_test, order=keys)
    if typ in ('arff', 'all'):
        io.arff_file(dic_train, attr, 'ElNino_training', u'', dir, name_train)
        io.arff_file(dic_test, attr, 'ElNino_test', u'', dir, name_test)
    return p

示例#2

0

显示文件

文件： el_nino_manip.py 项目： pdnooteboom/hybrid_model_ENSO

def random_training_test_sets(dic,
                              p_train=70,
                              p_test=30,
                              name_train='train_set',
                              name_test='test_set',
                              dir='',
                              pop=[],
                              typ='arff',
                              seed=0):
    """
    Split a dictionary of instances into randomly drawn training and test
    sets and write both to disk.

    The row indices are shuffled with the module-level ``random`` generator
    after seeding it with ``seed`` (so the global generator is reseeded as a
    side effect). The first ``int(length * p_train / 100) + 1`` shuffled rows
    form the training set and all remaining rows form the test set.

    :param dic: dictionary mapping column name to an indexable sequence of
        values; the length is read from the first column
    :param p_train: the percentage of rows going in the training set
    :param p_test: accepted for signature compatibility but not used; the
        test set is always every row not placed in the training set
    :param name_train: name of the produced training file
    :param name_test: name of the produced test file
    :param dir: directory argument passed to the file writers
    :param pop: keys to exclude from the written files (only read, never
        modified)
    :param typ: 'arff', 'csv', or 'all' to write both formats
    :param seed: seed passed to ``random.seed`` before shuffling
    :return: 1-based position of the 't0' column in the written column order,
        or 0 when there is no 't0' column
    :raises ValueError: if ``dic`` has no columns
    :raises SystemExit: with code 1 if ``typ`` is not a supported format
    """
    import random

    # Reject an unsupported format before doing any work.
    if typ not in ('csv', 'arff', 'all'):
        print('Not allowed file format. Exiting!')
        raise SystemExit(1)

    if not dic:
        raise ValueError('dic must contain at least one column')

    # next(iter(...)) instead of keys()[0]: dict views are not indexable on
    # Python 3.
    length = len(dic[next(iter(dic))])

    # list(...) because random.shuffle needs a mutable sequence and range()
    # is not one on Python 3.
    seq = list(range(0, length))
    random.seed(seed)
    random.shuffle(seq)

    # The training boundary is inclusive (hence the + 1); clamp it so that
    # p_train=100 does not index one element past the end of the data.
    fin_train = int(length * float(p_train) / 100.0)
    n_train = max(0, min(fin_train + 1, length))
    train_rows = seq[:n_train]
    test_rows = seq[n_train:]

    # eliminating some of the features
    new_dic = dic.copy()
    for k in pop:
        new_dic.pop(k, None)

    # Bring the target column to the end of the column order. When both
    # targets are present only 'ElNino_tau' is moved (historical behaviour);
    # when neither is present the dictionary order is used unchanged.
    keys = list(new_dic.keys())
    for target in ('ElNino_tau', 'Event'):
        if is_in_list(target, keys):
            keys.remove(target)
            keys.append(target)
            break

    # writing the attributes: 'Event' is nominal (yes/no), the rest are REAL
    attr = [[k, ['yes', 'no'] if k == 'Event' else 'REAL'] for k in keys]
    p = keys.index('t0') + 1 if 't0' in keys else 0

    # One np.append per column instead of one per element; starting from an
    # empty float array keeps the resulting dtype the same as before for
    # homogeneous columns.
    dic_train = {}
    dic_test = {}
    for k in new_dic:
        column = new_dic[k]
        dic_train[k] = np.append(np.array([]), [column[i] for i in train_rows])
        dic_test[k] = np.append(np.array([]), [column[i] for i in test_rows])

    if typ in ('csv', 'all'):
        io.csv_file(dic_train, dir, name_train, order=keys)
        io.csv_file(dic_test, dir, name_test, order=keys)
    if typ in ('arff', 'all'):
        io.arff_file(dic_train, attr, 'ElNino_training', u'', dir, name_train)
        io.arff_file(dic_test, attr, 'ElNino_test', u'', dir, name_test)
    return p

示例#3

0

显示文件

文件： el_nino_manip.py 项目： ruleva1983/climatelearn

def training_test_sets(dic,
                       p_total=100,
                       p_train=70,
                       p_test=30,
                       name_train='train_set',
                       name_test='test_set',
                       dir='',
                       pop=[],
                       typ='arff'):
    """
    Write an ordered (unshuffled) training set and test set taken from a
    dictionary of instances.

    The training rows come from the start of the data and the test rows from
    the end of the used portion; rows in between (the "void" part) are
    dropped when ``p_train + p_test < 100``.

    Row boundaries are inclusive, as in the historical implementation:
    training rows are ``0 .. int(length * p_total% * p_train%)`` and test
    rows are
    ``int(length * p_total% * (100 - p_test)%) + 1 .. int(length * p_total%) - 1``.
    Boundaries that fall past the end of the data are clamped to the last row.

    :param dic: dictionary mapping column name to an indexable sequence of
        values; the length is read from the first column
    :param p_total: percentage of the data to use overall
    :param p_train: percentage of the used data going in the training set
    :param p_test: percentage of the used data going in the test set
    :param name_train: file name argument for the training set writers
    :param name_test: file name argument for the test set writers
    :param dir: directory argument passed to the file writers
    :param pop: keys to exclude from the written files (only read, never
        modified)
    :param typ: 'arff', 'csv', or 'all' to write both formats
    :return: 1-based position of the 't0' column in the written column order,
        or 0 when there is no 't0' column
    :raises AssertionError: if ``p_train + p_test`` exceeds 100
    :raises ValueError: if ``dic`` has no columns
    :raises SystemExit: with code 1 if ``typ`` is not a supported format
    """
    assert p_train + p_test <= 100, 'p_train + p_test must not exceed 100'

    # Fail fast on an unsupported format instead of after building the sets.
    if typ not in ('csv', 'arff', 'all'):
        print('Not allowed file format. Exiting!')
        raise SystemExit(1)

    if not dic:
        raise ValueError('dic must contain at least one column')

    # keys()[0] only works on Python 2, where keys() returns a list.
    first_column = next(iter(dic))
    length = len(dic[first_column])

    # dividing the domain into train, void and test parts
    used = length * float(p_total) / 100.0
    fin_train = int(used * float(p_train) / 100.0)
    fin_void = int(used * float(100.0 - p_test) / 100.0)
    fin_test = int(used) - 1

    # The bounds are inclusive; clamping keeps p_train=100 (or p_total=100
    # with p_test=0) from reading one row past the end.
    train_rows = range(0, min(fin_train + 1, length))
    test_rows = range(fin_void + 1, min(fin_test + 1, length))

    # eliminating some of the features
    new_dic = dic.copy()
    for k in pop:
        new_dic.pop(k, None)

    # Move the target column last. 'ElNino_tau' wins when both targets are
    # present (historical behaviour); with no target the dictionary order is
    # kept, where previously `keys` was left undefined.
    keys = list(new_dic.keys())
    for target in ('ElNino_tau', 'Event'):
        if is_in_list(target, keys):
            keys.remove(target)
            keys.append(target)
            break

    # writing the attributes: 'Event' is nominal (yes/no), the rest are REAL
    attr = [[k, ['yes', 'no'] if k == 'Event' else 'REAL'] for k in keys]
    p = keys.index('t0') + 1 if 't0' in keys else 0

    # A single np.append per column avoids re-copying the array for every
    # row; the empty float seed array keeps the dtype as before for
    # homogeneous columns.
    dic_train = {}
    dic_test = {}
    for k in new_dic:
        column = new_dic[k]
        dic_train[k] = np.append(np.array([]), [column[i] for i in train_rows])
        dic_test[k] = np.append(np.array([]), [column[i] for i in test_rows])

    if typ in ('csv', 'all'):
        io.csv_file(dic_train, dir, name_train, order=keys)
        io.csv_file(dic_test, dir, name_test, order=keys)
    if typ in ('arff', 'all'):
        io.arff_file(dic_train, attr, 'ElNino_training', u'', dir, name_train)
        io.arff_file(dic_test, attr, 'ElNino_test', u'', dir, name_test)
    return p