Example #1
import os
import shlex
import subprocess

import numpy as np

import manip  # project-local helper module
import io     # project-local writer module (shadows the stdlib io)


def GP(dic, p_total=100, p_train=70, p_test=30, pop=[], working_dir=""):
    assert p_train + p_test <= 100

    ecj_home = os.path.join(os.getcwd(), '../dist/')
    gp_executable = os.path.join(ecj_home, 'gp.jar')
    gp_params = os.path.join(ecj_home, 'gp.params')

    # split the domain into train, void, and test segments
    length = len(dic[dic.keys()[0]])
    init_train = 0
    fin_train = int(length * p_total / 100.0 * p_train / 100.0)
    init_void = fin_train + 1
    fin_void = int(length * p_total / 100.0 * (100.0 - p_test) / 100.0)
    init_test = fin_void + 1
    fin_test = int(length * p_total / 100.0) - 1
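    # Worked example (illustrative): with length = 1000, p_total = 100,
    # p_train = 70 and p_test = 30, this yields train rows 0..700, an empty
    # void segment (since p_train + p_test = 100) and test rows 701..999.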

    # drop the features listed in 'pop'
    new_dic = dic.copy()
    for k in pop:
        new_dic.pop(k, None)

    # Bring the event column to the end of the key list (used both for
    # regression and classification); 't0' is dropped from the feature keys.
    keys = new_dic.keys()
    if manip.is_in_list('ElNino_tau', keys):
        keys.remove('ElNino_tau')
        keys.append('ElNino_tau')

    dic_train = {}
    dic_test = {}
    keys.remove('t0')

    for k in keys:
        dic_train[k] = np.array([])
        dic_test[k] = np.array([])
    # copy the train and test rows into per-key arrays
    for i in range(init_train, fin_train + 1):
        for k in keys:
            dic_train[k] = np.append(dic_train[k], new_dic[k][i])
    for i in range(init_test, fin_test + 1):
        for k in keys:
            dic_test[k] = np.append(dic_test[k], new_dic[k][i])

    # TODO: find a cleaner way to write these files
    io.gp_file(dic_train, "train_set", order=keys, head=False)
    io.gp_file(dic_test, "test_set", order=keys, head=False)


    command = "java -jar " + gp_executable
    command += " -file " + gp_params
    command += " -Xmx500m -Xmx1024m -p eval.problem.training-file=" + working_dir + "train_set.csv -p"
    command += "eval.problem.testing-file=" + working_dir + "test_set.csv"
    args = shlex.split(command)
    p1 = subprocess.Popen(args, stdout=subprocess.PIPE)


    return None
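
A minimal call sketch (hypothetical data: GP assumes a dict of equal-length
numpy arrays containing both a 't0' time axis and an 'ElNino_tau' event
column; 'feat_a', 'feat_b' and the /tmp/ working directory are made up for
the example):

import numpy as np

n = 100
data = {
    't0': np.arange(n, dtype=float),    # time axis
    'feat_a': np.random.rand(n),        # hypothetical features
    'feat_b': np.random.rand(n),
    'ElNino_tau': np.random.rand(n),    # hypothetical event column
}
# exclude 'feat_b' from the run and write the CSV files under /tmp/
GP(data, p_total=100, p_train=70, p_test=30, pop=['feat_b'], working_dir='/tmp/')
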
Example #2
import csv
import os
import sys

import numpy as np

import manip  # project-local helper module


def read_Net_partial(inputDir, exten='.dat'):
    """
    Reads every file with a given extension from a directory and collects the
    contents into a dictionary. Used for the Armin dataset; NaN values are
    allowed.
    :param inputDir: the input directory
    :param exten: the file extension to match
    :return: the dictionary of the data set
    """
    if os.path.isdir(inputDir):
        dic = {}
        dic['date_time'] = np.array([])
        file_num = 0
        for f in os.listdir(inputDir):
            extension = os.path.splitext(f)[1]
            inFile = os.path.join(inputDir, f)  # robust if inputDir lacks a trailing slash
            if extension == exten:
                file_num += 1
                feat_name = os.path.splitext(f)[0]
                dic[feat_name] = np.array([])
                with open(inFile, 'r') as csvfile:
                    reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
                    for row in reader:
                        if len(row) != 0:
                            fields = row[0].split("\t")
                            dt = float(fields[0])
                            try:
                                value = float(fields[1])
                            except (IndexError, ValueError):  # should not happen
                                value = float('nan')
                            # the first file establishes the date_time axis;
                            # later files extend it only for new timestamps
                            if file_num == 1:
                                dic['date_time'] = np.append(
                                    dic['date_time'], dt)
                                dic[feat_name] = np.append(
                                    dic[feat_name], value)
                            else:
                                if manip.is_in_list(dt, dic['date_time']):
                                    dic[feat_name] = np.append(
                                        dic[feat_name], value)
                                else:
                                    dic['date_time'] = np.append(
                                        dic['date_time'], dt)
                                    dic[feat_name] = np.append(
                                        dic[feat_name], value)

        return dic
    else:
        print "Wrong input directory provided. Exiting!"
        sys.exit(1)
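
A usage sketch (assuming a directory of tab-separated .dat files, one feature
per file, each line of the form "<timestamp>\t<value>"; the path is made up):

data = read_Net_partial('/path/to/armin_data/', exten='.dat')
print sorted(data.keys())     # ['date_time', plus one key per .dat file]
print len(data['date_time'])  # number of timestamps collected
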
Example #3
import shlex
import subprocess

import numpy as np

import manip  # project-local helper module
import io     # project-local writer module (shadows the stdlib io)


def GP(dic,
       p_total=100,
       p_train=70,
       p_test=30,
       pop=[],
       working_dir="",
       n_gen=50,
       n_subpop=10000):
    """
    Method for genetic programming (working but new features in progress..Java Package required)
    :param dic: The dictionary of the data set
    :param p_total: The total amount of data to use for regressions
    :param p_train: percentage of training set
    :param p_test: percentage of test set
    :param pop: keys to exclude from the method
    :param working_dir: directly wher to write the files used by GP
    :param n_gen: number of generations of GP
    :param n_subpop: number of subpopulation individuals
    :return: returns the regression predicted results
    """
    assert p_train + p_test <= 100

    # split the domain into train, void, and test segments
    length = len(dic[dic.keys()[0]])
    init_train = 0
    fin_train = int(length * p_total / 100.0 * p_train / 100.0)
    init_void = fin_train + 1
    fin_void = int(length * p_total / 100.0 * (100.0 - p_test) / 100.0)
    init_test = fin_void + 1
    fin_test = int(length * p_total / 100.0) - 1

    # drop the features listed in 'pop'
    new_dic = dic.copy()
    for k in pop:
        new_dic.pop(k, None)

    # Bring the event column to the end of the key list (used both for
    # regression and classification); 't0' is dropped from the feature keys.
    keys = new_dic.keys()
    if manip.is_in_list('ElNino_tau', keys):
        keys.remove('ElNino_tau')
        keys.append('ElNino_tau')

    dic_train = {}
    dic_test = {}
    keys.remove('t0')

    for k in keys:
        dic_train[k] = np.array([])
        dic_test[k] = np.array([])
    for i in range(init_train, fin_train + 1):
        for k in keys:
            dic_train[k] = np.append(dic_train[k], new_dic[k][i])
    for i in range(init_test, fin_test + 1):
        for k in keys:
            dic_test[k] = np.append(dic_test[k], new_dic[k][i])

    # TODO: find a cleaner way to write these files
    io.gp_file(dic_train, "train_set", order=keys)
    io.gp_file(dic_test, "test_set", order=keys)

    command = "java -jar /home/ruggero/Desktop/projects/solar/GP_code/Solar/dist/Solar.jar"
    command += " -file /home/ruggero/Desktop/projects/solar/GP_code/Solar/ecj/ec/app/solar/solar_train.params"
    command += " -Xmx500m -Xmx1024m -p eval.problem.training-file=" + working_dir + "train_set.csv "
    command += "-p eval.problem.testing-file=" + working_dir + "test_set.csv"
    args = shlex.split(command)
    p1 = subprocess.Popen(args, stdout=subprocess.PIPE)
    output = p1.communicate()[0]
    #results = GP_result(working_dir + "out.stat")
    #return results
    return None
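
A call sketch for this variant (hypothetical data as in Example #1; the run
writes train_set.csv and test_set.csv into working_dir before launching ECJ):

import numpy as np

n = 200
data = {
    't0': np.arange(n, dtype=float),
    'sst_anomaly': np.random.rand(n),   # hypothetical feature
    'ElNino_tau': np.random.rand(n),    # hypothetical event column
}
GP(data, working_dir='/tmp/', n_gen=100, n_subpop=5000)
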
Example #4
import numpy as np

import ffx       # the FFX symbolic-regression package
import ffx.core
import manip     # project-local helper module


def FFX(dic, p_total=100, p_train=70, p_test=30, pop=[]):
    """
    Method for FFX (regression similar to GP). As fitness it uses Normalized root mean squared error (better fitness?)
    FFX installed needed
    :param dic: The dictionary of the data set
    :param p_total: The total amount of data to use for regressions
    :param p_train: percentage of training set
    :param p_test: percentage of test set
    :param pop: features to pop out before applying the method
    :return: ...
    """
    assert p_train + p_test <= 100

    # split the domain into train, void, and test segments
    length = len(dic[dic.keys()[0]])
    init_train = 0
    fin_train = int(length * p_total / 100.0 * p_train / 100.0)
    init_void = fin_train + 1
    fin_void = int(length * p_total / 100.0 * (100.0 - p_test) / 100.0)
    init_test = fin_void + 1
    fin_test = int(length * p_total / 100.0) - 1

    # drop the features listed in 'pop'
    new_dic = dic.copy()
    for k in pop:
        new_dic.pop(k, None)

    dic_train = {}
    dic_test = {}

    for k in new_dic.keys():
        dic_train[k] = np.array([])
        dic_test[k] = np.array([])
    for i in range(init_train, fin_train + 1):
        for k in new_dic.keys():
            dic_train[k] = np.append(dic_train[k], new_dic[k][i])
    for i in range(init_test, fin_test + 1):
        for k in new_dic.keys():
            dic_test[k] = np.append(dic_test[k], new_dic[k][i])

    # Order the keys so the event column and the time axis come last (only
    # the first len(keys) - 2 columns are used as FFX features).
    keys = sorted(new_dic.keys())
    print keys
    keys.remove('ElNino_tau')
    keys.remove('t0')
    keys.append('ElNino_tau')
    keys.append('t0')

    y_train = dic_train['ElNino_tau']
    x_train = np.zeros(shape=(len(y_train), len(keys) - 2))
    for i in range(len(y_train)):
        for k, key in enumerate(keys[:-2]):
            x_train[i, k] = dic_train[key][i]

    y_test = dic_test['ElNino_tau']
    x_test = np.zeros(shape=(len(y_test), len(keys) - 2))
    for i in range(len(y_test)):
        for k, key in enumerate(keys[:-2]):
            x_test[i, k] = dic_test[key][i]

    keys.remove('t0')
    ffx.core.CONSIDER_THRESH = True
    models_ffx = ffx.run(x_train, y_train, x_test, y_test, keys)
    base_ffx = [model.numBases() for model in models_ffx]   # complexity of each model
    error_ffx = [model.test_nmse for model in models_ffx]   # test NMSE of each model
    model = models_ffx[-1]  # keep the most complex model on the returned front

    # clamp negative predictions to zero
    new_pred_FFX = np.maximum(model.simulate(x_test), 0.0)

    time = np.array(dic_test['t0'])  # copy of the test time axis

    return time, y_test, new_pred_FFX
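
A usage sketch (hypothetical data; the NMSE computed at the end uses one
common normalization, RMSE divided by the target range, only to sanity-check
the returned predictions):

import numpy as np

n = 300
data = {
    't0': np.arange(n, dtype=float),
    'sst_anomaly': np.random.rand(n),   # hypothetical feature
    'ElNino_tau': np.random.rand(n),    # hypothetical event column
}
time, y_test, y_pred = FFX(data, p_total=100, p_train=70, p_test=30)
nmse = np.sqrt(np.mean((y_pred - y_test) ** 2)) / (y_test.max() - y_test.min())
print nmse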