# Exemplo n.º 1 (Example no. 1)
import numpy as np
from algorithms.utils import _read_split,_class_split
from algorithms.sdae import sdae
from algorithms.clf_utils import _clf_dtree,_clf_svm,_clf_mlp
from sklearn.preprocessing import MinMaxScaler

# Load the "boundary" dataset as train/test feature and label splits.
trX, teX, trY, teY = _read_split("../datasets/nd-data/boundary.csv",read=1)

#implementation of dual encoded features algorithm
#link to paper: http://www.sciencedirect.com/science/article/pii/S0031320316301303


# from algorithms.utils import _read_dat
# trX, teX, trY, teY = _read_dat(
# 	"dataset/page-blocks0.dat",skip=15,
# 	read=1,oneHot=0)


def _sdae_module(X,Y,layer,batch_range):
	# Dual-encoded features: encode the min-max scaled input twice with the
	# same SDAE architecture (sigmoid first, then tanh), attach the labels to
	# each encoding, stack the two labelled sets, shuffle them together, and
	# return the shuffled features and labels separately.
	scaled=MinMaxScaler().fit_transform(X)
	labelled=[np.column_stack((sdae(scaled,layer,batch_range,act),Y)) for act in ("sigmoid","tanh")]
	pool=np.vstack(labelled)
	np.random.shuffle(pool)
	last=pool.shape[1]-1
	y=pool[:,last]
	features=np.delete(pool,last,axis=1)
	return features,y
# Exemplo n.º 2 (Example no. 2)
import numpy as np
from algorithms.sdae_s import sdae_syn
from algorithms.utils import _read_split, _class_split
from algorithms.clf_utils import _clf_dtree, _clf_svm, _clf_mlp, _clf_softmax
from sklearn.preprocessing import MinMaxScaler

#generate synthetic samples using deep sdae

# from algorithms.utils import _read_dat
# trX, teX, trY, teY = _read_dat(
# 	"dataset/page-blocks0.dat",skip=15,
# 	read=1,oneHot=0)

trX, teX, trY, teY = _read_split(
    "../datasets/nd-data/kddcup2004-protein-homology-train.csv",
    read=1,
    oneHot=0)

scaler = MinMaxScaler()

trX = scaler.fit_transform(trX)
teX = scaler.fit_transform(teX)

X0, X1 = _class_split(trX, trY)
print "smaller class shape", X1.shape
print "Enter hidden layer for SDAE"
layer_sdae = input()
layer_sdae = layer_sdae + [X1.shape[1]]
print "Enter oversampling percent"
P = int(input())
# Exemplo n.º 3 (Example no. 3)
import numpy as np
from algorithms.sdae_s import sdae_syn
from algorithms.utils import _read_split,_class_split
from algorithms.clf_utils import _clf_dtree,_clf_svm,_clf_mlp,_clf_softmax
from sklearn.preprocessing import MinMaxScaler


#generate synthetic samples using deep sdae

# from algorithms.utils import _read_dat
# trX, teX, trY, teY = _read_dat(
# 	"dataset/page-blocks0.dat",skip=15,
# 	read=1,oneHot=0)

trX, teX, trY, teY = _read_split(
	"../datasets/nd-data/kddcup2004-protein-homology-train.csv",
	read=1,oneHot=0)

scaler=MinMaxScaler()

trX=scaler.fit_transform(trX)
teX=scaler.fit_transform(teX)

X0,X1=_class_split(trX,trY)
print "smaller class shape",X1.shape
print "Enter hidden layer for SDAE"
layer_sdae=input()
layer_sdae=layer_sdae+[X1.shape[1]]
print "Enter oversampling percent"
P=int(input())
# Exemplo n.º 4 (Example no. 4)
from algorithms.utils import _read_split,_class_split
from visual.plot import plot_data,plot_syn,plot_X
from algorithms.smote import SMOTE
import warnings

# Load the "segment" dataset as train/test splits.
trX, teX, trY, teY = _read_split(
	"../datasets/nd-data/segment.csv",
	read=1,oneHot=0)

# Split training data by class; C1 is the minority class to oversample.
C0,C1=_class_split(trX,trY)
warnings.filterwarnings("ignore", category=DeprecationWarning)
# SMOTE(C1, 100, 5): 100% oversampling using 5 nearest neighbours
# (presumably -- signature not visible here; confirm in algorithms.smote).
syn=SMOTE(C1,100,5)

#to analyze training data
plot_data(trX,trY)

#to compare synthetic samples
plot_syn(C1,syn)

#to analyze the class
plot_X(C1)
# Exemplo n.º 5 (Example no. 5)
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from algorithms.utils import _read_split, _class_split, _one_hot
from algorithms.daego import DAEGO
from algorithms.daf import DAF
from algorithms.clf_utils import _clf_dtree, _clf_svm, _clf_mlp

#improvement of daego method using stacked encoders

#Flowchart
#1. Transform to higher dimension using DAF
#2. Generate Synthetic Samples using DAEGO
#3. Transform back to original dimension

# Load the "coil2000" dataset as train/test splits.
trX, teX, trY, teY = _read_split("../datasets/nd-data/coil2000.csv",
                                 read=1,
                                 oneHot=0)

# Scale training features to [0, 1].
# NOTE(review): teX is never scaled in this chunk -- if the (unseen)
# continuation feeds teX to a classifier, it should go through
# scaler.transform(teX); verify downstream.
scaler = MinMaxScaler()
trX = scaler.fit_transform(trX)
print "Enter oversampling percent"
# NOTE(review): Python 2 input() eval()s what is typed; do not feed it
# untrusted input.
P = int(input())
# Split by class; X1 is the minority class.
X0, X1 = _class_split(trX, trY)

print "X0 shape", X0.shape
print "X1 shape", X1.shape
print "Enter layer for DAF"
# Expected to be a list literal, e.g. [100, 150].
layer = input()
# Prepend the input dimension so the DAF stack starts at the data width.
inp_shape = [trX.shape[1]]
layer_daf = inp_shape + layer
print "Enter batch Range for X0"
# Exemplo n.º 6 (Example no. 6)
from sklearn.preprocessing import MinMaxScaler
from algorithms.utils import _read_split,_class_split,_one_hot
from algorithms.daego import DAEGO
from algorithms.daf import DAF
from algorithms.clf_utils import _clf_dtree,_clf_svm,_clf_mlp


#improvement of daego method using stacked encoders

#Flowchart
#1. Transform to higher dimension using DAF
#2. Generate Synthetic Samples using DAEGO
#3. Transform back to original dimension


# Load the "coil2000" dataset as train/test splits.
trX, teX, trY, teY = _read_split("../datasets/nd-data/coil2000.csv",read=1,oneHot=0)

# Scale training features to [0, 1].
# NOTE(review): teX is never scaled in this chunk -- if the (unseen)
# continuation feeds teX to a classifier, it should go through
# scaler.transform(teX); verify downstream.
scaler=MinMaxScaler()
trX=scaler.fit_transform(trX)
print "Enter oversampling percent"
# NOTE(review): Python 2 input() eval()s what is typed; do not feed it
# untrusted input.
P=int(input())
# Split by class; X1 is the minority class.
X0,X1=_class_split(trX,trY)

print "X0 shape",X0.shape
print "X1 shape",X1.shape
print "Enter layer for DAF"
# Expected to be a list literal, e.g. [100, 150].
layer=input()
# Prepend the input dimension so the DAF stack starts at the data width.
inp_shape=[trX.shape[1]]
layer_daf=inp_shape+layer
print "Enter batch Range for X0"
x0_batch=input()
# Exemplo n.º 7 (Example no. 7)
from algorithms.utils import _read_split, _class_split
from visual.plot import plot_data, plot_syn, plot_X
from algorithms.smote import SMOTE
import warnings

# Load the "segment" dataset as train/test splits.
trX, teX, trY, teY = _read_split("../datasets/nd-data/segment.csv",
                                 read=1,
                                 oneHot=0)

# Split training data by class; C1 is the minority class to oversample.
C0, C1 = _class_split(trX, trY)
warnings.filterwarnings("ignore", category=DeprecationWarning)
# SMOTE(C1, 100, 5): 100% oversampling using 5 nearest neighbours
# (presumably -- signature not visible here; confirm in algorithms.smote).
syn = SMOTE(C1, 100, 5)

#to analyze training data
plot_data(trX, trY)

#to compare synthetic samples
plot_syn(C1, syn)

#to analyze the class
plot_X(C1)