Example #1
0
def fetch_digits(data_target=True):
    """Download and load the gzipped CSV digits dataset.

    Args:
        data_target (bool): if True, return the full dataset wrapped in a
            ``DataSet``; otherwise return a train/test split.

    Returns:
        DataSet or tuple: ``DataSet(data, target)`` when ``data_target`` is
        True, else the 4-tuple produced by ``train_test_split`` with
        ``test_size=0.33`` and ``random_seed=5``.
    """
    file_path = maybe_download('../../ztlearn/datasets/digits/', URL)

    with gzip.open(file_path, 'rb') as digits_path:
        digits_data = np.loadtxt(digits_path, delimiter=',')

    # Last column is the label. ``np.int`` was deprecated in NumPy 1.20 and
    # removed in 1.24 — the builtin ``int`` is the documented replacement and
    # yields the same platform-default integer dtype.
    data, target = digits_data[:, :-1], digits_data[:, -1].astype(int)

    if data_target:
        return DataSet(data, target)
    else:
        return train_test_split(data, target, test_size=0.33, random_seed=5)
Example #2
0
def fetch_pima_indians(data_target=True):
    """Download and load the Pima Indians diabetes dataset.

    Args:
        data_target (bool): if True, return a ``DataSet``; otherwise return
            a train/test split with ``test_size=0.2`` and ``random_seed=2``.

    Returns:
        DataSet or tuple: the dataset, or the split produced by
        ``train_test_split``.
    """
    file_path = maybe_download('../../ztlearn/datasets/pima/', URL)
    describe = [
        'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
        'DiabetesPedigreeFunction', 'Age', 'Insulin', 'BMI', 'Outcome (0 or 1)'
    ]

    frame = pd.read_csv(file_path, names=describe)

    # first 8 columns are features, the 9th is the outcome label
    values = frame.values
    data = values[:, 0:8]
    target = values[:, 8]

    if data_target:
        return DataSet(data, target, describe)
    return train_test_split(data, target, test_size=0.2, random_seed=2)
Example #3
0
def fetch_boston(data_target=True):
    """Download and load the Boston housing dataset.

    Args:
        data_target (bool): if True, return a ``DataSet``; otherwise return
            a train/test split with ``test_size=0.2`` and ``random_seed=2``.

    Returns:
        DataSet or tuple: the dataset, or the split produced by
        ``train_test_split``.
    """
    file_path = maybe_download('../../ztlearn/datasets/boston/', URL)
    describe = [
        'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
        'PTRATIO', 'B', 'LSTAT', 'MEDV'
    ]

    # ``delim_whitespace=True`` was deprecated in pandas 2.2 and removed in
    # 3.0; ``sep=r"\s+"`` is the documented equivalent (any run of whitespace
    # as delimiter).
    dataframe = pd.read_csv(file_path, sep=r"\s+", names=describe)

    # first 13 columns are features; MEDV (median value) is the target
    data, target = dataframe.values[:, 0:13], dataframe.values[:, 13]

    if data_target:
        return DataSet(data, target, describe)
    else:
        return train_test_split(data, target, test_size=0.2, random_seed=2)
Example #4
0
def fetch_iris(data_target = True):
    """Download and load the Iris dataset.

    Args:
        data_target (bool): if True, return a ``DataSet``; otherwise return
            a train/test split with ``test_size=0.2`` and ``random_seed=2``.

    Returns:
        DataSet or tuple: the dataset, or the split produced by
        ``train_test_split``.
    """
    file_path = maybe_download('../../ztlearn/datasets/iris/', URL)
    describe  = [
        'sepal-length (cm)',
        'sepal-width (cm)',
        'petal-length (cm)',
        'petal-width (cm)',
        'petal_type'
    ]

    frame = pd.read_csv(file_path, names = describe)

    # encode the species label as integer category codes,
    # i.e. {0:'Iris-setosa', 1:'Iris-versicolor', 2:'Iris-virginica'}
    frame['petal_type'] = pd.Categorical(frame['petal_type']).codes

    # first 4 columns are the measurements; column 4 is the encoded species
    data = frame.values[:, 0:4]
    target = frame.values[:, 4].astype('int')

    if data_target:
        return DataSet(data, target, describe)
    return train_test_split(data, target, test_size = 0.2, random_seed = 2)
Example #5
0
def fetch_steel_plates_faults(data_target=True, custom_path=os.getcwd()):
    """Download and load the steel plates faults dataset.

    Args:
        data_target (bool): if True, return a ``DataSet``; otherwise return
            a train/test split with ``test_size=0.2`` and ``random_seed=2``.
        custom_path (str): base directory for the download location.
            NOTE(review): this default is evaluated once at import time, so
            it captures the CWD of the importing process — confirm intended.

    Returns:
        DataSet or tuple: the dataset (multi-label fault indicator target),
        or the split produced by ``train_test_split``.
    """
    file_path = maybe_download(custom_path + '/../../ztlearn/datasets/steel/',
                               URL)
    file_path_2 = maybe_download(
        custom_path + '/../../ztlearn/datasets/steel/', URL_2)
    describe = [
        'Pastry', 'Z_Scratch', 'K_Scatch', 'Stains', 'Dirtiness', 'Bumps',
        'Other_Faults'
    ]

    # second file carries the column names, first file the tab-separated data
    input_header = pd.read_csv(file_path_2, header=None)
    input_data = pd.read_csv(file_path, header=None, sep="\t")

    # ``set_axis(..., inplace=True)`` was deprecated in pandas 1.5 and the
    # ``inplace`` parameter removed in 2.0 (it raises TypeError there);
    # assigning ``columns`` directly is the stable equivalent.
    input_data.columns = input_header.values.flatten()

    # features = everything except the 7 fault-indicator columns
    dataframe = input_data.drop(describe, axis=1)
    targetframe = input_data[describe].copy()

    data, target = dataframe.values, targetframe.values

    if data_target:
        return DataSet(data, target, describe)
    else:
        return train_test_split(data, target, test_size=0.2, random_seed=2)
Example #6
0
import numpy as np

from ztlearn.dl.layers import Embedding
from ztlearn.dl.models import Sequential
from ztlearn.dl.optimizers import register_opt
from ztlearn.utils import train_test_split


# Demo: train a single Embedding layer on random integer data.
# Fix: the original file used ``np.random`` without ever importing numpy
# (NameError at runtime); ``import numpy as np`` is added to the import block.
opt = register_opt(optimizer_name='sgd_momentum', momentum=0.01, learning_rate=0.001)

model = Sequential(init_method='he_normal')
model.add(Embedding(10, 2, activation='selu', input_shape=(1, 10)))
model.compile(loss='categorical_crossentropy', optimizer=opt)

# 5 samples of shape (1, 10) with token ids in [0, 10).
# NOTE(review): labels are drawn from range(14) while the embedding
# vocabulary is 10 — confirm the label range is intended.
train_data = np.random.randint(10, size=(5, 1, 10))
train_label = np.random.randint(14, size=(5, 1, 10))

# hold out 10% of the samples for testing
train_data, test_data, train_label, test_label = train_test_split(train_data,
                                                                  train_label,
                                                                  test_size=0.1)

fit_stats = model.fit(train_data, train_label, batch_size=4, epochs=50)


"""
works

data = np.arange(0,100,1).reshape(10,1,10)
labels = np.arange(1,101,1).reshape(10,1,10)

model.add(Embedding(100, 5, activation = 'selu', input_shape = (1, 10)))
model.add(RNN(10, activation="tanh", bptt_truncate = 3, input_shape = (10, 10)))

"""