class MemberTraining:
    """Train and evaluate a binary 'Exited' classifier on preprocessed member data.

    The CSV is split 60/40 into train/validation; the full dataset doubles as
    the test split (kept from the original behaviour — consider a disjoint
    hold-out set instead).
    """

    train_data: object = None       # (features, labels) for training
    validation_data: object = None  # (features, labels) for validation
    test_data: object = None        # (features, labels) for final evaluation
    model: object = None            # compiled tf.keras model

    def __init__(self):
        self.reader = FileReader()

    def hook(self):
        """Run the pipeline end to end (training currently disabled)."""
        self.get_data()
        self.create_model()
        # self.train_model()
        self.eval_model()
        # self.debug_model()

    @staticmethod
    def create_train(this):
        """Return the feature columns (everything except the 'Exited' label)."""
        return this.drop('Exited', axis=1)

    @staticmethod
    def create_label(this):
        """Return the 'Exited' label column."""
        return this['Exited']

    def get_data(self):
        """Load the preprocessed CSV and build (x, y) train/validation/test splits.

        Assumes the label is the last column of the CSV, matching the
        preprocessing step — TODO confirm against data_preprocessed.
        """
        self.reader.context = os.path.join(baseurl, 'data_preprocessed')
        self.reader.fname = 'member_preprocessed.csv'
        data = self.reader.csv_to_dframe().to_numpy()

        # Separate features from the label column (last column); the original
        # fed raw rows (features + label together) straight into fit/evaluate.
        x = data[:, :-1]
        y = data[:, -1:]

        data_length = len(data)
        vol_train = round(data_length * 60 / 100)  # 60% train / 40% validation

        self.train_data = (x[:vol_train], y[:vol_train])
        self.validation_data = (x[vol_train:], y[vol_train:])
        # NOTE(review): test split reuses the whole dataset, as before.
        self.test_data = (x, y)
        # Fixed: the original ended with print(training_set), an undefined
        # name that raised NameError on every call.

    # Model creation (textbook p.507); Dense = fully connected layer.
    def create_model(self):
        """Build and compile a small dense binary classifier."""
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(16, activation='relu'))
        model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # output
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        self.model = model

    # Model training
    def train_model(self):
        """Fit the model on the training split, monitoring validation loss."""
        x_train, y_train = self.train_data
        # Fixed: the original called .shuffle()/.batch() on NumPy arrays,
        # which only tf.data.Dataset supports. Plain fit() with batch_size
        # shuffles by default.
        self.model.fit(x_train, y_train,
                       batch_size=512,  # 512 = 2 ** 9
                       epochs=20,
                       validation_data=self.validation_data,
                       verbose=1)

    # Model evaluation
    def eval_model(self):
        """Evaluate on the test split and print each metric."""
        x_test, y_test = self.test_data
        # Fixed: the original called .batch() on a NumPy array.
        results = self.model.evaluate(x_test, y_test,
                                      batch_size=512, verbose=2)
        for name, value in zip(self.model.metrics_names, results):
            print('%s: %.3f' % (name, value))

    # Model debugging
    def debug_model(self):
        print(f'self.train_data: {self.train_data}')
        print(f'self.validation_data: {self.validation_data}')
        print(f'self.test_data: {self.test_data}')
class SaveLoad:
    """Demonstrates checkpoint callbacks, weight restore, and HDF5 save/load
    on the preprocessed member dataset."""

    train_datas: object = None   # feature rows for training (capped at 1000)
    train_labels: object = None  # matching 1-column labels
    test_datas: object = None    # feature rows for evaluation (capped at 1000)
    test_labels: object = None   # matching 1-column labels

    def __init__(self):
        self.reader = FileReader()

    def hook(self):
        """Load data, build the model, and run the checkpointed training demo."""
        self.get_data()
        self.create_model()
        self.train_model()
        # self.save_model()

    def get_data(self):
        """Load the preprocessed CSV and split into train/test subsets."""
        print(f'baseurl: {baseurl}')
        # Fixed: use the project base path instead of the hard-coded
        # machine-specific 'C:/Users/saltQ/sbaproject-api/model' path.
        self.reader.context = os.path.join(baseurl, 'data_preprocessed')
        self.reader.fname = 'member_preprocessed.csv'
        data = self.reader.csv_to_dframe().to_numpy()
        print(data[:60])

        # Last column is the label; everything before it is a feature.
        table_col = data.shape[1]
        y_col = 1
        x_col = table_col - y_col
        x = data[:, 0:x_col]
        y = data[:, x_col:]
        train_datas, test_datas, train_labels, test_labels = train_test_split(
            x, y, test_size=0.4)
        # Cap every split at 1000 rows to keep the demo fast.
        self.train_labels = train_labels[:1000]
        self.test_labels = test_labels[:1000]
        self.train_datas = train_datas[:1000]
        self.test_datas = test_datas[:1000]

    def create_model(self):
        """Build a small binary classifier sized to the actual feature count."""
        # Fixed: input_shape was hard-coded to (784,), which raised
        # "expected axis -1 of input shape to have value 784 but received
        # input with shape [None, 12]" — derive it from the data instead.
        n_features = self.train_datas.shape[1]
        self.model = tf.keras.models.Sequential([
            keras.layers.Dense(512, activation='relu',
                               input_shape=(n_features, )),
            keras.layers.Dropout(0.2),
            # Fixed: one sigmoid unit, matching binary_crossentropy on a
            # single-column 0/1 label (was Dense(10)).
            keras.layers.Dense(1, activation='sigmoid')
        ])
        self.model.compile(optimizer='adam',
                           loss='binary_crossentropy',
                           metrics=['accuracy'])

    def train_model(self):
        """Train with checkpoint callbacks, restore weights, then checkpoint
        by epoch number with a second callback."""
        checkpoint_path = 'training_1/cp.ckpt'
        cp_callback = tf.keras.callbacks.ModelCheckpoint(
            checkpoint_path, save_weights_only=True, verbose=1)
        print('***** fit *****')
        # Fixed: fit() cannot consume an unbatched Dataset of scalar rows —
        # batch it before training.
        train_data = tf.data.Dataset.from_tensor_slices(
            (self.train_datas, self.train_labels)).batch(32)
        self.model.fit(train_data,
                       epochs=10,
                       validation_data=(self.test_datas, self.test_labels),
                       callbacks=[cp_callback])  # pass callback during training
        self.model.load_weights(checkpoint_path)  # restore saved weights
        loss, acc = self.model.evaluate(
            self.test_datas, self.test_labels,
            verbose=2)  # verbose controls training progress output
        print('복원된 모델의 정확도: {:5.2f}%'.format(100 * acc))

        # Include the epoch number in the checkpoint file name.
        # Fixed: the original format spec '{epoch: 04d}' contained a stray
        # space, producing file names like 'cp- 000.ckpt'.
        checkpoint_path = os.path.join(baseurl, 'training_2',
                                       'cp-{epoch:04d}.ckpt')
        checkpoint_dir = os.path.dirname(checkpoint_path)
        cp_callback = tf.keras.callbacks.ModelCheckpoint(
            checkpoint_path,
            verbose=1,
            save_weights_only=True,
            period=5  # save weights every 5th epoch
        )
        print(f'checkpoint: {checkpoint_path}')
        self.model.save_weights(checkpoint_path.format(epoch=0))
        self.model.fit(self.train_datas,
                       self.train_labels,
                       epochs=50,
                       callbacks=[cp_callback],
                       validation_data=(self.test_datas, self.test_labels),
                       verbose=0)

    # Save the entire model as an HDF5 file.
    def save_model(self):
        context = os.path.join(baseurl, 'saved_model')
        self.model.save(os.path.join(context, 'my_model.h5'))
        print('=' * 30)

    def load_model(self):
        """Reload the saved model and evaluate it on the test split."""
        self.new_model = keras.models.load_model('my_model.h5')
        self.new_model.summary()
        # Fixed: the original referenced the never-defined attribute
        # self.test_images; the test split lives in self.test_datas.
        loss, acc = self.new_model.evaluate(self.test_datas,
                                            self.test_labels,
                                            verbose=2)

    def debug_model(self):
        print(f'모델정보: {self.model.summary()}')
class MemberTraining:
    """End-to-end training pipeline for the churn ('Exited') classifier:
    load, split, build, fit, evaluate, and dump the splits for debugging."""

    x_train: object = None
    y_train: object = None
    x_validation: object = None
    y_validation: object = None
    x_test: object = None
    y_test: object = None
    model: object = None

    def __init__(self):
        self.reader = FileReader()

    def hook(self):
        """Run every pipeline stage in order."""
        self.get_data()
        self.create_model()
        self.train_model()
        self.eval_model()
        self.debug_model()

    @staticmethod
    def create_train(this):
        """Drop the label column, keeping only the feature columns."""
        return this.drop('Exited', axis=1)

    @staticmethod
    def create_label(this):
        """Select the label column."""
        return this['Exited']

    def get_data(self):
        """Read the preprocessed CSV and carve out train/validation/test splits."""
        self.reader.context = os.path.join(baseurl, 'data_preprocessed')
        self.reader.fname = 'member_preprocessed.csv'
        frame = self.reader.csv_to_dframe()
        matrix = frame.to_numpy()

        # The label occupies the final column; everything before it is a feature.
        feature_count = matrix.shape[1] - 1
        features = matrix[:, :feature_count]
        labels = matrix[:, feature_count:]

        # First split: 60% train / 40% held out.
        x_train, x_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.4)
        # Second split: carve a validation set out of the held-out 40%.
        x_test, x_validation, y_test, y_validation = train_test_split(
            x_test, y_test, test_size=0.4)

        self.x_train, self.y_train = x_train, y_train
        self.x_validation, self.y_validation = x_validation, y_validation
        self.x_test, self.y_test = x_test, y_test

    # Model creation (textbook p.507); Dense = fully connected layer.
    def create_model(self):
        """Build and compile a small fully-connected binary classifier."""
        print('********** create model **********')
        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid'),  # output layer
        ])
        self.model.compile(optimizer='adam',
                           loss='binary_crossentropy',
                           metrics=['accuracy'])

    # Model training
    def train_model(self):
        """Fit on the training split while monitoring the validation split."""
        print('********** train model **********')
        self.model.fit(x=self.x_train,
                       y=self.y_train,
                       validation_data=(self.x_validation, self.y_validation),
                       epochs=20,
                       verbose=1)

    # Model evaluation
    def eval_model(self):
        """Evaluate on the test split and print each metric as name: value."""
        print('********** eval model **********')
        scores = self.model.evaluate(x=self.x_test, y=self.y_test, verbose=2)
        for metric, score in zip(self.model.metrics_names, scores):
            print('%s: %.3f' % (metric, score))

    # Model debugging
    def debug_model(self):
        """Dump the raw split arrays for inspection."""
        print(f'self.train_data: {(self.x_train, self.y_train)}')
        print(
            f'self.validation_data: {(self.x_validation, self.y_validation)}')
        print(f'self.test_data: {(self.x_test, self.y_test)}')