Example #1
import os

import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python import SKCompat


def classify(tr_file, test_file):
    # Dataset file names; the files must live in your working directory

    if not os.path.exists(tr_file):
        print('no such train file')
        return
    if not os.path.exists(test_file):
        print('no such test file')
        return
    train_data = tr_file
    test_data = test_file
    # Load the datasets: training set and test set
    training_set = tf.contrib.learn.datasets.base.load_csv_without_header(
        filename=train_data, target_dtype=np.int, features_dtype=np.float32)
    test_set = tf.contrib.learn.datasets.base.load_csv_without_header(
        filename=test_data, target_dtype=np.int, features_dtype=np.float32)

    # Feature columns: all 2763 inputs are real-valued
    feature_columns = [
        tf.contrib.layers.real_valued_column("", dimension=2763)
    ]

    # Build the DNN: seven hidden layers with 4096, 8192, 4096, 2048, 512, 128 and 32 units
    classifier = SKCompat(
        tf.contrib.learn.DNNClassifier(
            feature_columns=feature_columns,
            hidden_units=[4096, 8192, 4096, 2048, 512, 128, 32],
            n_classes=13,
            model_dir=
            r'C:\Users\XUEJW\Desktop\兴业数据\分类用数据集\数据集\input dataset\model'))

    # Fit the model for 2000 training steps

    classifier.fit(x=training_set.data, y=training_set.target, steps=2000)

    # Compute accuracy on the test set
    accuracy_score = classifier.evaluate(x=test_set.data,
                                         y=test_set.target)["accuracy"]

    print('Accuracy: {0:f}'.format(accuracy_score))
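load_csv_without_header treats every column except the last as a float feature and the last column as the integer class label (target_column defaults to -1). A minimal sketch of producing such a file with toy data; the file name and sizes are placeholders, not from the original code:

import csv
import numpy as np

# 100 rows: 2763 float features followed by an integer label in [0, 13)
with open('toy_train.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for _ in range(100):
        row = list(np.random.rand(2763).astype(np.float32))
        row.append(np.random.randint(0, 13))
        writer.writerow(row)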
Example #2
def predict_done():
    LOG_DIR = './ops_logs'
    TIMESTEPS = 10
    RNN_LAYERS = [{'num_units': TIMESTEPS}]
    DENSE_LAYERS = [10, 10]
    TRAINING_STEPS = 1000
    BATCH_SIZE = 100
    PRINT_STEPS = TRAINING_STEPS // 100  # integer division so every_n_steps gets an int

    dateparse = lambda dates: pd.to_datetime(dates, format='%d/%m/%Y %H:%M')
    rawdata = pd.read_csv(
        "./model/input/ElectricityPrice/RealMarketPriceDataPT.csv",
        parse_dates={'timeline': ['date', '(UTC)']},
        index_col='timeline',
        date_parser=dateparse)

    X, y = load_csvdata(rawdata, TIMESTEPS, seperate=False)

    regressor = SKCompat(
        learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS,
                                            DENSE_LAYERS)))

    validation_monitor = learn.monitors.ValidationMonitor(
        X['val'],
        y['val'],
        every_n_steps=PRINT_STEPS,
        early_stopping_rounds=1000)

    regressor.fit(X['train'],
                  y['train'],
                  monitors=[validation_monitor],
                  batch_size=BATCH_SIZE,
                  steps=TRAINING_STEPS)

    predicted = regressor.predict(X['test'])
    mae = mean_absolute_error(y['test'], predicted)
    print('MAE: %f' % mae)

    plot_predicted, = plt.plot(predicted, label='predicted')
    plt.legend(handles=[plot_predicted])

    plt.show()
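The helpers lstm_model and load_csvdata come from the surrounding project and are not shown here. As a rough illustration of the shape contract the snippet relies on (windowed inputs, dicts keyed by 'train'/'val'/'test'), here is a hypothetical stub; the split ratios and names are assumptions, not the real helper:

import numpy as np

def load_csvdata_stub(rawdata, time_steps, seperate=False):
    # window the series into time_steps-long inputs, next value as target
    values = rawdata.values.astype('float32')
    X = np.array([values[i:i + time_steps]
                  for i in range(len(values) - time_steps)])
    y = values[time_steps:]
    n = len(X)
    bounds = {'train': (0, int(0.8 * n)),
              'val': (int(0.8 * n), int(0.9 * n)),
              'test': (int(0.9 * n), n)}
    X_sets = {k: X[a:b] for k, (a, b) in bounds.items()}
    y_sets = {k: y[a:b] for k, (a, b) in bounds.items()}
    return X_sets, y_sets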
Example #3
def test():
    if not os.path.isfile("input.csv") and not os.path.isfile("output.csv"):
        gatherData()
   
    regressor = SKCompat(learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS)))

    X, y = load_csvdata(TIMESTEPS,seperate=False)


    print('-----------------------------------------')
    print('train y shape', y['train'].shape)
    print('train y sample', y['train'][1:5])
    print(y['val'].shape)
    y['val'] = y['val'].reshape(359,8)
    # create a lstm instance and validation monitor
    validation_monitor = learn.monitors.ValidationMonitor(X['val'], y['val'])
    y['train'] = y['train'].reshape(3239,8)

    regressor.fit(X['train'], y['train'],
                  monitors=[validation_monitor],
                  batch_size=BATCH_SIZE,
                  steps=TRAINING_STEPS)

    print('X train shape', X['train'].shape)
    print('y train shape', y['train'].shape)
    y['test'] = y['test'].reshape(399,8)
    print('X test shape', X['test'].shape)
    print('y test shape', y['test'].shape)
    predictions = regressor.predict(X['test'])
    predicted = np.asmatrix(predictions, dtype=np.float32)

    squared_error = np.asarray(predicted - y['test']) ** 2
    rmse = np.sqrt(squared_error.mean())
    print(rmse.shape)
    # the previous rmse code was incorrect; arrays, not matrices, were needed:
    # rmse = np.sqrt(((predicted - y['test']) ** 2).mean())
    score = mean_squared_error(predicted, y['test'])
    nmse = score / np.var(y['test'])  # should be the variance of the original data, not of the fitted model's output; worth double-checking

    print("RSME: %f" % rmse)
    print("NSME: %f" % nmse)
    print("MSE: %f" % score)
Example #4
def main():
    localtime = time.asctime(time.localtime(time.time()))
    print(localtime)
    label_dict = {}
    file_train_data = []
    file_train_label = []
    train_data = []
    train_label = []
    filecount = 0
    labels = {
        'agriculture': 0,
        'artisinal_mine': 1,
        'bare_ground': 2,
        'blooming': 3,
        'blow_down': 4,
        'clear': 5,
        'cloudy': 6,
        'conventional_mine': 7,
        'cultivation': 8,
        'habitation': 9,
        'haze': 10,
        'partly_cloudy': 11,
        'primary': 12,
        'road': 13,
        'selective_logging': 14,
        'slash_burn': 15,
        'water': 16
    }
    df = pd.read_csv('train_v2.csv', header=None, dtype=object)
    for x in df.values[1:]:
        label_lst = []
        for y in str(x[1]).split(' '):
            label_lst.append(labels[y])
        label_dict[x[0]] = label_lst
    explore_path = "/Users/praneet/Documents/Kaggle/Amazon/train"
    for root, dirs, files in os.walk(explore_path):
        for file_name in files:
            if file_name.endswith(".jpg"):
                filecount += 1
                print(file_name)
                file_path = os.path.abspath(os.path.join(root, file_name))
                img = cv2.imread(file_path)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                fixed_size = (56, 56)
                img = cv2.resize(img, dsize=fixed_size)
                file_title = file_name.split('.')[0]
                file_train_data.append(img)
                file_train_label.append(label_dict[file_title])
    print(filecount)
    for x in range(len(file_train_label)):
        for y in range(len(file_train_label[x])):
            train_data.append(file_train_data[x])
            train_label.append(file_train_label[x][y])
    del file_train_data
    del file_train_label
    gc.collect()
    print(len(train_data))
    print(len(train_label))
    train_data = np.array(train_data, dtype=np.float32) / 255.
    train_label = np.array(train_label, dtype=np.int32)
    # Create the Estimator
    classifier = SKCompat(
        learn.Estimator(model_fn=train,
                        model_dir="/Users/praneet/Downloads/model"))
    # Set up logging for predictions
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)
    # Train the model
    classifier.fit(x=train_data,
                   y=train_label,
                   batch_size=100,
                   steps=1750,
                   monitors=[logging_hook])
    localtime = time.asctime(time.localtime(time.time()))
    print(localtime)
Example #5
val_X = val_X.reshape(val_X.shape[0], 1, val_X.shape[1])
validation_monitor = learn.monitors.ValidationMonitor(
    val_X,
    val_y,
    # every_n_steps=PRINT_STEPS,
    # early_stopping_rounds=1000,
)
train_X, train_y = train[:, 0:n_lag], train[:, n_lag:]
train_X = train_X.reshape(train_X.shape[0], 1, train_X.shape[1])

regressor.fit(train_X,
              train_y,
              monitors=[validation_monitor],
              batch_size=BATCH_SIZE,
              steps=TRAINING_STEPS)

print('X train shape', train_X.shape)
print('y train shape', train_y.shape)

print('X test shape', test_X.shape)
print('y test shape', test_y.shape)
predicted = np.asmatrix(regressor.predict(test_X), dtype=np.float32)
predicted = np.transpose(predicted)

rmse = np.sqrt((np.asarray((np.subtract(predicted, test_y)))**2).mean())
# the previous rmse code was incorrect; arrays, not matrices, were needed:
# rmse = np.sqrt(((predicted - y['test']) ** 2).mean())
score = mean_squared_error(predicted, test_y)
Example #6
mnist_classifier = SKCompat(
    learn.Estimator(model_fn=train.cnn_model, model_dir="./model"))

epoch = 50

for i in range(epoch):

    # Set up logging for predictions
    # Log the values in the "Softmax" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)

    # Train the model
    mnist_classifier.fit(x=train_data,
                         y=train_label_sub,
                         batch_size=128,
                         steps=400,
                         monitors=[logging_hook])

    # Configure the accuracy metric for evaluation
    metrics = {
        "accuracy":
        learn.MetricSpec(metric_fn=tf.metrics.accuracy,
                         prediction_key="classes"),
    }

    # Evaluate the model and print results
    eval_results = mnist_classifier.score(x=test_data,
                                          y=test_label_sub,
                                          metrics=metrics,
                                          batch_size=128)
    print(eval_results)
Example #7
# use the first three quarters of the data for training
train_len = round(3 * data_len / 4)

x_train = data[0:train_len, :, :]
# start with just predicting the opening value the next day
y_train = data[1:train_len + 1, :, 1]

x_test = data[train_len + 1:-1, :, :]
# start with just predicting the opening value the next day
y_test = data[train_len + 2:, :, 1]

# let's start by just flattening the data
x_train = np.reshape(x_train, (train_len, -1))
x_test = np.reshape(x_test, (len(y_test), -1))

# Specify that all features have real-value data
feature_columns = [
    tf.contrib.layers.real_valued_column("", dimension=x_train.shape[1])
]

classifier = SKCompat(
    learn.DNNRegressor(label_dimension=y_train.shape[1],
                       feature_columns=feature_columns,
                       hidden_units=[100, 50, 20]))
classifier.fit(x_train, y_train, steps=100000, batch_size=100)
predictions = classifier.predict(x_test)['scores']
score = metrics.r2_score(y_test, predictions)
mse = metrics.mean_squared_error(y_test, predictions)
# score = np.linalg.norm(y_test - predictions) / np.linalg.norm(y_test)
print("R^2 score: %f, MSE: %f" % (score, mse))
Example #8
print(y['train'])

# create a lstm instance and validation monitor

validation_monitor = learn.monitors.ValidationMonitor(
    X['val'], y['val'], every_n_steps=PRINT_STEPS, early_stopping_rounds=1000)
#validation_monitor = tf.train.SessionRunHook(X['val'], y['val'], every_n_steps=PRINT_STEPS, early_stopping_rounds=1000)

# print(X['train'])
# print(y['train'])

for batch_idx in range(1):
    print(batch_idx)
    regressor.fit(X['train'],
                  y['train'],
                  monitors=[validation_monitor],
                  batch_size=BATCH_SIZE,
                  steps=TRAINING_STEPS)

print(X['test'].shape)
print(y['test'].shape)
predicted = regressor.predict(X['test'])
#rmse = np.sqrt(((predicted - y['test']) ** 2).mean(axis=0))
score = mean_squared_error(predicted, y['test'])
print("MSE: %f" % score)

print(predicted)

plot_predicted, = plt.plot(predicted, label='predicted')
plot_test, = plt.plot(y['test'], label='test')
plt.legend([plot_predicted, plot_test], ['predicted', 'real'])
Example #9
def bag_of_words_model(features, target):
    """先转成词袋模型"""
    target = tf.one_hot(target, 15, 1, 0)
    features = encoders.bow_encoder(features,
                                    vocab_size=n_words,
                                    embed_dim=EMBEDDING_SIZE)
    logits = tf.contrib.layers.fully_connected(features,
                                               15,
                                               activation_fn=None)
    loss = tf.contrib.losses.softmax_cross_entropy(logits, target)
    train_op = tf.contrib.layers.optimize_loss(
        loss,
        tf.contrib.framework.get_global_step(),
        optimizer='Adam',
        learning_rate=0.01)
    return ({
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }, loss, train_op)


from tensorflow.contrib.learn.python import SKCompat

model_fn = bag_of_words_model
classifier = SKCompat(learn.Estimator(model_fn=model_fn))

# Train and predict
classifier.fit(x_train, y_train, steps=1000)
y_predicted = classifier.predict(x_test)['class']
score = metrics.accuracy_score(y_test, y_predicted)
print('Accuracy: {0:f}'.format(score))
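The snippet assumes x_train, x_test and n_words already exist. A hedged sketch of how they are typically produced in the contrib-era text classification examples, using VocabularyProcessor; train_texts and test_texts are hypothetical lists of raw strings, not part of the original snippet:

import numpy as np
from tensorflow.contrib import learn

MAX_DOCUMENT_LENGTH = 10
vocab_processor = learn.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH)
x_train = np.array(list(vocab_processor.fit_transform(train_texts)))
x_test = np.array(list(vocab_processor.transform(test_texts)))
n_words = len(vocab_processor.vocabulary_)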
Example #10
               TIMESTEPS)

# create a lstm instance and validation monitor
validation_monitor = learn.monitors.ValidationMonitor(x['val'], y['val'])

# Get the RNN model
model = SKCompat(
    learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, fcDim)))

# TRAIN START HERE ===========================================
model.fit(x['train'],
          y['train'],
          monitors=[validation_monitor],
          batch_size=BATCH_SIZE,
          steps=TRAINING_STEPS)

# TEST START HERE ===========================================
predicted = np.asmatrix(model.predict(x['test']), dtype=np.float32)

# Analyse Test Result
rmse = np.sqrt((np.asarray((np.subtract(predicted, y['test'])))**2).mean())
score = mean_squared_error(predicted, y['test'])
nmse = score / np.var(y['test'])  # should be the variance of the original data, not of the fitted model's output; worth double-checking

print("RSME: %f" % rmse)
print("NSME: %f" % nmse)
Example #11
X['train'] = arrayX[0:12000]
X['test'] = arrayX[12000:13000]
X['val'] = arrayX[13000:14000]

y['train'] = arrayy[0:12000]
y['test'] = arrayy[12000:13000]
y['val'] = arrayy[13000:14000]

# print y['test'][0]
# print y2['test'][0]

# X1, y2 = generate_data(np.sin, np.linspace(0, 100, 10000), TIMESTEPS, seperate=False)
# create a lstm instance and validation monitor
validation_monitor = learn.monitors.ValidationMonitor(
    X['val'], y['val'], every_n_steps=PRINT_STEPS, early_stopping_rounds=1000)

regressor.fit(X['train'], y['train'], monitors=[validation_monitor])

predicted = regressor.predict(X['test'])
rmse = np.sqrt(((predicted - y['test'])**2).mean(axis=0))
score = mean_squared_error(predicted, y['test'])
print(("MSE: %f" % score))

# plot_predicted, = plt.plot(array[:1000], label='predicted')

plt.subplot()
plot_predicted, = plt.plot(predicted, label='predicted')

plot_test, = plt.plot(y['test'], label='test')
plt.legend(handles=[plot_predicted, plot_test])
Example #12
class Cloud_Forecaster(Base_Forecaster):
    '''
    Description: Cloud cover forecast based on LSTM - RNN
    '''
    def __init__(self, raw_data, configs, mode, scale=1, outpath='../outputs'):
        '''
        scale: relationship between the time steps and the data interval (1/60)
        '''

        logging.info('Initializing Cloud Forecaster ...')

        super(Cloud_Forecaster, self).__init__(raw_data, configs, outpath)

        # load configurations
        self.scale = scale
        self.mode = mode

        logging.info('Cloud Forecaster is initialized.')

    def fit(self, datakey='total_cloud_fraction', ubd_min=8, lbd_max=15):

        logging.info('Start fitting data ...')

        # create tensorflow model
        self._init_model()

        # preprocess data
        ccp = Cloud_Cover_Preprocessor(self.raw_data,
                                       self.configs['time_steps'], datakey,
                                       self.scale, ubd_min, lbd_max, self.mode)
        self.feats, self.labels = ccp.preprocess()
        self._fit()

        logging.info('Fitting data is complete.')

    def _init_model(self):

        logging.info('Initializing LSTM model ...')

        self.regressor = SKCompat(
            learn.Estimator(model_fn=lstm_model(
                self.configs['time_steps'],
                self.configs['rnn_layers'],
                dense_layers=self.configs['dense_layers'],
                learning_rate=self.configs['learning_rate'])))

        logging.info('LSTM model is initialized.')

    def _fit(self):

        logging.info('Fitting training data ...')

        # adjust matrix dimensions
        x_train = np.expand_dims(self.feats['train'], axis=2)
        y_train = np.expand_dims(self.labels['train'], axis=2)

        logging.debug('Shape of x_train is: %s' % str(x_train.shape))
        logging.debug('Shape of y_train is: %s' % str(y_train.shape))

        # start training
        self.regressor.fit(x_train,
                           y_train,
                           batch_size=self.configs['batch_size'],
                           steps=self.configs['training_steps'])

        logging.info('Training data is fitted.')

    def get_test_score(self):

        logging.info('Testing on test data sets ...')

        x_test = np.expand_dims(self.feats['test'], axis=2)

        # start prediction
        preds = self.regressor.predict(x_test)

        # calculate MSE error
        mse = mean_squared_error(self.labels['test'], preds)

        rst_dict = {'preds': preds, 'labels': self.labels['test']}

        sio.savemat('../outputs/data_%s_fmt_%s.mat' %
                    (self.configs['data_name'], self.mode),
                    rst_dict)

        logging.info('Testing is completed.')

        return np.sqrt(mse)
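A hypothetical usage sketch of Cloud_Forecaster; raw_data and every config value below are placeholders chosen for illustration, not taken from the original project:

configs = {'time_steps': 24,
           'rnn_layers': [{'num_units': 24}],
           'dense_layers': [10, 10],
           'learning_rate': 0.01,
           'batch_size': 100,
           'training_steps': 1000,
           'data_name': 'demo'}

forecaster = Cloud_Forecaster(raw_data, configs, mode='train')
forecaster.fit(datakey='total_cloud_fraction')
print('Test RMSE: %f' % forecaster.get_test_score())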
Example #13
import tensorflow as tf
import numpy as np
from tensorflow.contrib.learn.python import SKCompat

iris = np.loadtxt('Data/Data/iris_softmax.csv',
                  delimiter=',',
                  dtype=np.float32)

x = iris[:, :-3]
y = iris[:, -3:]
y = np.argmax(y, axis=1)

feature_columns = [tf.feature_column.numeric_column('', shape=[5])]

clf = tf.contrib.learn.DNNClassifier(hidden_units=[10, 20, 5],
                                     feature_columns=feature_columns,
                                     n_classes=3)

clf = SKCompat(clf)

clf.fit(x=x, y=y, max_steps=1000)

print(clf.score(x=x, y=y))
Example #14
def main(unused_arguments):
    localtime = time.asctime(time.localtime(time.time()))
    print(localtime)
    content = np.loadtxt('train_data')
    train_data = []
    temp = 0
    tmp = []
    for x in content:
        if temp == 255:
            tmp.append(x)
            train_data.append(tmp)
            temp = 0
            tmp = []
        else:
            tmp.append(x)
            temp += 1
    with open('train_labels', 'rb') as fp:
        train_labels = pickle.load(fp)
    train_data = np.array(train_data, dtype=np.float32)
    train_labels = np.array(train_labels, dtype=np.int32)
    # Create the Estimator
    mnist_classifier = SKCompat(
        learn.Estimator(model_fn=trainTypes,
                        model_dir="/Users/praneet/Downloads/model"))
    # Set up logging for predictions
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)
    # Train the model
    mnist_classifier.fit(x=train_data,
                         y=train_labels,
                         batch_size=100,
                         steps=2500,
                         monitors=[logging_hook])
    # Accuracy metric for evaluation
    metrics = {
        "accuracy":
        learn.MetricSpec(metric_fn=tf.metrics.accuracy,
                         prediction_key="classes"),
    }
    # Evaluation
    print('Evaluation: ')
    content = np.loadtxt('eval_data')
    eval_data = []
    temp = 0
    tmp = []
    for x in content:
        if temp == 255:
            tmp.append(x)
            eval_data.append(tmp)
            temp = 0
            tmp = []
        else:
            tmp.append(x)
            temp += 1
    with open('eval_labels', 'rb') as fp:
        eval_labels = pickle.load(fp)
    eval_data = np.array(eval_data, dtype=np.float32)
    eval_labels = np.array(eval_labels, dtype=np.int32)
    # Evaluate the model
    eval_results = mnist_classifier.score(x=eval_data,
                                          y=eval_labels,
                                          metrics=metrics)
    print(eval_results)
    # Predictions
    print('Predictions: ')
    evallst = []
    outputlst = []
    fileCount = 0
    evaluatePath = "/Users/praneet/Downloads/test/"
    lst = []
    lst.append("image_name")
    lst.append("Type_1")
    lst.append("Type_2")
    lst.append("Type_3")
    evallst.append(lst)
    outputlst.append(lst)
    for root, dirs, files in os.walk(evaluatePath):
        for fileName in files:
            if fileName.endswith(".jpg"):
                eval_data = []
                filePath = os.path.abspath(os.path.join(root, fileName))
                img = cv2.imread(filePath)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = processImage(img)
                fixedSize = (256, 256)
                img = cv2.resize(img, dsize=fixedSize)
                img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
                tmp_data = img.astype(np.float32) / 255.0
                eval_data.append(tmp_data)
                eval_data = np.array(eval_data)
                # Predict values for each image
                predictions = mnist_classifier.predict(x=eval_data)
                print(fileName, predictions)
                lst = []
                lst.append(fileName)
                for x in predictions['probabilities']:
                    for y in x:
                        lst.append(y)
                outputlst.append(lst)
                if lst[1] > 0.25:
                    lst = []
                    lst.append(fileName)
                    lst.append(1)
                    lst.append(0)
                    lst.append(0)
                if lst[3] > 0.29:
                    lst = []
                    lst.append(fileName)
                    lst.append(0)
                    lst.append(0)
                    lst.append(1)
                if lst[2] > 0.58:
                    lst = []
                    lst.append(fileName)
                    lst.append(0)
                    lst.append(1)
                    lst.append(0)
                if lst[1] == 0 or lst[1] == 1:
                    print("Non Ambiguous Prediction")
                evallst.append(lst)
                fileCount += 1
    print('Total files: ', fileCount)
    df = pd.DataFrame(evallst)
    df.to_csv('output_normalized.csv', index=False, header=False)
    df = pd.DataFrame(outputlst)
    df.to_csv('output_integers.csv', index=False, header=False)
    localtime = time.asctime(time.localtime(time.time()))
    print(localtime)