def run():
    """Train and evaluate the weighted-price LSTM pipeline.

    Loads the dataset, scales prices to [0, 1], builds windowed labels,
    trains the model, plots the history, then reloads the best checkpoint.
    """
    # load data in dataframe
    data = util.get_dataset()
    weighted_price = data.Weighted_Price.values.astype('float32')
    # reshape into an (n_samples, 1) column vector for the scaler
    weighted_price = weighted_price.reshape(len(weighted_price), 1)
    # scale data into [0, 1] before feeding the LSTM
    scaler = MinMaxScaler(feature_range=(0, 1))
    data_scaled = scaler.fit_transform(weighted_price)
    look_back = 5
    train_set, test_set = util.split_data(data_scaled, train_percentage=0.85)
    # fix: pass the look_back variable instead of repeating the literal 5,
    # so changing look_back updates both label windows consistently
    x_train, y_train = util.create_labels(train_set, look_back=look_back)
    x_test, y_test = util.create_labels(test_set, look_back=look_back)
    model = util.build_model()
    history = util.train_model(model, x_train, y_train)
    util.plot_training_history(history)
    # reload the best weights saved during training before any evaluation
    model.load_weights('saved_models/weights.best.lstm.hdf5')
def setup(self, stage=None):
    """Lightning hook: prepare the training dataset and its padding index.

    Builds the image transform needed for inputting images into Inception
    and loads the Flickr8k captions via ``get_dataset``.
    """
    self.root_folder = "../../data/flickr8k/images"
    self.annotation_file = "../../data/flickr8k/training_captions.txt"
    # resize large, then random-crop down to the 299x299 Inception input size
    pipeline = [
        transforms.Resize((356, 356)),
        transforms.RandomCrop((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
    self.transform = transforms.Compose(pipeline)
    self.train, self.pad_idx = get_dataset(
        self.root_folder, self.annotation_file, self.transform)
def main(hparams, cluster):
    '''
    Once we receive this round's parameters we can load our model and our
    datamodule. We kept the trainer's parameters fixed for convenience and
    avoided using functionality that would make it hard to compare between
    models.
    '''
    # datamodule wrapping the Flickr8k loaders; batch size and worker count
    # come from this trial's hyperparameters
    dm = FlickrDataModule(batch_size = hparams.batch_size,
                          num_workers = hparams.num_workers)
    dm.setup()

    # each trial has a separate version number which can be accessed from the cluster
    # the dataset is reloaded here to recover the vocabulary size and pad index
    train, pad_idx = get_dataset(
        "../../data/flickr8k/images",
        "../../data/flickr8k/training_captions.txt",
        dm.transform)
    vocab_size = len(train.vocab)

    # loading our model with this run's parameters
    model = CaptionGenerator(embed_size = hparams.embed_size,
                             hidden_size = hparams.hidden_size,
                             vocab_size = vocab_size,
                             num_layers = hparams.num_layers,
                             batch_size = hparams.batch_size,
                             pad_idx = pad_idx)

    # one TensorBoard run per HPC trial, keyed on the cluster's experiment number
    logger = TensorBoardLogger(save_dir = '../../data/caption_generator/',
                               version = cluster.hpc_exp_number,
                               name = 'lightning_logs')

    # NOTE(review): gpus / num_nodes / distributed_backend / early_stop_callback
    # are legacy pytorch-lightning Trainer arguments -- confirm the pinned version
    trainer = Trainer(logger = logger,
                      gpus = 2,
                      num_nodes = 13,
                      max_epochs = 1000,
                      auto_select_gpus = True,
                      profiler = True,
                      distributed_backend='ddp',
                      early_stop_callback=False)
    trainer.fit(model, dm)
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential, load_model
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import utilities

# ### Step 1 Load & Process Data

# In[2]:

# NOTE(review): leftover interactive debugger breakpoint -- pudb installs the
# `pu` shortcut on import; remove before running unattended
import pudb
pu.db

currency = "BTC"
currency_data = utilities.get_dataset(currency=currency)

# closing prices as a float32 (n_samples, 1) column vector
currency_close_price = currency_data.close.values.astype('float32')
currency_close_price = currency_close_price.reshape(len(currency_close_price), 1)

# In[3]:

def create_datasets(dataset, sequence_length):
    # Slide a window of sequence_length+1 over `dataset`; each window holds an
    # input sequence plus the following value as the prediction target.
    # NOTE(review): relies on `np` being imported elsewhere in this file; the
    # fragment appears truncated here (no return statement visible).
    sequence_length += 1
    seq_dataset = []
    for i in range(len(dataset) - sequence_length):
        seq_dataset.append(dataset[i:i + sequence_length])
    seq_dataset = np.array(seq_dataset)
import pickle
import utilities
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import IsolationForest
from matplotlib import pyplot
import numpy as np

# window sizes for the supervised framing of the series
n_per_in = 5
n_per_out = 1

X, y = utilities.get_dataset(n_per_in, n_per_out)
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.10, random_state=42)

print('Shape before outlier removal')
print(x_train.shape, y_train.shape)


def _load_model(name):
    """Unpickle a pre-trained model for the current window size.

    fix: the original passed open(...) straight to pickle.load, leaking four
    file handles; the context manager closes each file deterministically.
    (pickle is only safe here because the .sav files are locally produced.)
    """
    with open('models/no_rfe_model_%s_%d.sav' % (name, n_per_in), 'rb') as fh:
        return pickle.load(fh)


ri = _load_model('ri')
dt = _load_model('dt')
rf = _load_model('rf')
gbr = _load_model('gbr')
models_with_outliers = {'ri': ri, 'dt': dt, 'rf': rf, 'gbr': gbr}

# drop training rows flagged (-1) as outliers by an IsolationForest
iso = IsolationForest(contamination="auto", n_estimators=250, bootstrap=True)
yhat = iso.fit_predict(x_train)
mask = yhat != -1
x_train_iso, y_train_iso = x_train[mask, :], y_train[mask]

print('Shape after outlier removal')
print(x_train_iso.shape, y_train_iso.shape)

# targets of the rows that were removed
outliers = y_train[yhat == -1]
Change the **classifer** or the **dataset** on the left to see how different models perform.
""")

# NOTE(review): "classifer" in the markdown string above is a typo for
# "classifier" (runtime text -- left unchanged in this pass).

# Add a select box widget to the side
dataset_name = st.sidebar.selectbox("Select Dataset",
                                    ("Iris", "Breast Cancer", "Wine"))
classifier = st.sidebar.selectbox("Select Classifiers",
                                  ("KNN", "SVM", "Random Forest"))
# optional feature-scaling toggle
scaling = st.sidebar.checkbox("Scaling?")

# Get the data
X, y = utilities.get_dataset(dataset_name)
st.write("Shape of the data:", X.shape)
st.write("Number of Classes:", len(np.unique(y)))

# Add parameters to the UI based on the classifier
params = utilities.add_parameter_ui(classifier)

# Get our classifier with the chosen parameters
clf = utilities.get_classifier(classifier, params)

# Check if scaling is required
if scaling:
    X = utilities.scale_data(X)

# Make predictions and get accuracy
accuracy = utilities.classification(X, y, clf)
help="Number of trees that have to be trained",
    dest="number_of_trees")
parser.add_argument(
    "-f",
    "--features",
    required=False,
    type=int,
    default=None,
    help="Number of features to consider when looking for the best split",
    dest="max_features")
#parser.add_argument("-s", "--seed", required=False, type=int,
#                    help="Number used to initialize the internal state of the random number generator",
#                    dest="seed")
args = parser.parse_args()

# split the input into train/test partitions per the requested fraction
train_dataset, test_dataset = get_dataset(args.dataset, args.label_name,
                                          args.training_fraction)

# train the forest, then export each member tree as a .dot graph
forest = random_forest(train_dataset, args.number_of_trees, args.max_features)
for index, decision_tree in enumerate(forest):
    export_graphviz(
        str(args.output_directory) + "/tree" + str(index) + ".dot",
        decision_tree)

predictions = random_forest_classify(forest, test_dataset)
feature_importances = get_feature_importances(forest)
accuracy = get_accuracy(test_dataset, predictions)

# NOTE(review): the handle is not closed in this visible fragment --
# prefer `with open(...)`; verify against the rest of the file
output = open(str(args.output_directory) + "/output", 'w')
output.write("DATASET\n")
output.write("\t" + str(train_dataset.count_instances()) + " training examples\n")
output.write("\t" + str(test_dataset.count_instances()) + " test examples\n")
output.write("FEATURE IMPORTANCES\n")
shape=[config['canvas_size'], config['canvas_size']],
        file_path='./datasets',
        save_only=False,
        gtsrb_raw_file_path=GTSRB_DATASET_PATH,
        gtsrb_classes=config['classes'])
    testset = get_gtsrb(
        'test',
        shape=[config['canvas_size'], config['canvas_size']],
        file_path='./datasets',
        save_only=False,
        gtsrb_raw_file_path=GTSRB_DATASET_PATH,
        gtsrb_classes=config['classes'])
else:
    # non-GTSRB datasets go through the generic loader instead
    trainset = get_dataset(
        config['dataset'],
        'train',
        shape=[config['canvas_size'], config['canvas_size']],
        file_path='./datasets',
        save_only=False)
    testset = get_dataset(
        config['dataset'],
        'test',
        shape=[config['canvas_size'], config['canvas_size']],
        file_path='./datasets',
        save_only=False)

# directory portion of the checkpoint path
path = snapshot[:snapshot.rindex('/')]
if not os.path.exists(path):
    # NOTE(review): raised when the path does NOT exist -- FileNotFoundError
    # would be the accurate exception type here; confirm no caller catches
    # FileExistsError before changing it
    raise FileExistsError('Cannot access checkpoint files')

len_trainset = len(trainset['image'])
len_testset = len(testset['image'])
from pl_model import CaptionGenerator, FlickrDataModule
from PIL import Image
from bleu import BLEU
import torchvision.transforms as transforms

# Loading testing dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225)),
])

dataset, pad_idx = get_dataset('../../data/flickr8k/images',
                               '../../data/flickr8k/training_captions.csv',
                               transform)
test, pad_idx = get_dataset(
    "../../data/flickr8k/images",
    "../../data/caption_generator/testing_captions.csv",
    transform)

# unique image file names referenced by the test captions
file_names = np.unique(np.asarray(test.df['image']))

# Transforming all images to our CNN's input format
imgs = [
    transform(Image.open('../../data/flickr8k/images/' + name)
              .convert('RGB')).unsqueeze(0)
    for name in file_names
]
from bleu import BLEU
import torchvision.transforms as transforms

# setting random seed so any sampling below is reproducible
np.random.seed(163)

# loading training dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225)),
])
dataset, pad_idx = get_dataset('../../data/flickr8k/images',
                               '../../data/flickr8k/training_captions.csv',
                               transform)

# load COCO API (instances + captions annotations for val2014)
coco = COCO('../level_generator/annotations/instances_val2014.json')
coco_caps = COCO('../level_generator/annotations/captions_val2014.json')

# load model from the epoch-998 checkpoint of run version_24
epoch_file = '../../data/caption_generator/version_24/checkpoints/epoch=998.ckpt'
model = CaptionGenerator.load_from_checkpoint(checkpoint_path=epoch_file,
                                              pad_idx=pad_idx)

# load levels
# fix: the context-manager variable was named `input`, shadowing the builtin;
# renamed to level_file (pickle is acceptable here: the file is locally produced)
with open('../../data/levels.pkl', 'rb') as level_file:
    levels = pickle.load(level_file)
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import utilities

# # Predict Cryptocurrency Prices With Machine Learning #

# ### Step 1 Load & Process Data

# In[3]:

currency = "BTC"  # currency to operate on
# put all of the "currency" data (open, close, date, value) into currency_data
# <class 'pandas.core.frame.DataFrame'>
currency_data = utilities.get_dataset(
    currency=currency
)
# convert the data to float32 dtype so it can be interpreted by numpy
# <class 'numpy.ndarray'>
currency_close_price = currency_data.close.values.astype(
    'float32')
# reshape into an (n_samples, 1) column array
currency_close_price = currency_close_price.reshape(len(currency_close_price),
                                                    1)

# In[24]:

def create_datasets(dataset, sequence_length):
    # Build overlapping windows of length sequence_length+1 over `dataset`.
    # NOTE(review): the fragment is truncated -- the loop body and the return
    # statement are not visible in this chunk.
    sequence_length += 1
    seq_dataset = []  # list of windows
    for i in range(len(dataset) - sequence_length):
def get_data_loader(args):
    """Resolve the loader factory for ``args.dataset`` and build the loaders.

    Returns the ``(train_loader, test_loader)`` pair produced by calling the
    factory that ``utilities.get_dataset`` maps the dataset name to.
    """
    print('getting dataset...{}.....\n'.format(args.dataset))
    loader_factory = utilities.get_dataset(args.dataset)
    return loader_factory(args)
from pl_model import CaptionGenerator, FlickrDataModule
import pytorch_lightning as pl
import os
from PIL import Image

# resize large, then random-crop to 299x299
# (presumably the Inception input size -- confirm against the model)
transform = transforms.Compose(
    [
        transforms.Resize((356, 356)),
        transforms.RandomCrop((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# NOTE(review): relies on `transforms` and `get_dataset` imported elsewhere
train, pad_idx = get_dataset(
    "../data/flickr8k/images",
    "../data/flickr8k/training_captions.txt",
    transform)

# Hyperparameters
embed_size = 250
hidden_size = 250
vocab_size = len(train.vocab)  # vocabulary built from the training captions
num_layers = 1
learning_rate = 3e-4
#num_epochs = 1

# Training parameters
num_nodes = 2
gpus = 2  #2 GPUs/node

#for loader