def load():
    """Download the Titanic CSV and return ``(features, labels)``.

    The features come back as a pandas DataFrame in which:

    * the ``name`` and ``ticket`` columns have been dropped,
    * ``age`` has been replaced by one-hot ``age_group_*`` indicator
      columns (bands A-E),
    * ``fare``, ``sibsp`` and ``parch`` have each been divided by their
      column maximum,
    * the strings ``"male"`` / ``"female"`` have been replaced by 0 / 1.

    ``labels`` is whatever ``load_csv`` returns for column 0 with
    ``categorical_labels=False``. The final frame is also printed.
    """
    # Widen pandas' console output so the printed frame is readable.
    for option, value in (('display.max_columns', 10),
                          ('display.max_rows', 100)):
        pd.set_option(option, value)

    # Fetch the dataset; the first CSV column holds the labels.
    csv_path = 'titanic_dataset.csv'
    titanic.download_dataset(csv_path)
    data, labels = load_csv(csv_path, target_column=0, has_header=True,
                            categorical_labels=False, n_classes=2)

    # Wrap the raw rows in a DataFrame for convenience.
    feature_names = [
        "pclass", "name", "sex", "age", "sibsp", "parch", "ticket", "fare"
    ]
    df = pd.DataFrame(data, columns=feature_names)
    df = df.drop(columns=['name', 'ticket'])

    # Bin ages into five bands and one-hot encode the band; the raw age
    # and the intermediate band column are not kept.
    age_edges = [-1, 17, 35, 50, 65, 1000]
    age_bands = ['A', 'B', 'C', 'D', 'E']
    df['age'] = df['age'].astype('float64')
    df['age_group'] = pd.cut(df['age'], bins=age_edges, labels=age_bands)
    band_indicators = pd.get_dummies(df['age_group'], prefix='age_group')
    df = pd.concat([df, band_indicators], axis=1)
    df = df.drop(columns=['age', 'age_group'])

    # Normalize the remaining numeric columns by their own maximum.
    for column in ('fare', 'sibsp', 'parch'):
        df[column] = df[column].astype('float64')
        df[column] = df[column] / df[column].max()

    # Encode sex numerically.
    df = df.replace(["male", "female"], [0, 1])

    print(df)
    return df, labels
def prepare_csv():
    """Download the Titanic CSV and return preprocessed ``(data, labels)``.

    Column 0 of the CSV is used as the label and is one-hot encoded over
    two classes. The remaining columns are handed to ``preprocess_csv``
    together with ``to_ignore``; both of those names are expected to be
    defined elsewhere in this module.

    Returns:
        A ``(data, labels)`` tuple: the output of ``preprocess_csv`` and the
        categorical labels produced by ``load_csv``.
    """
    from tflearn.datasets import titanic
    from tflearn.data_utils import load_csv

    # Download the Titanic dataset (tflearn skips this if the file exists).
    csv_path = 'titanic_dataset.csv'
    titanic.download_dataset(csv_path)

    # The CSV distributed by tflearn starts with a header row
    # (survived, pclass, name, ...). It has to be skipped: with
    # has_header=False the header would be parsed as a sample and its label
    # text could not be converted to an integer class id.
    data, labels = load_csv(csv_path, target_column=0, has_header=True,
                            categorical_labels=True, n_classes=2)

    # Preprocess data
    data = preprocess_csv(data, to_ignore)
    return data, labels
def load_titanic():
    """Download the Titanic dataset and return it as train/test arrays.

    The original author notes the dataset length as 1309.

    The 'name' and 'ticket' columns are removed and the 'sex' field is
    encoded as 1.0 for 'female' and 0.0 otherwise, leaving six float32
    features per passenger.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test, input_dim,
        num_classes)``. No hold-out split is made: ``x_test`` / ``y_test``
        are the very same objects as ``x_train`` / ``y_train``.
        ``input_dim`` is ``(6, )`` and ``num_classes`` is ``2``.
    """
    import numpy as np
    from tflearn.datasets import titanic
    from tflearn.data_utils import load_csv

    titanic.download_dataset('titanic_dataset.csv')

    # Load CSV file, indicate that the first column represents labels
    data, titanic_labels = load_csv('titanic_dataset.csv', target_column=0,
                                    categorical_labels=True, n_classes=2)

    def preprocess(passengers, columns_to_delete):
        """Drop the given columns, encode 'sex', return a float32 array.

        The rows in ``passengers`` are modified in place.
        """
        # Delete from the highest index down so that removing one column
        # does not shift the position of the columns still to be removed.
        for column in sorted(columns_to_delete, reverse=True):
            for passenger in passengers:
                passenger.pop(column)
        # 'sex' sits at index 1 once the columns above are gone.
        for passenger in passengers:
            passenger[1] = 1. if passenger[1] == 'female' else 0.
        return np.array(passengers, dtype=np.float32)

    # Ignore 'name' and 'ticket' columns (id 1 & 6 of data array)
    to_ignore = [1, 6]

    # Preprocess data
    x_train = preprocess(data, to_ignore)
    y_train = titanic_labels
    x_test = x_train
    y_test = y_train

    input_dim = (6, )
    num_classes = 2

    return x_train, y_train, x_test, y_test, input_dim, num_classes
import tflearn
import numpy as np
from tflearn.datasets import titanic

# Fetch the dataset file.
titanic.download_dataset('titanic_dataset.csv')

from tflearn.data_utils import load_csv

# Column 0 is the label; it is one-hot encoded over two classes.
data, labels = load_csv('titanic_dataset.csv', target_column=0,
                        categorical_labels=True, n_classes=2)


def preprocess(data, columns_to_ignore):
    """Strip unwanted columns, encode 'sex', and return a float32 array.

    The rows of ``data`` are modified in place.
    """
    # Work from the highest column id downwards so earlier removals do not
    # shift the ids of the columns that are still to be removed.
    for column in sorted(columns_to_ignore, reverse=True):
        for row in data:
            row.pop(column)
    # 'sex' is at index 1 after the removal: 1.0 for 'female', else 0.0.
    for row in data:
        row[1] = 1. if row[1] == 'female' else 0.
    return np.array(data, dtype=np.float32)


# Ignore 'name' and 'ticket' columns (id 1 & 6 of data array)
to_ignore = [1, 6]

# Preprocess data
data = preprocess(data, to_ignore)

# Network input: six values per sample.
net = tflearn.input_data(shape=[None, 6])
# -*- coding: utf-8 -*- from __future__ import print_function import numpy as np import tflearn # Download the Titanic dataset from tflearn.datasets import titanic titanic.download_dataset('titanic_dataset.csv') # Load CSV file, indicate that the first column represents labels from tflearn.data_utils import load_csv data, labels = load_csv('titanic_dataset.csv', target_column=0, categorical_labels=True, n_classes=2) # Preprocessing function def preprocess(data, columns_to_ignore): # Sort by descending id and delete columns for id in sorted(columns_to_ignore, reverse=True): [r.pop(id) for r in data] for i in range(len(data)): # Converting 'sex' field to float (id is 1 after removing labels column) data[i][1] = 1. if data[i][1] == 'female' else 0. return np.array(data, dtype=np.float32) # Ignore 'name' and 'ticket' columns (id 1 & 6 of data array) to_ignore=[1, 6] # Preprocess data data = preprocess(data, to_ignore)
import tflearn
import numpy
from tflearn.datasets import titanic

titanic.download_dataset("titanic_dataset.csv")

from tflearn.data_utils import load_csv

# Column 0 is the label, one-hot encoded over two classes (the original
# note describes them as "survived or dead"). Columns 2 and 7 of the file
# are skipped while loading -- presumably name and ticket; confirm against
# the CSV header.
data, labels = load_csv(
    "titanic_dataset.csv",
    target_column=0,
    categorical_labels=True,
    n_classes=2,
    columns_to_ignore=[2, 7],
)

# Encode the field at index 1 of every row: 1 for "female", 0 otherwise.
for passenger in data:
    passenger[1] = 1 if passenger[1] == "female" else 0

# Input layer: six values per sample.
net = tflearn.input_data(shape=[None, 6])
# Two fully connected layers of 20 units each.
for units in (20, 20):
    net = tflearn.fully_connected(net, units)
# Two-unit softmax output followed by the regression layer.
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)

# define model
model = tflearn.DNN(net)
from __future__ import print_function import numpy as np import tflearn import pandas as pd from tflearn.datasets import titanic from tflearn.data_utils import load_csv filename = 'var/titanic_dataset.csv' titanic.download_dataset(filename) dataset = pd.read_csv(filename) labels = pd.get_dummies(dataset['survived']).values dataset = dataset.drop(columns='name').drop(columns='ticket').drop( columns='survived') dataset['sex'] = dataset['sex'].apply(lambda sex: 1 if sex == 'female' else 0) log_dir = '' tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1) data = dataset.values # Build neural network net = tflearn.input_data(shape=[None, 6]) net = tflearn.fully_connected(net, 32) net = tflearn.fully_connected(net, 32) net = tflearn.fully_connected(net, 2, activation='softmax') net = tflearn.regression(net) model = tflearn.DNN(net) # Start training (apply gradient descent algorithm)
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 21 10:10:50 2017

@author: O222069
"""

import numpy as np
import pandas as pd
import tflearn

# Download the Titanic dataset and read it with pandas.
# NOTE(review): this DataFrame is replaced by the load_csv result a few
# lines below, so it looks redundant -- confirm before removing.
from tflearn.datasets import titanic
data = pd.read_csv(
    titanic.download_dataset('titanic_dataset.csv')
)

# Load the CSV file: column 0 is the label, one-hot encoded over two classes.
from tflearn.data_utils import load_csv, samplewise_std_normalization
data, labels = load_csv('titanic_dataset.csv', target_column=0,
                        categorical_labels=True, n_classes=2)


def preprocess(data, columns_to_ignore):
    """Remove the given columns from every row and return a float32 array.

    Rows are modified in place. After the removal, the field at index 1 is
    replaced by 1.0 when it equals 'female' and by 0.0 otherwise.
    """
    # Highest column id first, so the ids still pending remain valid.
    for col in sorted(columns_to_ignore, reverse=True):
        for entry in data:
            entry.pop(col)
    for entry in data:
        is_female = entry[1] == 'female'
        entry[1] = 1. if is_female else 0.
    return np.array(data, dtype=np.float32)
import numpy as np
import tflearn
from os.path import join, dirname

# Directory in which the dataset file is stored.
mypath = '/Users/raghav/mypython/'
dataset_path = join(mypath, 'titanic_dataset.csv')

# Download the Titanic dataset
from tflearn.datasets import titanic
titanic.download_dataset(dataset_path)

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data, labels = load_csv(dataset_path, target_column=0,
                        categorical_labels=True, n_classes=2)


def preprocess(data, columns_to_ignore):
    """Drop ``columns_to_ignore`` from each row and return a float32 array.

    The rows of ``data`` are changed in place. Once the columns are gone,
    'sex' is at index 1 and is encoded as 1.0 ('female') or 0.0 (other).
    """
    # Descending order keeps the remaining ids valid while columns vanish.
    for target in sorted(columns_to_ignore, reverse=True):
        for passenger in data:
            passenger.pop(target)
    for passenger in data:
        passenger[1] = 1. if passenger[1] == 'female' else 0.
    return np.array(data, dtype=np.float32)


# Ignore 'name' and 'ticket' columns (id 1 & 6 of data array)
to_ignore = [1, 6]
# DATE: Sep 22, 2018 # SUMMARY: Data Parser to scan through the various raw data # module for parsing CSV data sets from __future__ import print_function import csv import pandas as pd import tensorflow as tf import numpy as np import math import tflearn from tflearn.datasets import titanic from tflearn.data_utils import load_csv # Going over the Titanic Machine learning example DataSetFileLocation = 'titanic_dataset.csv' titanic.download_dataset(DataSetFileLocation) data, labels = load_csv(DataSetFileLocation, target_column=0, categorical_labels=True, n_classes=2) def preprocess(data, columns_to_ignore): # Sort by descending id and delete columns for id in sorted(columns_to_ignore, reverse=True): [r.pop(id) for r in data] for i in range(len(data)): # Converting 'sex' field to float (id is 1 after removing labels column) data[i][1] = 1. if data[i][1] == 'female' else 0. return np.array(data, dtype=np.float32)
import datetime

# Timestamp before the heavy imports.
print(datetime.datetime.now())

import numpy as np
import tflearn
from tflearn.datasets import titanic

# Timestamp after the imports.
print(datetime.datetime.now())

titanic.download_dataset("titanic_dataset.csv")

from tflearn.data_utils import load_csv

# Column 0 is the label; it is one-hot encoded over two classes.
data, labels = load_csv(
    "titanic_dataset.csv",
    target_column=0,
    categorical_labels=True,
    n_classes=2
)


def preprocess(data, columns_to_ignore):
    """Pop the ignored columns from every row and return a float32 array.

    Rows are modified in place; afterwards the field at index 1 is set to
    1.0 if it equals "female" and to 0.0 otherwise.
    """
    # Highest column id first so the remaining ids stay valid.
    for doomed in sorted(columns_to_ignore, reverse=True):
        for line in data:
            line.pop(doomed)
    for line in data:
        line[1] = 1. if line[1] == "female" else 0.
    return np.array(data, dtype=np.float32)


to_ignore = [1, 6]
data = preprocess(data, to_ignore)

# Timestamp once preprocessing has finished.
print("preprocess done", datetime.datetime.now())
# Originally written for Python 2 with tflearn. The print calls below use the
# single-argument parenthesised form, which is valid on Python 2 and 3 alike
# (the bare `print x` statement is a SyntaxError on Python 3).
import numpy as np
from tensorflow.contrib import learn
import tflearn

# Download the Titanic dataset
from tflearn.datasets import titanic
titanic.download_dataset('./data/titanic_dataset.csv')

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data_1, labels = load_csv('./data/titanic_dataset.csv', target_column=0,
                          categorical_labels=True, n_classes=2)

# Keep only the field at index 1 of every row as the text input
# (presumably the passenger name -- confirm against the CSV header).
data = [it[1] for it in data_1]
print(data)

# Turn each text into a sequence of vocabulary ids, then pad the sequences
# to a common length of 10.
tokenizer = learn.preprocessing.VocabularyProcessor(10)
data = list(tokenizer.fit_transform(data))
data = tflearn.data_utils.pad_sequences(data, maxlen=10)
print(data)
print(labels)

trainX = data