Example no. 1
import pandas as pd

from tflearn.datasets import titanic
from tflearn.data_utils import load_csv


def load():
    pd.set_option('display.max_columns', 10)
    pd.set_option('display.max_rows', 100)

    # Download the Titanic dataset
    titanic.download_dataset('titanic_dataset.csv')

    # Load CSV file, indicate that the first column represents labels
    data, labels = load_csv('titanic_dataset.csv',
                            target_column=0,
                            has_header=True,
                            categorical_labels=False,
                            n_classes=2)

    # Make a df out of it for convenience
    df = pd.DataFrame(data,
                      columns=[
                          "pclass", "name", "sex", "age", "sibsp", "parch",
                          "ticket", "fare"
                      ])

    df = df.drop(columns=['name', 'ticket'])

    # bin ages
    df['age'] = df['age'].astype('float64')

    group_names = ['A', 'B', 'C', 'D', 'E']
    bins = [-1, 17, 35, 50, 65, 1000]
    df['age_group'] = pd.cut(df['age'], bins=bins, labels=group_names)

    res = pd.get_dummies(df['age_group'], prefix='age_group')
    df = pd.concat([df, res], axis=1)
    df = df.drop(columns=['age', 'age_group'])

    # normalize parch
    # df['parch'] = df['parch'].astype('float64')
    # df["parch"] = df["parch"] / df["parch"].max()

    # normalize age
    # df['age'] = df['age'].astype('float64')
    # df["age"] = df["age"] / df["age"].max()

    # normalize fare
    df['fare'] = df['fare'].astype('float64')
    df["fare"] = df["fare"] / df["fare"].max()

    # normalize sibsp
    df['sibsp'] = df['sibsp'].astype('float64')
    df["sibsp"] = df["sibsp"] / df["sibsp"].max()

    # normalize parch
    df['parch'] = df['parch'].astype('float64')
    df["parch"] = df["parch"] / df["parch"].max()

    # convert sex
    df = df.replace(["male", "female"], [0, 1])

    print(df)

    return df, labels
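
A minimal sketch of a caller for load(), assuming TFLearn is installed; the layer sizes, epoch count, and batch size below are illustrative guesses, not taken from the snippet. Because categorical_labels=False, the labels still need to be one-hot encoded before training.

import numpy as np
import tflearn
from tflearn.data_utils import to_categorical

df, labels = load()
X = df.values.astype(np.float32)                           # remaining columns are all numeric
Y = to_categorical(np.asarray(labels, dtype=np.int32), 2)  # one-hot: survived / died

net = tflearn.input_data(shape=[None, X.shape[1]])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)

model = tflearn.DNN(net)
model.fit(X, Y, n_epoch=10, batch_size=16, show_metric=True)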
Example no. 2
def prepare_csv():
    # Download the Titanic dataset
    from tflearn.datasets import titanic
    titanic.download_dataset('titanic_dataset.csv')

    # Load CSV file, indicate that the first column represents labels
    from tflearn.data_utils import load_csv
    data, labels = load_csv('titanic_dataset.csv',
                            target_column=0,
                            has_header=True,
                            categorical_labels=True,
                            n_classes=2)

    # Preprocess data
    data = preprocess_csv(data, to_ignore)

    return data, labels
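
preprocess_csv and to_ignore are not defined in this snippet; presumably they mirror the helper used in the other examples on this page. A hypothetical definition consistent with those (the name preprocess_csv is simply what this snippet expects):

import numpy as np

# Ignore 'name' and 'ticket' columns (id 1 & 6 of the data array), as in the other examples
to_ignore = [1, 6]


def preprocess_csv(data, columns_to_ignore):
    # Delete ignored columns in descending id order so earlier ids stay valid
    for column_id in sorted(columns_to_ignore, reverse=True):
        [row.pop(column_id) for row in data]
    for i in range(len(data)):
        # Convert the 'sex' field to float (id 1 once the label column is removed)
        data[i][1] = 1. if data[i][1] == 'female' else 0.
    return np.array(data, dtype=np.float32)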
Example no. 3
def load_titanic():

    import tflearn
    import numpy as np
    """
        Download Titanic dataset
        len = 1309
    """
    from tflearn.datasets import titanic
    titanic.download_dataset('titanic_dataset.csv')

    # Load CSV file, indicate that the first column represents labels
    from tflearn.data_utils import load_csv
    data, titanic_labels = load_csv('titanic_dataset.csv',
                                    target_column=0,
                                    categorical_labels=True,
                                    n_classes=2)

    # Preprocessing function
    def preprocess(passengers, columns_to_delete):
        # Sort by descending id and delete columns
        for column_to_delete in sorted(columns_to_delete, reverse=True):
            [passenger.pop(column_to_delete) for passenger in passengers]
        for i in range(len(passengers)):
            # Converting 'sex' field to float (id is 1 after removing labels column)
            passengers[i][1] = 1. if passengers[i][1] == 'female' else 0.
        return np.array(passengers, dtype=np.float32)

    # Ignore 'name' and 'ticket' columns (id 1 & 6 of data array)
    to_ignore = [1, 6]

    # Preprocess data
    x_train = preprocess(data, to_ignore)
    y_train = titanic_labels

    x_test = x_train
    y_test = y_train

    input_dim = (6, )
    num_classes = 2

    return x_train, y_train, x_test, y_test, input_dim, num_classes
Example no. 4
import tflearn
import numpy as np

from tflearn.datasets import titanic
titanic.download_dataset('titanic_dataset.csv')

from tflearn.data_utils import load_csv

data, labels = load_csv('titanic_dataset.csv',
                        target_column=0,
                        categorical_labels=True,
                        n_classes=2)


def preprocess(data, columns_to_ignore):
    # Sort by descending id and delete columns
    for id in sorted(columns_to_ignore, reverse=True):
        [r.pop(id) for r in data]
    for i in range(len(data)):
        # Converting 'sex' field to float (id is 1 after removing labels column)
        data[i][1] = 1. if data[i][1] == 'female' else 0.
    return np.array(data, dtype=np.float32)


# Ignore 'name' and 'ticket' columns (id 1 & 6 of data array)
to_ignore = [1, 6]

# Preprocess data
data = preprocess(data, to_ignore)

net = tflearn.input_data(shape=[None, 6])
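
The snippet stops after the input layer. A sketch of how the standard TFLearn quickstart continues from here; the layer widths, epoch count, and batch size are the usual tutorial values, not something taken from this snippet.

net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)

# Wrap the graph in a DNN model and train on the preprocessed data
model = tflearn.DNN(net)
model.fit(data, labels, n_epoch=10, batch_size=16, show_metric=True)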
Example no. 5
# -*- coding: utf-8 -*-
from __future__ import print_function

import numpy as np
import tflearn

# Download the Titanic dataset
from tflearn.datasets import titanic
titanic.download_dataset('titanic_dataset.csv')

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data, labels = load_csv('titanic_dataset.csv', target_column=0,
                        categorical_labels=True, n_classes=2)


# Preprocessing function
def preprocess(data, columns_to_ignore):
    # Sort by descending id and delete columns
    for id in sorted(columns_to_ignore, reverse=True):
        [r.pop(id) for r in data]
    for i in range(len(data)):
        # Converting 'sex' field to float (id is 1 after removing labels column)
        data[i][1] = 1. if data[i][1] == 'female' else 0.
    return np.array(data, dtype=np.float32)

# Ignore 'name' and 'ticket' columns (id 1 & 6 of data array)
to_ignore = [1, 6]

# Preprocess data
data = preprocess(data, to_ignore)
Example no. 6
import tflearn
import numpy

from tflearn.datasets import titanic

titanic.download_dataset("titanic_dataset.csv")

from tflearn.data_utils import load_csv
data, labels = load_csv(
    "titanic_dataset.csv",
    target_column=0,
    categorical_labels=True,
    n_classes=2,  # two label classes: survived or dead
    columns_to_ignore=[2, 7])  # drop 'name' and 'ticket' (ids 2 & 7 in the raw CSV)

for p in data:
    if p[1] == "female":
        p[1] = 1
    else:
        p[1] = 0

net = tflearn.input_data(shape=[None, 6])  # input layer expects 6 features per passenger
net = tflearn.fully_connected(net, 20)
net = tflearn.fully_connected(net, 20)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)

# define model
model = tflearn.DNN(net)
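
The snippet defines the model but never trains it. A sketch of the usual next steps; the training settings and the sample passenger values are made up for illustration.

# The loaded fields are still strings, so cast everything to float before training
X = numpy.array(data, dtype=numpy.float32)
model.fit(X, labels, n_epoch=10, batch_size=16, show_metric=True)

# Predict survival odds for a hypothetical passenger: [pclass, sex, age, sibsp, parch, fare]
print(model.predict([[3., 1., 22., 0., 0., 7.25]]))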
Example no. 7
from __future__ import print_function
import numpy as np
import tensorflow as tf
import tflearn
import pandas as pd
from tflearn.datasets import titanic
from tflearn.data_utils import load_csv

filename = 'var/titanic_dataset.csv'
titanic.download_dataset(filename)

dataset = pd.read_csv(filename)
labels = pd.get_dummies(dataset['survived']).values
dataset = dataset.drop(columns=['name', 'ticket', 'survived'])
dataset['sex'] = dataset['sex'].apply(lambda sex: 1 if sex == 'female' else 0)

# NOTE: this Keras callback is never wired into the TFLearn model below; TFLearn's DNN
# has its own TensorBoard support (tensorboard_dir / tensorboard_verbose arguments).
log_dir = ''
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                      histogram_freq=1)

data = dataset.values

# Build neural network
net = tflearn.input_data(shape=[None, 6])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)
model = tflearn.DNN(net)

# Start training (apply gradient descent algorithm)
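
The training call itself is cut off here. In TFLearn it would look roughly like the line below (epoch and batch settings are illustrative); note that TensorBoard logging is configured on the DNN constructor rather than through the Keras callback above.

# e.g. model = tflearn.DNN(net, tensorboard_dir='var/logs', tensorboard_verbose=1)
model.fit(data, labels, n_epoch=10, batch_size=16, show_metric=True)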
Example no. 8
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 21 10:10:50 2017

@author: O222069
"""

import numpy as np
import pandas as pd
import tflearn

#download the titanic dataset
from tflearn.datasets import titanic
titanic.download_dataset('titanic_dataset.csv')
data = pd.read_csv('titanic_dataset.csv')

#load csv file
from tflearn.data_utils import load_csv, samplewise_std_normalization
data, labels = load_csv('titanic_dataset.csv',
                        target_column=0,
                        categorical_labels=True,
                        n_classes=2)

#preprocessing function


def preprocess(data, columns_to_ignore):
    for id in sorted(columns_to_ignore, reverse=True):
        [r.pop(id) for r in data]
    for i in range(len(data)):
        data[i][1] = 1. if data[i][1] == 'female' else 0.
    return np.array(data, dtype=np.float32)
Example no. 9
import numpy as np
import tflearn
from os.path import join, dirname
mypath = '/Users/raghav/mypython/'

# Download the Titanic dataset
from tflearn.datasets import titanic
titanic.download_dataset(join(mypath, 'titanic_dataset.csv'))

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data, labels = load_csv(join(mypath, 'titanic_dataset.csv'),
                        target_column=0,
                        categorical_labels=True,
                        n_classes=2)


# Preprocessing function
def preprocess(data, columns_to_ignore):
    # Sort by descending id and delete columns
    for id in sorted(columns_to_ignore, reverse=True):
        [r.pop(id) for r in data]
    for i in range(len(data)):
        # Converting 'sex' field to float (id is 1 after removing labels column)
        data[i][1] = 1. if data[i][1] == 'female' else 0.
    return np.array(data, dtype=np.float32)


# Ignore 'name' and 'ticket' columns (id 1 & 6 of data array)
to_ignore = [1, 6]
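
The snippet defines to_ignore but stops before applying the helper. Completing it is just the call below; the commented line is an illustrative hint of how a validation split could be requested later with TFLearn's DNN.fit.

# Drop 'name'/'ticket' and map 'sex' to 0./1.
data = preprocess(data, to_ignore)

# Later, e.g.: model.fit(data, labels, validation_set=0.1, n_epoch=10, batch_size=16, show_metric=True)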
Example no. 10
# DATE: Sep 22, 2018
# SUMMARY: Data Parser to scan through the various raw data

# module for parsing CSV data sets
from __future__ import print_function
import csv
import pandas as pd
import tensorflow as tf
import numpy as np
import math
import tflearn
from tflearn.datasets import titanic
from tflearn.data_utils import load_csv
# Going over the Titanic Machine learning example
DataSetFileLocation = 'titanic_dataset.csv'
titanic.download_dataset(DataSetFileLocation)
data, labels = load_csv(DataSetFileLocation,
                        target_column=0,
                        categorical_labels=True,
                        n_classes=2)


def preprocess(data, columns_to_ignore):
    # Sort by descending id and delete columns
    for id in sorted(columns_to_ignore, reverse=True):
        [r.pop(id) for r in data]
    for i in range(len(data)):
        # Converting 'sex' field to float (id is 1 after removing labels column)
        data[i][1] = 1. if data[i][1] == 'female' else 0.
    return np.array(data, dtype=np.float32)
Example no. 11
import datetime

print(datetime.datetime.now())

import numpy as np
import tflearn

from tflearn.datasets import titanic

print(datetime.datetime.now())

titanic.download_dataset("titanic_dataset.csv")

from tflearn.data_utils import load_csv

data, labels = load_csv( "titanic_dataset.csv", target_column=0, categorical_labels=True,  n_classes=2 )

def preprocess(data, columns_to_ignore ) :
    for id in sorted(columns_to_ignore,reverse=True) :
        [r.pop(id) for r in data]
    for i in range(len(data)):
        data[i][1] = 1. if data[i][1] == "female" else 0.
    return np.array(data, dtype=np.float32)

to_ignore=[1,6]

data = preprocess(data,to_ignore)


print("preprocess done", datetime.datetime.now())
#
# python2 using tflearn

import numpy as np
from tensorflow.contrib import learn
import tflearn

# Download the Titanic dataset
from tflearn.datasets import titanic
titanic.download_dataset('./data/titanic_dataset.csv')

# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data_1, labels = load_csv('./data/titanic_dataset.csv',
                          target_column=0,
                          categorical_labels=True,
                          n_classes=2)

data = []
for it in data_1:
    data.append(it[1])

print(data)

tokenizer = learn.preprocessing.VocabularyProcessor(10)
data = list(tokenizer.fit_transform(data))
data = tflearn.data_utils.pad_sequences(data, maxlen=10)

print(data)
print(labels)

trainX = data
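
The second snippet ends right after assigning trainX. A sketch of how such padded name sequences are commonly fed to a TFLearn network, using an embedding layer followed by an LSTM; the layer sizes and training settings are illustrative guesses, and the vocabulary size is read from the fitted VocabularyProcessor.

trainY = labels  # already one-hot thanks to categorical_labels=True

net = tflearn.input_data(shape=[None, 10])
net = tflearn.embedding(net, input_dim=len(tokenizer.vocabulary_), output_dim=64)
net = tflearn.lstm(net, 64)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)

model = tflearn.DNN(net)
model.fit(trainX, trainY, n_epoch=5, batch_size=32, show_metric=True)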