示例#1
0
from eli5.permutation_importance import get_score_importances
from MachineLearningModels.ridge import Ridge
from MachineLearningModels.lasso import Lasso
from MachineLearningModels.pls import PLS
from sklearn.metrics import mean_squared_error, r2_score
from MachineLearningModels.gradientboost import GradientBoost
from MachineLearningModels.linearregression import LinearRegression
from DataPreprocessor.datacleaner import Cleaner
from DataPreprocessor.dataspliter import Spliter
from Evaluation.evaluation import Evaluation
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier
import pandas as pd
import numpy as np

# CsvReader is only imported further down in this file, so bring it into
# scope here; without this the CsvReader() call below raises NameError.
from Utils.csvread import CsvReader

# Load the raw table (assumes CsvReader.read returns a pandas DataFrame --
# the calls below rely on DataFrame methods), drop the 'Unnamed: 32' column
# and index the rows by 'id'.
csvreader = CsvReader()
data = csvreader.read('data.csv')
data = data.drop(columns='Unnamed: 32')
data.set_index('id', inplace=True)

# Target: the 'diagnosis' column, kept as a one-column frame.
labels = data[['diagnosis']].copy()

# Predictors: every remaining column; feature_list holds their names.
features = data.drop(columns='diagnosis')
feature_list = list(features)

# Ordered (unshuffled) 90/10 split: the first 90% of rows are used for
# training and the remainder for testing.  Positional slicing replaces
# np.split(DataFrame, ...), which is routed through DataFrame.swapaxes
# (deprecated since pandas 2.1) and emits a FutureWarning.
split_at = int(.9 * len(features))
train_features, test_features = features.iloc[:split_at], features.iloc[split_at:]
train_labels, test_labels = labels.iloc[:split_at], labels.iloc[split_at:]

print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_features.shape)
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor
from Utils.csvread import CsvReader
from MachineLearningModels.randomforest import RandomForest
from DataPreprocessor.datacleaner import Cleaner
from DataPreprocessor.dataspliter import Spliter
from Evaluation.evaluation import Evaluation
from pandas import DataFrame

# Read the adder dataset (assumes CsvReader.read returns a pandas DataFrame).
csvreader = CsvReader()
data = csvreader.read('memocode_adder_inputs.csv')

# Inputs are the two operand columns; the target is the single 'output' column.
features = data.loc[:, ['data1', 'data2']].copy()
labels = data.loc[:, ['output']].copy()

# Two hand-written evaluation rows and their hand-written expected outputs.
Data_f = dict(data1=[10000, 30000], data2=[20000, -5000])
Data_l = dict(output=[26897, 27837])
test_features = DataFrame(Data_f, columns=['data1', 'data2'])
test_labels = DataFrame(Data_l, columns=['output'])

# Bias-free, purely linear network: 2 inputs -> 5 hidden units -> 1 output.
model = Sequential([
    Dense(5, input_dim=2, activation='linear', use_bias=False),
    Dense(1, activation='linear', use_bias=False),
])
model.summary()
示例#3
0
import os
import warnings
import argparse
from Utils.csvread import CsvReader
import numpy as np
from DataPreprocessor.datacleaner import Cleaner

def init_arg_parser():
    """Build the command-line parser for this script.

    Three options are registered, all of them required:
    ``-train/--trainingdata``, ``-s/--strategy`` and ``-o/--output``.

    Returns:
        argparse.ArgumentParser: the configured parser (nothing is parsed here).
    """
    cli = argparse.ArgumentParser(description="Automatically generates score table.")

    # (short flag, long flag, help text) -- registered in this order so the
    # generated help output lists the options in the same sequence.
    required_options = (
        ('-train', '--trainingdata', "Get the training data"),
        ('-s', '--strategy', "Data clean strategy"),
        ('-o', '--output', "Output path"),
    )
    for short_flag, long_flag, help_text in required_options:
        cli.add_argument(short_flag, long_flag, help=help_text, required=True)

    return cli

if __name__ == "__main__":
    # Read the three required options: training-data path, cleaning
    # strategy and output path.
    args = init_arg_parser().parse_args()

    # Load the training data from the path given on the command line.
    raw_data = CsvReader().read(args.trainingdata)

    # Clean with the requested strategy.  The 'df' argument is passed through
    # unchanged; presumably it selects a DataFrame result, since .to_csv is
    # called on the return value -- confirm against Cleaner.clean.
    cleaned = Cleaner(raw_data).clean(args.strategy, 'df')

    # Write the cleaned table to the requested output path.
    cleaned.to_csv(args.output)
示例#4
0
                            indexarray[i] = ''
                plt.xticks(df.index, indexarray, rotation='vertical')
                plt.legend(handles=[p, a])
                plt.savefig(figpath)
    df = pd.DataFrame(dict.items(), columns=['key', 'value'])
    df = df.sort_values(by=['key'])
    df = df.set_index('key')
    df = df.T
    return df


if __name__ == "__main__":
    parser = init_arg_parser()
    args = parser.parse_args()

    csvreader = CsvReader()
    alldata = csvreader.read(args.trainingdata)

    feature_headers = read_list(args.input)
    label_headers = read_list(args.output)

    if args.type == 'c':
        type = 'classifier'
    else:
        type = 'regressor'

    alldata = alldata.fillna(0)
    data = alldata
    #data = data.sample(frac=1).reset_index(drop=True)
    if args.expandingwindow:
        vr = (len(data) - 52) / len(data)