from matplotlib.pyplot import figure, boxplot, xlabel, ylabel, show
import numpy as np
from scipy.io import loadmat
from sklearn.neighbors import KNeighborsClassifier
from sklearn import model_selection
from scipy.io import loadmat
import torch
from sklearn import model_selection
from __init__ import train_neural_net, draw_neural_net
from scipy import stats
from clean_data import clean_data, transform_data

# ----------------------- LOADING DATA ----------------------------
# Build the classification inputs: feature matrix X, binary target y, and
# the matching attribute names, from the cleaned and transformed video data.

# Columns passed to transform_data and then used as the columns of X.
attributeNames = [
    'likes',
    'dislikes',
    'views',
    'comment_count',
    'trending_time',
]

# A fresh list is handed to transform_data so attributeNames stays a
# separate object from the argument.
data = transform_data(clean_data('Datasets/**videos.csv'), list(attributeNames))

# Seed NumPy's global RNG at the same point as before (after the transform,
# before the subset is taken).
np.random.seed(180820)

# Work on at most the first 100000 rows.
data = data.head(100000)

X = np.array(data[attributeNames])

# Binary target: 1 where trending_time <= 3, otherwise 0 (stored as floats).
# NOTE(review): 'trending_time' is also a column of X, so the target is
# derivable from a feature -- confirm this is intended.
# NOTE(review): the threshold is applied after transform_data; confirm that
# 3 is meant in the transformed scale.
is_short_trending = data["trending_time"] <= 3.
y = np.where(is_short_trending, 1, 0.)
data['class'] = y.copy()

# Earlier, disabled experiments used data['views'] as y, or split
# np.array(data) into X = columns 0..3 and y = column 4.

N, M = X.shape
from matplotlib.pylab import (figure, semilogx, loglog, xlabel, ylabel, legend, 
                           title, subplot, show, grid)
import numpy as np
from scipy.io import loadmat
import sklearn.linear_model as lm
from sklearn import model_selection
from __init__ import rlr_validate
from clean_data import clean_data, transform_data
import pandas as pd
import matplotlib.pyplot as plt

# SETUP ----------------------------------------------------------------------
# Build the regression inputs: X holds four count features and y holds the
# 'trending_time' column of the transformed data.
# NOTE(review): data, X, y and attributeNames are also assigned earlier in
# this file; the assignments below replace those values.

# Columns handed to transform_data.
cols = ["likes", "dislikes", "views", "comment_count", "trending_time"]

data = clean_data('Datasets/**videos.csv')
data_norm = transform_data(data, cols)

np.random.seed(180820)

# The string literal below is disabled subsampling code; it is a bare
# expression statement and is never executed.
"""
index = np.random.choice(range(0, len(data_norm)), size = 10000, replace = False)
index = data_norm.index in index
data_norm = data_norm[index,:]
"""

# data_norm.head(100)  # shows that the training error can easily be plotted
#                      # with less data

# Feature columns; 'trending_time' is deliberately left out because it is y.
attributeNames = ["likes", "dislikes", "views", "comment_count"]

X = np.array(data_norm[["likes", "dislikes", "views", "comment_count"]])
y = np.squeeze(np.array(data_norm["trending_time"]))