# -*- coding: utf-8 -*-
import dataMngr as dm
import compInfo

def preprocess(fileName, train=True):
    """Convert a raw data file into arrays and persist them via ``dm``.

    The file is read with ``dm.file2Dataframe``, split into an input and an
    output array by ``dm.processDataframeForNP`` and written to the file
    names produced by ``dm.makeFileName``.

    Args:
        fileName: Path handed to ``dm.file2Dataframe`` and
            ``dm.makeFileName``.
        train: Forwarded to ``dm.processDataframeForNP``. When truthy the
            output array is saved as well; the input array is always saved.

    Returns:
        The ``(inputArr, outputArr)`` pair returned by
        ``dm.processDataframeForNP`` (presumably NumPy arrays, since callers
        read ``.shape`` -- TODO confirm).
    """
    frame = dm.file2Dataframe(fileName)
    inputArr, outputArr = dm.processDataframeForNP(frame, train)
    inputFileName, outputFileName = dm.makeFileName(fileName)

    # Queue the writes so the output array (training only) is still written
    # before the input array.
    pendingSaves = []
    if train:
        pendingSaves.append((outputFileName, outputArr))
    pendingSaves.append((inputFileName, inputArr))

    for targetName, payload in pendingSaves:
        dm.save(targetName, payload)

    return inputArr, outputArr

# Script section: preprocess the 500-point sample file and report the shapes
# of the resulting arrays. A parenthesised single-argument print produces the
# same output as the print statement on Python 2.
print(compInfo.dataPoints500File)
inA, outA = preprocess(compInfo.dataPoints500File)

print(inA.shape)
print(outA.shape)

# Load the sample arrays back from the output directory (presumably the files
# preprocess() just wrote -- TODO confirm against dm.makeFileName).
_sampleDir = compInfo.outputDataDirectory
loadedInA = dm.load(_sampleDir + 'Innocentive_500_Sample_input.np')
loadedOutA = dm.load(_sampleDir + 'Innocentive_500_Sample_output.np')

print(loadedInA.shape)
print(loadedOutA.shape)
def makeTestOutput(modelfp, inTestFileName, outTestFileName):
    """Load a saved model and test input, then hand them to ``ML``.

    Args:
        modelfp: Path passed to ``dm.load`` to obtain the model.
        inTestFileName: Path passed to ``dm.load`` to obtain the test input.
        outTestFileName: Forwarded unchanged to ``ML.generateTestOutput``.
    """
    # NOTE(review): ``ML`` is not imported in the visible part of this file;
    # confirm it is in scope before this function is called.
    generate = ML.generateTestOutput
    loadedModel = dm.load(modelfp)
    testInput = dm.load(inTestFileName)
    generate(loadedModel, testInput, outTestFileName)
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score
import dataMngr as dm

def generateModel(model, modelArrParams=None, modelDictParams=None):
    """Instantiate ``model`` without fitting it.

    Args:
        model: Callable (e.g. an estimator class) to instantiate.
        modelArrParams: Iterable of positional arguments for ``model``.
            ``None`` (the default) means no positional arguments.
        modelDictParams: Mapping of keyword arguments for ``model``.
            ``None`` (the default) means no keyword arguments.

    Returns:
        The object returned by ``model(*modelArrParams, **modelDictParams)``.
        No fitting is performed here.
    """
    positional = () if modelArrParams is None else modelArrParams
    keywords = {} if modelDictParams is None else modelDictParams
    return model(*positional, **keywords)

def generateModelScores(stdModel, inData, outData, modelFileName, statisticsFileName):
    """Cross-validate ``stdModel``, persist it with its mean score, return the mean.

    Args:
        stdModel: Estimator passed to ``cross_val_score``.
        inData: Input data passed to ``cross_val_score``.
        outData: Target data; flattened with ``.ravel()`` before scoring.
        modelFileName: File name under which ``stdModel`` is saved.
        statisticsFileName: File name under which the mean score is saved.

    Returns:
        The mean of the scores returned by ``cross_val_score``.
    """
    flatTargets = outData.ravel()
    foldScores = cross_val_score(stdModel, inData, flatTargets)
    # NOTE(review): cross_val_score is documented to fit clones of the
    # estimator, so ``stdModel`` is presumably still unfitted when it is
    # saved here -- confirm that is intended.
    saveModelAndScores(modelFileName, stdModel, statisticsFileName, foldScores)
    return foldScores.mean()

def saveModelAndScores(modelFileName, stdModel, statisticsFileName, scores):
    """Persist a model and the mean of its scores through ``dm.save``.

    Args:
        modelFileName: File name under which ``stdModel`` is saved.
        stdModel: Model object to save.
        statisticsFileName: File name under which ``scores.mean()`` is saved.
        scores: Object exposing ``.mean()``; only the mean is stored.
    """
    dm.save(modelFileName, stdModel)
    # The mean is computed only after the model has been written, as before.
    meanScore = scores.mean()
    dm.save(statisticsFileName, meanScore)

def generateModelAndScores(model, modelArrParams, modelDictParams, inData, outData, numRuns):
    """Build and cross-validate ``numRuns`` fresh models, printing each mean score.

    For run ``x`` the model is saved as ``"RF<x>.np"`` and its mean score as
    ``"RF_Stats<x>.np"`` (via ``generateModelScores``).

    Args:
        model: Callable used by ``generateModel`` to build each model.
        modelArrParams: Positional arguments for ``model`` (or ``None``).
        modelDictParams: Keyword arguments for ``model`` (or ``None``).
        inData: Input data forwarded to ``generateModelScores``.
        outData: Target data forwarded to ``generateModelScores``.
        numRuns: Number of models to build and score.

    Returns:
        A list with the mean cross-validation score of every run, in run
        order. Previously the function returned ``None`` implicitly, so a
        caller printing its result only ever saw ``None``.
    """
    meanScores = []
    # range() and single-argument print() behave the same on Python 2 and 3.
    for run in range(numRuns):
        stdModel = generateModel(model, modelArrParams, modelDictParams)
        meanScore = generateModelScores(
            stdModel, inData, outData,
            "RF" + str(run) + ".np", "RF_Stats" + str(run) + ".np")
        print(meanScore)
        meanScores.append(meanScore)
    return meanScores


# Script section: run five RandomForestClassifier rounds on the saved sample
# arrays and print whatever generateModelAndScores returns.
_trainInput = dm.load("Innocentive_500_Sample_input.np")
_trainOutput = dm.load("Innocentive_500_Sample_output.np")
print(generateModelAndScores(RandomForestClassifier, None, None, _trainInput, _trainOutput, 5))
def generateTestOutput(modelfp, testOutfp, inData):
    """Load a saved model and pass it, with ``inData``, to ``ML``.

    Args:
        modelfp: Path passed to ``dm.load`` to obtain the model.
        testOutfp: Forwarded unchanged as the last argument of
            ``ML.generateTestOutput``.
        inData: Already-loaded input data, forwarded unchanged.
    """
    # NOTE(review): ``ML`` is not imported in the visible part of this file;
    # confirm it is in scope before this function is called.
    generate = ML.generateTestOutput
    loadedModel = dm.load(modelfp)
    generate(loadedModel, inData, testOutfp)
def checkCrossValidation(inDatafp, outDatafp, modelInfo, numFolds):
    """Cross-validate a freshly built model on data loaded from disk.

    Args:
        inDatafp: Path passed to ``dm.load`` for the input data.
        outDatafp: Path passed to ``dm.load`` for the target data, which is
            flattened with ``.ravel()``.
        modelInfo: Mapping with the keys ``"model"``,
            ``"modelArrParameters"`` and ``"modelDictParameters"``.
        numFolds: Passed as ``cv=`` to ``cv.cross_val_score``.

    Returns:
        The result of ``cv.cross_val_score``.
    """
    # NOTE(review): neither ``generateUnfittedModel`` nor ``cv`` is defined in
    # the visible part of this file -- confirm both are in scope.
    features = dm.load(inDatafp)
    flatTargets = dm.load(outDatafp).ravel()
    modelKeys = ("model", "modelArrParameters", "modelDictParameters")
    candidate = generateUnfittedModel(*[modelInfo[key] for key in modelKeys])
    return cv.cross_val_score(candidate, features, flatTargets, cv=numFolds)