Exemplos de LendingClubFeatureExtractor.writeFeaturesToCSV em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: lendingClubFeatureExtractor

Classe / Tipo: LendingClubFeatureExtractor

Método / Função: writeFeaturesToCSV

Exemplos em hotexamples.com: 1

LendingClubFeatureExtractor.writeFeaturesToCSV em Python - 1 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de lendingClubFeatureExtractor.LendingClubFeatureExtractor.writeFeaturesToCSV em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

listIdx(3)

extractFeatures(2)

getTrainingData(2)

pcntRemove(1)

termConversion(1)

statusConversion(1)

stateEnumerator(1)

setTrainingData(1)

setOutCSVPath(1)

purposeEnumerator(1)

applyFeatureFilter(1)

loanGradeHash(1)

earlyCrLineConversion(1)

incomeVerifiedConversion(1)

homeOwnershipEnumerator(1)

getSampleCnt(1)

getRmvSampleCnt(1)

empLengthConversion(1)

writeFeaturesToCSV(1)

Métodos Frequentes

listIdx (3)

extractFeatures (2)

getTrainingData (2)

pcntRemove (1)

termConversion (1)

statusConversion (1)

stateEnumerator (1)

setTrainingData (1)

setOutCSVPath (1)

purposeEnumerator (1)

Métodos Frequentes

applyFeatureFilter (1)

loanGradeHash (1)

earlyCrLineConversion (1)

incomeVerifiedConversion (1)

homeOwnershipEnumerator (1)

getSampleCnt (1)

getRmvSampleCnt (1)

empLengthConversion (1)

writeFeaturesToCSV (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: loanLearner.py Projeto: hosse005/LoanLearner

def main(): # Construct an argument parser for cmd line interaction parser = argparse.ArgumentParser( description="This is a risk valuation \ software package for loan analysis. The software is used for predicting \ whether a given applicant is likely or not to repay a given loan." ) # Application version readback option parser.add_argument("-v", "--version", action="version", version=appVersion) # Option to pass in an input file to be processed parser.add_argument("-i", "--input", dest="inputFile", help="Input File Name", required=False, default=defaultInput) # Option to specify the type of learning agent to be used parser.add_argument( "--classifier", dest="cls", help="Machine Learning classifier type. \n \ Current possible options are: \n \ 'logistic'(default), 'SVM', 'dTree'", required=False, default="logistic", ) # Option to specify the SVM kernel to be used parser.add_argument( "-k", "--kernel", dest="kernel", help="SVM kernel type. \n Possible options are: \n \ 'linear', 'poly', 'rbf'(default), or 'sigmoid' ", required=False, default="rbf", ) # Option to specify the test fraction used for learning parser.add_argument( "--testFraction", dest="tstFrac", help="Fraction of data to be used for test, must be \ between 0 and 1", required=False, default=0.2, ) # Option to specify pre-training dump file parser.add_argument("-d", "--dump", dest="dumpFile", help="File location for pre-trained data dump", required=False) # Option to specify learning regularization parameter parser.add_argument( "-C", "--reg", dest="reg", help="Classifier regularization parameter", required=False, default=1 ) # Option to specify filter path parser.add_argument( "--filter", dest="filterPath", help="Feature Filter resource file", required=False, default="../res/FeatureFilter.csv", ) # Option to predict output of some input sample(s) parser.add_argument( "-p", "--predict", dest="predict", help="Run application in prediction mode. \ Prediction input samples must be located at \ ${PROJ_DIR}/tmp/predictInputSamples.csv \ (must first train a classifier!!)", required=False, action="store_true", ) # Grab the inputs passed args = parser.parse_args() m_inputFile = args.inputFile m_cls = args.cls m_kernel = args.kernel m_tstFrac = float(args.tstFrac) m_reg = float(args.reg) if args.dumpFile is not None: m_dumpFile = args.dumpFile else: m_dumpFile = None m_filter = args.filterPath m_predict = args.predict # Generate time stamp for performance monitoring t0 = time.time() # Branch on predict flag if m_predict is False: # Construct the InputReader w/ our input file mInputReader = InputReader(m_inputFile) # Next, construct our LendingClubFeatureExtractor object mFeatureExtractor = LendingClubFeatureExtractor(mInputReader, m_filter) # Use the FeatureExtractor to convert the data for learning mFeatureExtractor.extractFeatures() mFeatureExtractor.applyFeatureFilter() # Dump pre-trained data if specified by user if m_dumpFile is not None: mFeatureExtractor.setOutCSVPath(m_dumpFile) mFeatureExtractor.writeFeaturesToCSV() # Construct a LearningAgent based on user input if m_cls == "SVM": mLearningAgent = SVMClassifier(mFeatureExtractor, m_kernel) elif m_cls == "logistic": mLearningAgent = LogisticClassifier(mFeatureExtractor) elif m_cls == "dTree": mLearningAgent = DecisionTreeClassifier(mFeatureExtractor) else: print("Invalid classifier passed. See --help for valid options") return # Set the test fraction of data to use for validation mLearningAgent.setTstFraction(m_tstFrac) # Set the learning regularization parameter mLearningAgent.setRegularization(m_reg) # Apply preprocessing to the training samples mLearningAgent.shuffleSamples() mLearningAgent.sampleSlice() mLearningAgent.standardizeSamples() # Train the classifier and report the accuracy against the test subset mLearningAgent.trainModel() print("Cross Validation accuracy on the test subset = %0.3f" % mLearningAgent.crossValidate()) # Dump the classifier object to file mLearningAgent.dumpClassifier() # Print out the classifier coefficients if m_cls == "logistic" or m_cls == "dTree": print("Classifier coefficients:") print(mLearningAgent.getClfCoeffs()) # Generate end time stamp and report processing time t1 = time.time() total = t1 - t0 print("Total processing time = %3.2f seconds" % total) # Predict flag set, try read a stored classifier and push our inputs # through it else: # Construct an input reader mInputReader = InputReader(predictInput) # Next, construct our LendingClubFeatureExtractor object mFeatureExtractor = LendingClubFeatureExtractor(mInputReader, m_filter) # Use the FeatureExtractor to convert the data mFeatureExtractor.extractFeatures() mFeatureExtractor.applyFeatureFilter() # Dump pre-trained data if specified by user if m_dumpFile is not None: mFeatureExtractor.setOutCSVPath(m_dumpFile) mFeatureExtractor.writeFeaturesToCSV() # Try to read in the stored classifier try: clf = joblib.load(clfDumpLoc) except FileNotFoundError: print("Error! No classifier binary file found.") print("Did you train a classifier yet??") return # Get the output idx and remove the appropriate column from the input outputIdx = mFeatureExtractor.listIdx("loan_status") data = mFeatureExtractor.getTrainingData() data = np.delete(data, outputIdx, 1) # Standarize data to zero mean and unit variance - this should ideally # be same as classifier's scaling factor with open(scalerDumpLoc, "rb") as f: scaler = pickle.load(f) data = scaler.transform(data) # Loop through all of the inputs and make a prediction print() for sample in data: if clf.predict(sample) == 0: prediction = "Charged Off" result = 0 else: prediction = "Fully Paid" result = 1 print("Predicted outcome of loan: %s" % prediction) print("Certainty in outcome is: %.1f percent" % (clf.predict_proba(sample)[0][result] * 100)) print() print(clf) print()