def setup_workspace(): """ Returns 0 on success, non-0 on failure.""" Clean.clean() copy_git_hooks() # Generate and build result = GenerateAndBuild.generate_and_build() if result != 0: print("generate_and_build failed with return value '{}'".format(result)) return result return 0
def binary_naive_bayes():
    model = nb.NaiveBayesModel()
    clean = cn.DataCLean()
    doc_vector = dv.DocumentVector()
    df_clean, uniqueWords = clean.Clean()
    # Hold out the first 30% of rows as the test split (see the split()
    # sketch below).
    df_clean_test, df_clean_train = split(
        df_clean, 0, int(.3 * (df_clean['class'].count())))
    docVector = doc_vector.binary_docvector(df_clean_train, uniqueWords)
    df_WordGivenPI, df_WordGivenNoPi, Prob_PI, Prob_NoPI, numWordsInPI, numWordsInNoPI = model.TrainModel(
        docVector, uniqueWords)
    predict_df, test_data = model.predict(Prob_PI, Prob_NoPI, uniqueWords,
                                          df_WordGivenPI, df_WordGivenNoPi,
                                          numWordsInPI, numWordsInNoPI,
                                          df_clean_test, clean)
    print("--------------Binary Naive Bayes Accuracy Stats---------------------------")
    stats = em.Evaluate()
    TP, FN, TN, FP = stats.confusion_matrix(test_data, predict_df)
    print("Accuracy = ", stats.Accuracy(TP, TN, FP, FN))
    print("Precision = ", stats.Precision(TP, FP))
    print("Recall = ", stats.Recall(TP, FN))
    print("fScore = ", stats.fScore(TP, FN, FP))
    print("True Negative = ", stats.TrueNegative(TN, FP))
    print("---------------------------------------------------------------------")
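# Both Naive Bayes drivers in this section call a split() helper that is not
# shown. Below is a minimal sketch of what it might look like, as an assumption
# rather than the original implementation: rows [start:end] are held out for
# testing, and the (test, train) pair is returned in that order.
def split(df, start, end):
    df_test = df.iloc[start:end]
    df_train = df.drop(df.index[start:end])
    return df_test, df_train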
def process_file(path):
    with open(path, 'r+') as f:
        print(path)
        frequency = Counter()
        reader = csv.reader(f)
        next(reader)  # Skip the header row.
        for l in reader:
            line = Clean.clean_data(l[-1], Sign)
            frequency = statistic_word_frequency_from_str(line) + frequency
    return frequency
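# Hypothetical usage of process_file(), aggregating word frequencies across
# several CSV files; the paths here are placeholders.
total = Counter()
for csv_path in ['data_part1.csv', 'data_part2.csv']:  # placeholder paths
    total += process_file(csv_path)
print(total.most_common(10))  # the ten most frequent words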
def setup_workspace(): """ Returns 0 on success, non-0 on failure. """ Clean.clean() # Copy Git hooks. try: # First, remove any existing hooks. shutil.rmtree(Directories.GIT_HOOKS_ROOT_DIR, ignore_errors=True) # Next, use the hooks specified in the depot. shutil.copytree(src=Directories.GIT_HOOKS_DEPOT_DIR, dst=Directories.GIT_HOOKS_ROOT_DIR) except Error as err: print("Error copying Git hooks: '{}'.".format(err)) # Generate and build result = GenerateAndBuild.generate_and_build() if result != 0: print("generate_and_build failed with return value '{}'".format(result)) return result return 0
def binary_naive_bayes_kfold():
    model = nb.NaiveBayesModel()
    clean = cn.DataCLean()
    doc_vector = dv.DocumentVector()
    final_df, df = clean.extract(pathData)
    start = -200
    end = 0
    accuracy = []
    precision = []
    recall = []
    fscore = []
    true_neg = []
    stats = em.Evaluate()
    # Slide a 200-row test window across the data, one fold at a time.
    for count in range(5):
        start = start + 200
        end = end + 200
        df_test, df_train = split(final_df, start, end)
        li_clean_text, df_clean = clean.clean_data(df_train)
        uniqueWords = clean.make_unique_li(li_clean_text)
        docVector = doc_vector.binary_docvector(df_clean, uniqueWords)
        df_WordGivenPI, df_WordGivenNoPi, Prob_PI, Prob_NoPI, numWordsInPI, numWordsInNoPI = model.TrainModel(
            docVector, uniqueWords)
        predict_df, punc_df = model.predict(Prob_PI, Prob_NoPI, uniqueWords,
                                            df_WordGivenPI, df_WordGivenNoPi,
                                            numWordsInPI, numWordsInNoPI,
                                            df_test, clean)
        TP, FN, TN, FP = stats.confusion_matrix(punc_df, predict_df)
        accuracy.append(stats.Accuracy(TP, TN, FP, FN))
        precision.append(stats.Precision(TP, FP))
        recall.append(stats.Recall(TP, FN))
        fscore.append(stats.fScore(TP, FN, FP))
        true_neg.append(stats.TrueNegative(TN, FP))
    print("---------------------------------------------------------------------")
    print("Binary Naive Bayes with k-fold Accuracy Stats")
    # Per-fold scores.
    print("accuracy = ", accuracy)
    print("precision = ", precision)
    print("recall = ", recall)
    print("f-score = ", fscore)
    print("True Negative = ", true_neg)
    # Averages across folds.
    print("accuracy = ", Average(accuracy))
    print("precision = ", Average(precision))
    print("recall = ", Average(recall))
    print("f-score = ", Average(fscore))
    print("true negative = ", Average(true_neg))
def generatingTrainSet():
    _dcl = cl.DataCLean()
    final_df, uniqueWords = _dcl.Clean()
    _dv = dv.DocumentVector()
    # Alternatives: _dv.tf_idf(...), _dv.binary_docvector(...), or the
    # TextBlob polarity-dictionary approach via NaiveBayesTextBlob.
    docVector = _dv.DocVector(final_df, uniqueWords)
    df = docVector.values
    X_train, Y = df[:, :-1], df[:, -1]
    Y_train = convert_to_0_or_1(Y)
    return (X_train, Y_train)
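# A sketch of how the returned arrays might be consumed. The scikit-learn
# classifier below is purely illustrative; the original model code is not shown.
from sklearn.naive_bayes import MultinomialNB

X_train, Y_train = generatingTrainSet()
clf = MultinomialNB()      # illustrative choice of estimator
clf.fit(X_train, Y_train)  # labels were already converted to 0/1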
def restoreImage(self):
    '''Passes the current image through the restoration model,
    displaying the resulting restored image.'''
    if self.image_filename is None:
        QtHelper.dialog(
            'Before restoring an image, please open it using File->Open (Ctrl+O)')
        return
    input_filename = self.image_filename
    output_filename = 'temp_restored_image.png'
    Clean.Clean(input_filename[0], output_filename)
    read_image = FileIO.readImage(output_filename)
    if read_image is None:
        # The restored image could not be read back; leave the display unchanged.
        pass
    else:
        self.image = read_image
        self.image_filename = output_filename
        self.image_label.setPixmap(QPixmap(self.image))
def generate_str_file(Dir, file):
    str_path = Dir + os.sep + 'clean/'
    if not os.path.isdir(str_path):
        os.mkdir(str_path)
    if os.path.isfile(str_path + 'str.txt'):
        cmd = input("The output file already exists. Overwrite? y/n: ")
        if cmd == 'y':
            os.remove(str_path + 'str.txt')
        else:
            return str_path + 'str.txt'
    with open(Dir + os.sep + file, 'r+') as f:
        with open(str_path + 'str.txt', 'a+') as w:
            print(Dir + os.sep + file)
            reader = csv.reader(f)
            next(reader)  # Skip the header row.
            for l in reader:
                line = Clean.clean_data(l[-1], Sign)
                if line.strip() != '':
                    w.writelines(line + '\n')
    return str_path + 'str.txt'
import matplotlib.pyplot as plt
import datetime
import numpy as np

# Dictionary that contains all the filenames.
filenames = FileNameReading.get_file_names()
all_sensors = []
for i in filenames.keys():
    current_sensor = []
    data = Parsing.parse(i)
    print("Current file being read is " + i)
    data = Clean.remove_empty(data)
    for row in data:
        for k, v in row.items():
            if k == "Timestamp":
                # Split the timestamp into date and time, stripping the AM/PM
                # suffix. (The original `split('A') or split('P')` never took
                # the 'P' branch: str.split always returns a non-empty list.)
                line = row[k].split(' ')
                second_value = line[1].split('A')[0].split('P')[0]
                row[k] = (line[0], second_value)
        current_sensor.append(row)
    all_sensors.append(current_sensor)

x = []
y = []
# Delete outliers identified by EDA.
train.drop(train[(train['grlivarea'] > 4000) &
                 (train['saleprice'] < 300000)].index,
           inplace=True)

train_objs_num = len(train)
y = train['saleprice']
dataset = pd.concat(objs=[train.drop(columns=['saleprice']), test],
                    axis=0, ignore_index=True)
all_data = dataset.copy()

import Clean, Feature_Engineering, Simple_Stacking

all_data = Clean.model(all_data, train_objs_num)
all_data = Feature_Engineering.model(all_data)

# Apply a log transformation to the target variable, as it is right-skewed.
y = np.log1p(y)

f_train = all_data[:train_objs_num]
f_test = all_data[train_objs_num:]
predictions = Simple_Stacking.model(f_train, y, f_test)
final_predictions = predictions

submission = pd.DataFrame({
    'Id': sub_index,
    'SalePrice': final_predictions.astype(float)
})
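# y was trained in log space (np.log1p above), so if Simple_Stacking.model
# returns log-space predictions they must be inverted before writing the file.
# This final step is an assumption, including the output filename.
submission['SalePrice'] = np.expm1(predictions).astype(float)
submission.to_csv('submission.csv', index=False)  # assumed filename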
try:
    import Add
    import Clean
    import Config
    import Mark
    import Edit
    import Init
    import Move
    import Remove
    import Show
except ImportError as e:
    try:
        why = "(" + str(e) + ")"
    except:
        why = ""
    print("Cannot import package's commands " + why + " ...")
    sys.exit(-1)

commands = {
    'add': Add.SubCommand(),
    'clean': Clean.SubCommand(),
    'config': Config.SubCommand(),
    'mark': Mark.SubCommand(),
    'edit': Edit.SubCommand(),
    'init': Init.SubCommand(),
    'move': Move.SubCommand(),
    'remove': Remove.SubCommand(),
    'show': Show.SubCommand(),
}

# Test
if __name__ == '__main__':
    debug("Test completed")
    sys.exit(0)
import Clean
import pickle
import os
import sys

# Pick the path separator for the current platform.
separator = "\\" if sys.platform == 'win32' else "/"

with open(os.path.dirname(os.path.abspath(__file__)) + separator + "settings.txt",
          'rb') as setting_file:
    folders = pickle.load(setting_file)

Clean.main(folders)
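# settings.txt is evidently a pickled object (it is read with pickle.load
# above). A minimal sketch of how such a file could be produced, with
# placeholder folder paths:
import pickle

with open("settings.txt", "wb") as f:
    pickle.dump(["/path/to/folder_a", "/path/to/folder_b"], f)  # placeholders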
import Clean

Clean.main()
def generate_json_file():
    Clean.clean_process('./csv')
import Clean
import unittest
import HTMLTestRunner
# from Common.Report import HTML_FILE_PATH, HTML_FILE_NAME

# One-click cleanup of logs, test data, test reports, and test screenshots.
Clean.Clean()
# from Common.RunTime_Log import Log

Case_Dir = r'E:\ZenTao_Test_Project\TestCases'
# file = open(HTML_FILE_PATH + HTML_FILE_NAME, 'wb')
suite = unittest.defaultTestLoader.discover(
    Case_Dir, pattern='Test_Case_ZenTao_Login.py')
# runner = HTMLTestRunner.HTMLTestRunner(stream=file, title='ZenTao login test report',
#                                        description='Test case execution results')
# runner.run(suite)
# Log().W_log_warning("Writing test report")
print('Testing finished')
# file.close()
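# Note that the discovered suite is never executed above, since the
# HTMLTestRunner lines are commented out. The standard-library runner could
# execute it like this:
runner = unittest.TextTestRunner(verbosity=2)
runner.run(suite)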
import argparse

parser = argparse.ArgumentParser(description="This is a program for analysing")
parser.add_argument('-c', '--clean', help="Clean the dataset",
                    action="store_true")
parser.add_argument('-a', '--analyse', help="Implement classifier",
                    action="store_true")
send_args = parser.parse_args()

if send_args.clean:
    import Clean
    Clean.cleanTestData()
    Clean.cleanTrainData()
    print("Cleaning task successful")
elif send_args.analyse:
    import Implement
    print("The analysis is:\n")
    Implement.getRandomForestClassifier()
    Implement.getNaiveBayesClassifier()
    Implement.getLogisticRegression()
else:
    print("Run with parameters")
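# Example invocations, assuming the script is saved as analyse.py (the
# filename is a placeholder):
#   python analyse.py --clean      cleans the train and test datasets
#   python analyse.py --analyse    runs the three classifiers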
def normpath(path):
    result = Clean(path)
    # Clean() may return a byte string; mirror the type of the input path.
    # (The Python 2 original checked isinstance(path, unicode) and returned
    # unicode(result, 'utf-8').)
    if isinstance(path, str) and isinstance(result, bytes):
        return result.decode('utf-8')
    return result