Пример #1
0
def setup_workspace():
    """Clean the workspace, run copy_git_hooks(), then generate and build.

    Returns:
        0 when the generate-and-build step returns 0; otherwise that step's
        non-zero return value, after a failure message has been printed.
    """
    Clean.clean()
    copy_git_hooks()

    build_status = GenerateAndBuild.generate_and_build()
    if build_status == 0:
        return 0

    # Report the failure and hand the step's own status back to the caller.
    print("generate_and_build failed with return value '{}'".format(build_status))
    return build_status
def binary_naive_bayes():
    """Train a Naive Bayes model on binary document vectors and print its metrics.

    The cleaned data is handed to ``split`` together with the bounds 0 and 30%
    of the 'class' column count; the returned train part is vectorised and used
    for training, the test part for prediction. Accuracy, precision, recall,
    f-score and true-negative figures are printed; nothing is returned.
    """
    classifier = nb.NaiveBayesModel()
    cleaner = cn.DataCLean()
    vectorizer = dv.DocumentVector()

    cleaned_df, vocabulary = cleaner.Clean()
    test_bound = int(.3 * (cleaned_df['class'].count()))
    test_df, train_df = split(cleaned_df, 0, test_bound)

    train_vectors = vectorizer.binary_docvector(train_df, vocabulary)
    trained = classifier.TrainModel(train_vectors, vocabulary)
    (word_given_pi, word_given_no_pi, prob_pi, prob_no_pi,
     words_in_pi, words_in_no_pi) = trained

    predictions, actual = classifier.predict(
        prob_pi, prob_no_pi, vocabulary,
        word_given_pi, word_given_no_pi,
        words_in_pi, words_in_no_pi,
        test_df, cleaner)

    print("--------------Binary Naive Bayes Accuracy Stats---------------------------")
    evaluator = em.Evaluate()
    TP, FN, TN, FP = evaluator.confusion_matrix(actual, predictions)
    # Each metric is computed right before it is printed, one after another.
    print("Accuracy = ", evaluator.Accuracy(TP, TN, FP, FN))
    print("Precision = ", evaluator.Precision(TP, FP))
    print("Recall = ", evaluator.Recall(TP, FN))
    print("fScore = ", evaluator.fScore(TP, FN, FP))
    print("True Negative = ", evaluator.TrueNegative(TN, FP))
    print("---------------------------------------------------------------------")
Пример #3
0
def process_file(path):
    """Count word frequencies over the last column of a CSV file.

    The first row is skipped as a header. For every remaining non-empty row
    the last field is passed through ``Clean.clean_data(field, Sign)`` and the
    counts returned by ``statistic_word_frequency_from_str`` are accumulated.

    Args:
        path: Path of the CSV file to read.

    Returns:
        The accumulated frequency counter; an empty ``Counter`` when the file
        has no data rows.
    """
    # The file is only read, so plain read mode is enough ('r+' would also
    # require write permission on the file).
    with open(path, 'r') as f:
        # Call form so the statement is valid on both Python 2 and Python 3.
        print(path)
        frequency = Counter()
        reader = csv.reader(f)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; there is no last field.
                continue
            line = Clean.clean_data(row[-1], Sign)
            frequency = statistic_word_frequency_from_str(line) + frequency
        return frequency
Пример #4
0
def setup_workspace():
	"""Clean the workspace, refresh the Git hooks, then generate and build.

	An ``Error`` raised while replacing the hooks is printed and does not stop
	the build step from running.

	Returns 0 on success, or the non-zero value returned by generate_and_build.
	"""
	Clean.clean()

	# Replace whatever hooks are currently installed with the depot's hooks.
	try:
		shutil.rmtree(Directories.GIT_HOOKS_ROOT_DIR, ignore_errors=True)
		shutil.copytree(src=Directories.GIT_HOOKS_DEPOT_DIR, dst=Directories.GIT_HOOKS_ROOT_DIR)
	except Error as hook_error:
		print("Error copying Git hooks: '{}'.".format(hook_error))

	build_status = GenerateAndBuild.generate_and_build()
	if build_status == 0:
		return 0

	# Report the failure and pass the step's own status back to the caller.
	print("generate_and_build failed with return value '{}'".format(build_status))
	return build_status
def binary_naive_bayes_kfold():
    """Evaluate the binary Naive Bayes model over five consecutive folds.

    For fold ``i`` the bounds ``200 * i`` and ``200 * (i + 1)`` are passed to
    ``split`` (presumably row bounds of the test slice; confirm in ``split``).
    The train part is cleaned, vectorised and used for training, and the
    metrics of the predictions on the test part are collected. The per-fold
    metric lists and their averages are printed; nothing is returned.
    """
    model = nb.NaiveBayesModel()
    cleaner = cn.DataCLean()
    vectorizer = dv.DocumentVector()
    final_df, _unused_df = cleaner.extract(pathData)

    accuracies, precisions, recalls = [], [], []
    fscores, true_negatives = [], []
    evaluator = em.Evaluate()

    fold_size = 200
    for fold in range(5):
        start = fold * fold_size
        end = start + fold_size
        df_test, df_train = split(final_df, start, end)

        clean_text, df_clean = cleaner.clean_data(df_train)
        vocabulary = cleaner.make_unique_li(clean_text)
        train_vectors = vectorizer.binary_docvector(df_clean, vocabulary)

        (word_given_pi, word_given_no_pi, prob_pi, prob_no_pi,
         words_in_pi, words_in_no_pi) = model.TrainModel(train_vectors, vocabulary)
        predictions, actual = model.predict(
            prob_pi, prob_no_pi, vocabulary,
            word_given_pi, word_given_no_pi,
            words_in_pi, words_in_no_pi,
            df_test, cleaner)

        TP, FN, TN, FP = evaluator.confusion_matrix(actual, predictions)
        accuracies.append(evaluator.Accuracy(TP, TN, FP, FN))
        precisions.append(evaluator.Precision(TP, FP))
        recalls.append(evaluator.Recall(TP, FN))
        fscores.append(evaluator.fScore(TP, FN, FP))
        true_negatives.append(evaluator.TrueNegative(TN, FP))

    print("---------------------------------------------------------------------")
    print("Binary Naive Bayes wit k-fold Accuracy Stats")

    # First the raw per-fold values ...
    per_fold = (
        ("accuracy = ", accuracies),
        ("precison = ", precisions),
        ("recall = ", recalls),
        ("f-score = ", fscores),
        ("True Negative = ", true_negatives),
    )
    for label, values in per_fold:
        print(label, values)

    # ... then the average of each metric across the folds.
    averaged = (
        ("accuracy = ", accuracies),
        ("precison = ", precisions),
        ("recall = ", recalls),
        ("f-score = ", fscores),
        ("true negative = ", true_negatives),
    )
    for label, values in averaged:
        print(label, Average(values))
Пример #6
0
def generatingTrainSet():
    """Build the training feature matrix and the 0/1 label vector.

    The cleaned data is turned into a document vector with
    ``DocumentVector.DocVector``. The original also kept ``tf_idf``,
    ``binary_docvector`` and a TextBlob polarity variant as disabled
    alternatives.

    Returns:
        A ``(X_train, Y_train)`` tuple: every column but the last of the
        document-vector values, and the last column passed through
        ``convert_to_0_or_1``.
    """
    cleaner = cl.DataCLean()
    cleaned_df, vocabulary = cleaner.Clean()

    vectorizer = dv.DocumentVector()
    doc_vector = vectorizer.DocVector(cleaned_df, vocabulary)

    matrix = doc_vector.values
    features = matrix[:, :-1]
    raw_labels = matrix[:, -1]
    labels = convert_to_0_or_1(raw_labels)
    return (features, labels)
    def restoreImage(self):
        '''Pass the current image through the restoration model and display the result.

        Shows a dialog and returns when no image has been opened. Otherwise
        ``Clean.Clean`` writes the restored image to ``temp_restored_image.png``;
        if that file can be read back it becomes the current image and its
        pixmap is shown in ``image_label``.
        '''
        if self.image_filename is None:
            QtHelper.dialog(
                'Before restoring an image, please open it using File->Open (Ctrl+O)'
            )
            return

        source = self.image_filename
        # This method stores the restored file's name below as a plain string,
        # so on a repeated restore indexing [0] would yield only the first
        # character of that name. Index only when the value is not a string.
        # NOTE(review): the non-string case is presumably a (path, filter)
        # pair from a file dialog; confirm where image_filename is first set.
        input_path = source if isinstance(source, str) else source[0]
        output_filename = 'temp_restored_image.png'

        Clean.Clean(input_path, output_filename)

        read_image = FileIO.readImage(output_filename)
        if read_image is None:
            # Reading back the freshly written file failed; keep the current
            # image and filename untouched.
            return

        self.image = read_image
        self.image_filename = output_filename
        self.image_label.setPixmap(QPixmap(self.image))
Пример #8
0
def generate_str_file(Dir, file):
    """Write the cleaned last column of a CSV file to ``<Dir>/clean/str.txt``.

    The first CSV row is skipped as a header. For every remaining non-empty
    row the last field is passed through ``Clean.clean_data(field, Sign)`` and
    written on its own line unless the cleaned text is blank.

    Args:
        Dir: Directory containing the CSV file; the ``clean`` output directory
            is created inside it when missing.
        file: Name of the CSV file inside ``Dir``.

    Returns:
        Path of the output file. When the file already exists and the user
        does not answer ``y`` at the prompt, the existing file is left
        untouched and its path is returned.
    """
    str_path = Dir + os.sep + 'clean/'
    out_path = str_path + 'str.txt'
    if not os.path.isdir(str_path):
        os.mkdir(str_path)

    if os.path.isfile(out_path):
        # raw_input only exists on Python 2; fall back to input on Python 3.
        try:
            ask = raw_input
        except NameError:
            ask = input
        cmd = ask("生成的文件已经存在,是否覆盖 y/n : ")
        if cmd != 'y':
            return out_path
        os.remove(out_path)

    src_path = Dir + os.sep + file
    # The input is only read, so plain read mode is enough ('r+' would also
    # require write permission on the source file).
    with open(src_path, 'r') as f:
        with open(out_path, 'a+') as w:
            print(src_path)
            reader = csv.reader(f)
            # Skip the header row; the default keeps an empty file from
            # raising StopIteration.
            next(reader, None)
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; nothing to clean.
                    continue
                line = Clean.clean_data(row[-1], Sign)
                if line.strip() != '':
                    # write() emits the string in one call; writelines() on a
                    # str would iterate it character by character.
                    w.write(line + '\n')
    return out_path
Пример #9
0
import matplotlib.pyplot as plt
import datetime
import numpy as np


# dictionary that contains all the filenames
filenames = FileNameReading.get_file_names()

# One list of parsed rows per input file, in the iteration order of filenames.
all_sensors = []

for i in filenames:
    current_sensor = []

    data = Parsing.parse(i)
    print("Current file being read is " + i)
    data = Clean.remove_empty(data)
    for row in data:
        if "Timestamp" in row:
            # The timestamp is split on spaces; the first piece is kept as-is
            # and the second is cut at the first 'A' or 'P'.
            line = row["Timestamp"].split(' ')
            # The previous `split('A') or split('P')` never reached the 'P'
            # branch because split() always returns a non-empty list, so a
            # 'P' marker was left in place. Cut at either letter instead.
            second_value = line[1].split('A')[0].split('P')[0]
            row["Timestamp"] = (line[0], second_value)
        current_sensor.append(row)
    all_sensors.append(current_sensor)

x = []
y = []
Пример #10
0
# Drop the outliers identified by EDA: rows with 'grlivarea' above 4000 and
# 'saleprice' below 300000.
train.drop(
    train[(train['grlivarea'] > 4000) & (train['saleprice'] < 300000)].index,
    inplace=True,
)

# Remember how many training rows remain so the combined frame can be cut back
# into its train and test parts further down.
train_objs_num = len(train)
y = train['saleprice']

# Stack the training features (target column removed) on top of the test set.
dataset = pd.concat([train.drop(columns=['saleprice']), test],
                    axis=0, ignore_index=True)
all_data = dataset.copy()
all_data.shape

import Clean
import Feature_Engineering
import Simple_Stacking

# Clean first, then engineer features on the cleaned frame.
all_data = Feature_Engineering.model(Clean.model(all_data, train_objs_num))

# The target variable is right skewed, so it is log-transformed.
y = np.log1p(y)
f_train, f_test = all_data[:train_objs_num], all_data[train_objs_num:]

predictions = Simple_Stacking.model(f_train, y, f_test)
final_predictions = predictions

submission = pd.DataFrame({
    'Id': sub_index,
    'SalePrice': final_predictions.astype(float),
})
Пример #11
0
    import Mark
    import Edit
    import Init
    import Move
    import Remove
    import Show
except ImportError, e:
    try:
        why = "(" + str(e) + ")"
    except:
        why = ""
    print("Cannot import package's commands " + why + " ...")
    sys.exit(-1)

# Sub-command name paired with the module that implements it; the handlers
# are instantiated in the order listed here.
_COMMAND_MODULES = (
    ('add', Add),
    ('clean', Clean),
    ('config', Config),
    ('mark', Mark),
    ('edit', Edit),
    ('init', Init),
    ('move', Move),
    ('remove', Remove),
    ('show', Show),
)

# Maps each sub-command name to an instance of its module's SubCommand class.
commands = {name: module.SubCommand() for name, module in _COMMAND_MODULES}

# Test
if __name__ == '__main__':
    debug("Test completed")
    sys.exit(0)
Пример #12
0
import Clean
import pickle
import os
import sys
# Kept for any code that still reads `separator`. os.sep is the platform's
# path separator, replacing the hand-written sys.platform check.
separator = os.sep

# settings.txt is looked up next to this script. os.path.join inserts the
# separator itself, so no manual concatenation is needed.
settings_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "settings.txt")

# NOTE(review): pickle.load can execute arbitrary code embedded in the file it
# reads; this is only safe while settings.txt is produced by a trusted source.
with open(settings_path, 'rb') as setting_file:
    folders = pickle.load(setting_file)

Clean.main(folders)
Пример #13
0
import Clean
# Script entry: invoke the Clean module's main() with no arguments.
Clean.main()
Пример #14
0
def generate_json_file():
    """Run ``Clean.clean_process`` on the ``./csv`` path.

    Returns nothing. Whatever output is produced is up to ``clean_process``
    (presumably JSON, judging by this function's name; confirm in Clean).
    """
    csv_path = './csv'
    Clean.clean_process(csv_path)
Пример #15
0
import Clean
import pickle
import os
import sys
# Kept for any code that still reads `separator`. os.sep is the platform's
# path separator, replacing the hand-written sys.platform check.
separator = os.sep

# settings.txt is looked up next to this script. os.path.join inserts the
# separator itself, so no manual concatenation is needed.
settings_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "settings.txt")

# NOTE(review): pickle.load can execute arbitrary code embedded in the file it
# reads; this is only safe while settings.txt is produced by a trusted source.
with open(settings_path, 'rb') as setting_file:
    folders = pickle.load(setting_file)

Clean.main(folders)
Пример #16
0
import Clean
import unittest
import HTMLTestRunner
# from Common.Report import HTML_FILE_PATH, HTML_FILE_NAME

# One-step cleanup of logs, test data, test reports and test screenshots
# (as described by the original author; the work itself happens in Clean.Clean).
Clean.Clean()
# from Common.RunTime_Log import Log

# Directory that is searched for test case files.
Case_Dir = r'E:\ZenTao_Test_Project\TestCases'
# file = open(HTML_FILE_PATH + HTML_FILE_NAME, 'wb')
# Collect the tests from the single file matching the pattern below.
suite = unittest.defaultTestLoader.discover(
    Case_Dir, pattern='Test_Case_ZenTao_Login.py')
# NOTE: the HTML report runner below is disabled, so in the lines shown here
# the discovered suite is never executed. The disabled title/description read
# "ZenTao login test report" / "test case execution status", and the disabled
# log message reads "writing test report".
# runner = HTMLTestRunner.HTMLTestRunner(stream=file, title='禅道登录测试报告', description='用例执行情况')
# runner.run(suite)
# Log().W_log_warning("写入测试报告")
# Prints "Test finished".
print('测试结束')
# file.close()
Пример #17
0
import argparse
import os
import pandas as pd

#from Sample3 import __dframe__
#from Sample3 import dataFrameTest

# Command-line driver: -c/--clean cleans the data sets, -a/--analyse runs the
# classifiers. When both flags are given only the clean step runs, because the
# branches below are mutually exclusive.
parser = argparse.ArgumentParser(description=" This is program for analysing ")
parser.add_argument('-c', '--clean', help="Clean the dataset", action="store_true")
parser.add_argument('-a', '--analyse', help="Implement classifier", action="store_true")
send_args = parser.parse_args()

# print is written in call form with a single argument, which behaves the same
# on Python 2 and Python 3 (the statement form is a SyntaxError on Python 3).
if send_args.clean:
    # Imported lazily so only the module for the requested task is loaded.
    import Clean
    Clean.cleanTestData()
    Clean.cleanTrainData()
    print(" Cleaning Task Successful")
elif send_args.analyse:
    import Implement
    print(" The analysis : is :\n")
    Implement.getRandomForestClassifier()
    Implement.getNaiveBayesClassifier()
    Implement.getLogisticRegression()
else:
    print("Run with parameters ")
Пример #18
0
def normpath(path):
  """Return ``Clean(path)``, decoded from UTF-8 when ``path`` is unicode.

  For any other input type the result of ``Clean`` is returned unchanged.
  Relies on the Python 2 ``unicode`` type.
  """
  cleaned = Clean(path)
  if not isinstance(path, unicode):
    return cleaned
  # Mirror the input type: unicode in, unicode out.
  return unicode(cleaned, 'utf-8')