Example #1
    def on_click(self):
        """
        'Load csv file' button's on_click callback listener. It does three things:
        1. Get the filename of the csv file
        2. Load csv file, and convert the data into the grid data for the Sudoku solver class
        3. Open up a pygame graph to show the simulation of solving the puzzle
        """
        init_dir = './Data'
        name = 'Choose your file'
        filetype = (("csv files", "*.csv"),)  # must be a sequence of (label, pattern) pairs

        self.get_filename(init_dir, name, filetype)
        self.grid = utility.load_csv(self.filename)

        # Validate the csv content: it must be a 9 * 9 grid (81 cells)
        # containing only the digits '1234567890'.
        while len(self.grid) != 81 or len(set(self.grid).difference(set('1234567890'))) != 0:
            self.pop_up("Wrong content in csv file. Make sure it is a 9 * 9 grid with only '1234567890' digits")
            self.get_filename(init_dir, name, filetype)
            self.grid = utility.load_csv(self.filename)

        # Solve the puzzle here
        self.sudoku = Solver(self.grid)
        t, final_values = self.sudoku.solve()
        if final_values is False:
            self.pop_up('There is no solution for this puzzle. Try another one!')
            return
        else:
            # If puzzle is successfully solved, write to csv file and open pygame to demo the result
            output = self.filename[:-4] + '_sol.csv'
            utility.write_csv(output, self.sudoku.final_values)
            self.open_pygame()
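
The length and character checks above imply that utility.load_csv flattens the 9 * 9 grid into a single 81-character digit string. A minimal sketch of such a helper, assuming an empty cell stands for '0'; this is a hypothetical reconstruction, not the project's actual utility module:

import csv

def load_csv(filename):
    # Hypothetical sketch: flatten a 9 * 9 CSV grid into one
    # 81-character string, substituting '0' for empty cells, so the
    # caller's len(grid) == 81 and digits-only checks apply directly.
    with open(filename, newline='') as f:
        return ''.join(cell.strip() or '0'
                       for row in csv.reader(f) for cell in row)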
Example #2
def compute_predictions_metrics(name, algorithm, current_results, labels):
    # Compute, for each (dataset, algorithm) pair, the score for every criterion over the RUN_NUMBER runs.
    # Note: this function currently recomputes all metrics if at least one value is missing (cf. the dataset_already_processed() function).

    with open(RESULTS_FILE, 'a+') as result_file:

        # For each of the RUN_NUMBER predictions for this case:
        for run_number in range(RUN_NUMBER):

            # Loading prediction
            print("\tCOMPUTING METRICS VALUES FOR PREDICTIONS WITH " +
                  algorithm + "(" + str(run_number + 1) + "/" +
                  str(RUN_NUMBER) + ")")
            clustering_prediction = utility.load_csv(
                PREDICTED_CLUSTERING_FOLDER + name + "_" + algorithm + "_" +
                str(run_number))

            # Computing metrics for the prediction number "run_number"
            metrics_dictionary = compute_prediction_metrics(
                labels, clustering_prediction)

            # Saving values
            for criteria in CRITERION_LIST:
                if current_results.find(name + "\t" + algorithm + "\t" +
                                        criteria + "\t" +
                                        str(run_number)) == -1:
                    result_file.write(name + "\t" + algorithm + "\t" +
                                      criteria + "\t" + str(run_number) +
                                      "\t" +
                                      str(metrics_dictionary.get(criteria)) +
                                      "\n")
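
For the find() check above to work, current_results must hold the existing text of RESULTS_FILE. A hedged sketch of the calling side (the dataset name, algorithm, and labels here are placeholders, not names from the source):

import os

# Hypothetical caller: read what is already in the results file so
# compute_predictions_metrics() can skip rows it wrote on earlier runs.
current_results = ""
if os.path.exists(RESULTS_FILE):
    with open(RESULTS_FILE) as f:
        current_results = f.read()

compute_predictions_metrics("iris", "kmeans", current_results, labels)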
Example #3
    @staticmethod
    def from_nodes_csv(file_path):
        obj = Tree()
        data = load_csv(file_path)[1:]  # skip the header row
        for v in data:
            node = Node(v[0], v[1])
            obj.attach_node(node)

        return obj
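
Node(v[0], v[1]) reads only the first two columns, and the [1:] slice drops a header row, so the expected input is roughly the following (the column names are assumptions, not taken from the source):

id,parent_id
root,
child_a,root
child_b,root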
Example #4
    # positional
    parser.add_argument("table", type=str, help="data source table name")
    # optional
    parser.add_argument("-x", "--crosstab", type=str, help="crosstab output")
    parser.add_argument("-c", "--column", type=str, help="column name in the table")
    parser.add_argument("-r", "--row", type=int, help="number of rows to select")
    parser.add_argument("-n", "--bins", type=int, help="number of bins to select")
    args = parser.parse_args()

    # identify csv / teradata
    iscsv = args.table.endswith(".csv")

    # read csv / teradata
    if iscsv:
        try:
            df = load_csv(args.table, args.row)
        except Exception:
            print("Oops!  Data table not found...")
            sys.exit(1)
    else:
        try:
            cnxn = connect_td()
        except Exception:
            print("Oops!  Failed to connect to TD...")
            sys.exit(1)
        try:
            df = load_td(args.table, cnxn, args.row)
        except Exception:
            print("Oops!  Data table not found...")
            sys.exit(1)
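
Assuming the script is saved as table_report.py (a hypothetical name), a typical invocation against a csv source would be:

python table_report.py sales.csv -r 1000 -n 10 -c region -x crosstab_out.csv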
Example #5
from categorize import Categorizer
import utility as ut
import explore as ex
import model as ml
import numpy as np
import transform as tf
from importlib import reload  # the imp module is deprecated; importlib provides reload
reload(ut)

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

data_file = "./data/sales_train.csv"

df = ut.load_csv(data_file)
df.shape
df.columns
df.describe()

ex.get_type_dict(df)

# -----------------------------------------------------------------------------
# clean data: remove nan and remove zero target
# find missing values
ex.count_missing(df)

# remove zero contract price
targetcol = 'CL_Contract_Price'
reload(tf)
reload(ml)
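
The zero-price filter announced above is not shown in this excerpt; a one-line sketch of the likely continuation:

# Hypothetical continuation: drop rows with a zero contract price
# before modelling, as the comment above announces.
df = df[df[targetcol] != 0]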
Example #6
import pandas as pd
import os, sys
# sys.path
mlibpath = r"C:\Users\m038402\Documents\myWork\pythoncodes\mlib"
sys.path.append(mlibpath)

from categorize import Categorizer
import utility as ut
import explore as ex 
import model as ml

data_file = "./data/data.csv"
TARGET_F ="TARGET_F"

df = ut.load_csv(data_file)
df.head()

# normal sklearn split
# train_df, test_df = ut.train_test_split_with_target(df, test_size=0.2)
# df must be a data frame
train_df, test_df, train_target, test_target = ut.train_test_split_with_target(df, test_size=0.2, target=TARGET_F)

train_df.shape
test_df.shape
train_target.shape
test_target.shape

train_df.columns

trainX = ml.full_pipeline.fit_transform(train_df)
# Use transform (not fit_transform) on the test split so the pipeline is
# fitted on training data only and no test information leaks in.
testX = ml.full_pipeline.transform(test_df)
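
A minimal, self-contained illustration of the fit-on-train / transform-on-test pattern, using sklearn's SimpleImputer as a stand-in for the project's full_pipeline:

import numpy as np
from sklearn.impute import SimpleImputer

imp = SimpleImputer(strategy="mean")
train = np.array([[1.0], [np.nan], [3.0]])
test = np.array([[np.nan], [5.0]])

imp.fit_transform(train)  # learns the training mean (2.0) and fills with it
imp.transform(test)       # reuses the training mean; never refits on test data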
Example #7
# sys.path
import sys
mlibpath = r"C:\Users\m038402\Documents\myWork\pythoncodes\mlib"
sys.path.append(mlibpath)

from utility import load_csv, load_td, connect_td
from categorize import Categorizer

data_file = "./data/data.csv"

df = load_csv(data_file)
df.head()

# -------------------------------------------------------------------
# categorizer
bprofile = Categorizer(df, "TARGET_F")

sdf = df.copy()
# categorize() mutates the frame it is given, so work on a copy
bprofile.categorize(sdf)
sdf.head()
df.head()
bprofile.get_df_column_type()
bprofile.get_likelihood("CUNCZZ_AGE_YEARS", False)

bprofile = Categorizer(df, "TARGET_F")
bprofile.df.head()
bprofile.get_crosstab("CUNCZZ_AGE_YEARS", False)
# bprofile.unique_values
# bprofile.num_bins
Example #8
from benchmark import Benchmark
from invocation import Invocation
from execution import Execution

ONLY_QCOMP_2020_BENCHMARKS = False
QComp2020_benchmarks = []
#
if ONLY_QCOMP_2020_BENCHMARKS and len(QComp2020_benchmarks) == 0:
    from utility import load_csv
    print(
        "\n\nNOTE: Will filter on the 100 benchmarks selected for QComp 2020\n"
    )
    QComp2020_benchmarks = [
        b[0] + "." + b[3] + "." + b[4]
        for b in load_csv("qcomp2020_benchmarks.csv")
    ]
    assert len(QComp2020_benchmarks) == 100


def get_name():
    """
    Returns the name of the tool as listed on http://qcomp.org/competition/2020/
    """
    return "DFTRES"  # https://doi.org/10.1016/j.ress.2019.02.004


def is_benchmark_supported(benchmark: Benchmark):
    """
    Returns True if the provided benchmark is supported by the tool and
    if the given benchmark should appear on the generated benchmark list
    """
Example #9
        cols.append('Label')  # for ARMA-style time-series prediction data, supply the Label column yourself
    if 'Date' not in df1.columns:
        cols.append('Date')  # for ARMA-style time-series prediction data, supply the Date column yourself
    if 'StockID' not in df1.columns:
        cols.append('StockID')  # for ARMA-style time-series prediction data, supply the StockID column yourself
    return addColumns(df1, df2, 'FileName', cols)


#split_date = '2015-11-22'  # evaluation start date for the small-data baseline
#split_year,split_month,split_day = 2015,11,22
split_date = '2016-03-23'  # big data
split_year, split_month, split_day = 2016, 3, 23

bARMAWeight = False
fnTmpF = load_csv(
    'validation\\0117bkupFeatureARMA\\svm-big-rbf-OpenClose-noerror-0117-addFeatureARMA.csv'
)
fnFilter = fnTmpF['FileName'].values
#fnBuyList='validation\\TotalreturnResult_1.csv'
fnBuyList = 'validation\\0117bkupFeatureARMA\\svm-big-rbf-OpenClose-noerror-0117-addFeatureARMA.csv'
#fnBuyList='validation\\best 50-100-fm-big-OpenClose-noerror-0117-addFeatureARMA - No_FM.csv'

#fnBuyList='validation\\0117bkupFeatureARMA\\knn-big-OpenClose-noerror-0117-addFeatureARMA.csv'
#fnBuyList='validation\\TotalreturnResult_1.csv'

fndFMData = 'data\\dFM-big-oldLabel.csv'  # contains sentiment polarity and rise/fall information
fnErrorSentiment = 'validation\\errorFileName-baselinebig.txt'

dfM = pandas.read_csv(fndFMData, encoding='utf-8')

# These are the results of the ARMA time-series prediction model