from scipy.io import arff


def read_file(file_to_read):
    _X = []
    _Y = []
    test_X = []
    test_Y = []
    # The file should be a .arff file
    if not file_to_read.endswith('.arff'):
        print("File should end with .arff")
        quit(0)
    data, meta = arff.loadarff(file_to_read)
    # Taking every 5th element yields a 20% test split
    counter = 0
    split = 4
    for i in range(0, len(data)):
        data_list = list(data[i].tolist())
        # The label is the last field; nominal values come back as bytes
        label = data_list[-1].decode('utf-8')
        del data_list[-1]
        if counter == split:
            test_X.append(data_list)
            test_Y.append(label)
            counter = 0
        else:
            _X.append(data_list)
            _Y.append(label)
            counter += 1
    return _X, _Y, test_X, test_Y
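# A minimal usage sketch for read_file, assuming a hypothetical 'iris.arff'
# file on disk; every 5th instance lands in the test set, so the split is
# roughly 80/20.
if __name__ == '__main__':
    train_X, train_Y, test_X, test_Y = read_file('iris.arff')
    print(len(train_X), len(test_X))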
from scipy.io import arff


def load_binary_class_data(arff_file):
    """Loads training instances and labels from an ARFF file.

    Labels must come from an attribute named 'Class' in the ARFF file.
    We assume that the first and second values are the negative and
    positive labels respectively; negative labels are mapped to 0 and
    positive labels to 1.

    Returns:
        X - n_instances-length list of n_feat-length lists
        y - n_instances-length list of integers, either 0 or 1
        negative_label, positive_label - the original label strings
    """
    # Load ARFF file
    data, metadata = arff.loadarff(arff_file)

    # Get the labels for attribute 'Class'.  We assume that the first and
    # second values are the negative and positive labels respectively.
    norminality, labels = metadata['Class']
    negative_label, positive_label = labels

    # Convert ARFF data to X and y.  X is a list of instances, where each
    # instance is itself a list of features; y is a list of 0/1 labels.
    X, y = [], []
    for instance in data:
        # Split the instance into a list of features and a string label.
        # Nominal values are returned as bytes, so decode before comparing
        # against the metadata labels.
        features = list(instance.tolist()[:-1])
        string_label = instance[-1].decode('utf-8')

        # Check that the string label is one of the possible labels
        assert string_label in (positive_label, negative_label)

        # Convert the label from a string to a 0 or 1 integer
        integer_label = 0 if string_label == negative_label else 1

        # Push into matrices
        X.append(features)
        y.append(integer_label)

    return X, y, negative_label, positive_label
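# A minimal usage sketch for load_binary_class_data, assuming a hypothetical
# 'diabetes.arff' file whose 'Class' attribute lists the negative value first.
X, y, neg, pos = load_binary_class_data('diabetes.arff')
print('negative label: %s, positive label: %s' % (neg, pos))
print('%d instances, %d features' % (len(X), len(X[0])))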
import itertools

import pandas as pd
from dotenv import load_dotenv
from flask import (
    Flask,
    render_template,
    redirect,
    request,
)
from mlxtend.frequent_patterns import (
    apriori,
    association_rules,
)
from scipy.io import arff

load_dotenv()
app = Flask(__name__)

data, meta = arff.loadarff(open('datasets/supermarket.arff', 'r'))
df = pd.DataFrame(data).drop('total', axis=1)

# Binarize the raw byte values: b'?' (missing) and b'low' map to 0,
# everything else maps to 1
map_f = lambda v: 0 if v in (b'?', b'low') else 1
df = df.applymap(map_f)

frequent_itemsets = apriori(df, min_support=0.1, use_colnames=True)
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

itemset_count = len(frequent_itemsets)
rules_count = len(rules)
items = sorted(
    set(itertools.chain.from_iterable(frequent_itemsets.itemsets.values)))
basket = set()
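# A hypothetical recommendation helper (not shown in the original app): keep
# only rules whose antecedents are already contained in the current basket,
# then rank their consequents by lift.  mlxtend stores antecedents and
# consequents as frozensets of column names.
def recommend(basket, rules):
    matching = rules[rules['antecedents'].apply(lambda a: a.issubset(basket))]
    matching = matching.sort_values('lift', ascending=False)
    return list(itertools.chain.from_iterable(matching['consequents']))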
import optparse

from scipy.io import arff

# Parse arguments
parser = optparse.OptionParser()
options, args = parser.parse_args()
assert len(args) == 3

# First positional argument: name of the training ARFF file
# Second positional argument: name of the test ARFF file
# Third positional argument: minimum number of instances required to allow a
# node to split
train_filename, test_filename, min_instances = args
min_instances = int(min_instances)  # Cast to integer

#################### Declare inputs for learning ####################

# Load ARFF file
data, metadata = arff.loadarff(train_filename)

# Change data to a Python native list of lists
data_list = [list_ for list_ in data]

# Length-(n+1) list of booleans for whether each feature is nominal;
# a feature is numeric if it is not nominal.  The extra entry is the
# class attribute's type.
norminalities = [type_ == "nominal" for type_ in metadata.types()]

# Enumeration i is a tuple of all possible values of feature i
value_enumerations = []
for name in metadata.names():
    norminality, value_enumeration = metadata[name]
    value_enumerations.append(value_enumeration)
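# A small illustrative sketch using the variables above: recover the names of
# the nominal (non-numeric) features, skipping the trailing class attribute.
feature_names = metadata.names()[:-1]
nominal_feature_names = [
    name for name, is_nominal in zip(feature_names, norminalities)
    if is_nominal
]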
import optparse

import matplotlib.pyplot as plt
from scipy.io import arff

# Parse arguments
parser = optparse.OptionParser()
options, args = parser.parse_args()
assert len(args) == 2

# First positional argument: name of the training ARFF file
# Second positional argument: name of the test ARFF file
train_filename, test_filename = args

#################### Declare inputs for learning ####################

# Load ARFF file
data, metadata = arff.loadarff(train_filename)

# Change data to a Python native list of lists
data_list = [list_ for list_ in data]

# Length-(n+1) list of booleans for whether each feature is nominal;
# a feature is numeric if it is not nominal.  The extra entry is the
# class attribute's type.
norminalities = [type_ == 'nominal' for type_ in metadata.types()]

# Enumeration i is a tuple of all possible values of feature i
value_enumerations = []
for name in metadata.names():
    norminality, value_enumeration = metadata[name]
    value_enumerations.append(value_enumeration)
        test_F1 = SVC(train_Data, train_Defective, test_Data, test_Defective)
    except Exception:
        func(myset)
    return test_auc, test_F1


# Create a new workbook
wb = Workbook()
ws = wb.active

# fileList = ['CM1', 'KC1', 'KC3', 'MC2', 'MW1', 'PC1', 'PC2', 'PC3', 'PC4',
#             'JM1', 'MC1']  # , 'PC5'
# JM1, MC1 and PC5 are excluded
# for filename in fileList:
filename = 'CM1'
ws.append([filename])

# Import the data
dataset, mate = arff.loadarff(filename + '.arff')
dataset = pandas.DataFrame(dataset)
originData = np.array(dataset)
data, instance, attribute, Defective = getdata(originData)
DefectiveNum = getDefectiveNum(Defective)

# Standardize the features
data = data.astype('float64')
scaled = preprocessing.scale(data)

# Compute k as the ratio of non-defective to defective instances
# k = np.sum(Defective == "N") / np.sum(Defective == "Y")
# if k == k:
#     k = int(k)
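# A hypothetical sketch of the SVC helper called above (the original is not
# shown): fit an sklearn SVC on the training split and return the F1 score on
# the test split.  The 'Y' positive label is an assumption matching the
# Defective values used in this script.
from sklearn import svm
from sklearn.metrics import f1_score


def svc_f1_sketch(train_Data, train_Defective, test_Data, test_Defective):
    clf = svm.SVC()
    clf.fit(train_Data, train_Defective)
    predictions = clf.predict(test_Data)
    return f1_score(test_Defective, predictions, pos_label='Y')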
import numpy as np
import arff  # liac-arff
from scipy.io import arff as scipy_arff

# liac-arff expects a text-mode file object, not bytes
data = arff.load(open('chronic_kidney_disease_full.arff', 'r'))
print(data)

# scipy's loader returns the records plus the attribute metadata
data, meta = scipy_arff.loadarff('chronic_kidney_disease_full.arff')
print(data)
print(meta)


class LogReg:
    def __init__(self, loops, a=0.01):
        self.lr = a
        self.loops = loops

    def cost(self, h, y):
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        # weights vector
        self.theta = np.zeros(X.shape[1])
        # weights training: standard batch gradient descent
        for i in range(self.loops):
            h = self.sigmoid(X @ self.theta)
            gradient = X.T @ (h - y) / y.size
            self.theta -= self.lr * gradient
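# A minimal usage sketch for LogReg, assuming X is an (n, d) float matrix and
# y an n-length 0/1 vector prepared from the ARFF records loaded above.
model = LogReg(loops=1000, a=0.1)
model.fit(X, y)
probabilities = model.sigmoid(X @ model.theta)
predictions = (probabilities >= 0.5).astype(int)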