Пример #1
0
def read_file(file_to_read):
    """Load an ARFF file and split its rows into training and test sets.

    The last attribute of every row is treated as the class label and is
    decoded from bytes to ``str``; the remaining attributes form the row's
    feature list.  Every fifth row (rows 5, 10, 15, ...) is placed in the
    test set, so the test set holds about 20% of the data.

    Args:
        file_to_read: Path of the file to load; must end with ``.arff``.

    Returns:
        A tuple ``(_X, _Y, test_X, test_Y)``: training features, training
        labels, test features and test labels, each a Python list.

    Raises:
        SystemExit: With status 1 if ``file_to_read`` does not end with
            ``.arff``.
    """
    # Imported locally so the function does not rely on the ``quit`` helper,
    # which is only injected into builtins by the ``site`` module.
    import sys

    if not file_to_read.endswith('.arff'):
        print("File Should end with .arff")
        # A rejected input is a failure, so report a non-zero exit status.
        sys.exit(1)

    data, _meta = arff.loadarff(file_to_read)

    # Holding out every fifth row puts about 20% of the data in the test set.
    holdout_period = 5
    _X, _Y = [], []
    test_X, test_Y = [], []
    for position, record in enumerate(data, start=1):
        values = list(record.tolist())
        # The label is the last attribute; it is removed from the features.
        label = values.pop().decode('utf-8')
        if position % holdout_period == 0:
            test_X.append(values)
            test_Y.append(label)
        else:
            _X.append(values)
            _Y.append(label)
    return _X, _Y, test_X, test_Y
Пример #2
0
def load_binary_class_data(arff_file):
    """Load instances and binary integer labels from an ARFF file.

    The file must declare an attribute named 'Class' with exactly two
    values.  The first declared value is taken as the negative label
    (encoded as 0) and the second as the positive label (encoded as 1).
    The label of each instance is read from its last field.

    Returns a 4-tuple:
    X - n_instances-length list of n_feat-length lists
    y - n_instances-length list of integers, either of value 0 or 1
    negative_label - the label value that is encoded as 0
    positive_label - the label value that is encoded as 1

    Raises:
    ValueError - if 'Class' does not declare exactly two values
    AssertionError - if an instance carries a label that is neither of them
    """

    # Load ARFF file
    data, metadata = arff.loadarff(arff_file)

    # Only the declared values of 'Class' are needed, not its type name.
    _, labels = metadata['Class']
    negative_label, positive_label = labels

    X, y = [], []
    for instance in data:
        values = instance.tolist()

        # Everything but the last field is a feature; the last is the label.
        features = list(values[:-1])
        string_label = values[-1]

        # loadarff can yield nominal values as bytes while the metadata
        # holds str; decode so the comparisons below can match.
        if isinstance(string_label, bytes):
            string_label = string_label.decode('utf-8')

        # Check that string label is one of the possible labels
        assert string_label in (positive_label, negative_label), (
            "unexpected class label: %r" % (string_label,))

        X.append(features)
        y.append(0 if string_label == negative_label else 1)

    return X, y, negative_label, positive_label
from flask import (
    Flask,
    render_template,
    redirect,
    request,
)
from mlxtend.frequent_patterns import (
    apriori,
    association_rules,
)
from scipy.io import arff

load_dotenv()
app = Flask(__name__)

# Load the supermarket data set.  The context manager closes the file as
# soon as it has been parsed instead of leaving the handle open.
with open('datasets/supermarket.arff', 'r') as arff_file:
    data, meta = arff.loadarff(arff_file)
# The 'total' column is dropped before the values are binarised.
df = pd.DataFrame(data).drop('total', axis=1)


def map_f(v):
    """Return 0 for the values b'?' and b'low', and 1 for anything else."""
    return 0 if (v == b'?' or v == b'low') else 1


# Turn every cell into a 0/1 flag.
df = df.applymap(map_f)

# Mine frequent itemsets (support >= 0.1), then rules with lift >= 1.
frequent_itemsets = apriori(df, min_support=0.1, use_colnames=True)
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

itemset_count = len(frequent_itemsets)
rules_count = len(rules)

# Sorted list of every distinct item that occurs in a frequent itemset.
items = sorted(
    set(itertools.chain.from_iterable(frequent_itemsets.itemsets.values)))
basket = set()
# Parse arguments
parser = optparse.OptionParser()
options, args = parser.parse_args()
# Report a wrong argument count as a usage error.  An ``assert`` is skipped
# under ``python -O`` and otherwise only produces a bare traceback.
if len(args) != 3:
    parser.error("expected 3 positional arguments: "
                 "train_filename test_filename min_instances")

# First positional argument: name of the training ARFF file
# Second positional argument: name of the test file
# Third positional argument: number of minimum instances to allow a node to
# split
train_filename, test_filename, min_instances = args
min_instances = int(min_instances)  # Cast to integer

#################### Declare inputs for learning ####################

# Load the training ARFF file
data, metadata = arff.loadarff(train_filename)

# Copy the loaded rows into a plain Python list (one entry per instance)
data_list = list(data)

# One boolean per attribute: True when the attribute type is "nominal".
# Covers every attribute reported by the metadata, so it includes the class
# attribute as well as the features.
norminalities = [type_ == "nominal" for type_ in metadata.types()]

# value_enumerations[i] holds the second element of metadata[name] for the
# i-th attribute name, i.e. that attribute's enumeration of values
value_enumerations = []
for name in metadata.names():
    norminality, value_enumeration = metadata[name]
    value_enumerations.append(value_enumeration)
Пример #5
0
import matplotlib.pyplot as plt

# Parse arguments
parser = optparse.OptionParser()
options, args = parser.parse_args()
# Report a wrong argument count as a usage error.  An ``assert`` is skipped
# under ``python -O`` and otherwise only produces a bare traceback.
if len(args) != 2:
    parser.error("expected 2 positional arguments: "
                 "train_filename test_filename")

# First positional argument: name of the training ARFF file
# Second positional argument: name of the test file
train_filename, test_filename = args

#################### Declare inputs for learning ####################

# Load the training ARFF file
data, metadata = arff.loadarff(train_filename)

# Copy the loaded rows into a plain Python list (one entry per instance)
data_list = list(data)

# One boolean per attribute: True when the attribute type is 'nominal'.
# Covers every attribute reported by the metadata, so it includes the class
# attribute as well as the features.
norminalities = [type_ == 'nominal' for type_ in metadata.types()]

# value_enumerations[i] holds the second element of metadata[name] for the
# i-th attribute name, i.e. that attribute's enumeration of values
value_enumerations = []
for name in metadata.names():
    norminality, value_enumeration = metadata[name]
    value_enumerations.append(value_enumeration)
Пример #6
0
        test_F1 = SVC(train_Data, train_Defective, test_Data, test_Defective)
    except:
        func(myset)
    return test_auc, test_F1


# Create a new workbook and take its active sheet
wb = Workbook()
ws = wb.active
# Only 'CM1' is processed here.  A loop over the following list is disabled:
# fileList = ['CM1', 'KC1', 'KC3', 'MC2', 'MW1', 'PC1', 'PC2', 'PC3', 'PC4', 'JM1', 'MC1']#, 'PC5']
# (the original note singles out JM1, MC1 and PC5)
# for filename in fileList:
filename = 'CM1'
ws.append([filename])
# Import the data
dataset, mate = arff.loadarff(filename + '.arff')
dataset = pandas.DataFrame(dataset)
originData = np.array(dataset)
data, instance, attribute, Defective = getdata(originData)
DefectiveNum = getDefectiveNum(Defective)
# Standardize the features
data = data.astype('float64')
scaled = preprocessing.scale(data)
# Compute k
'''k = np.sum(Defective == "N")/np.sum(Defective == "Y")
if k == k:
    k = int(k)
Пример #7
0
import numpy as np
import arff
# Load the data set through ``arff.load`` from a file opened in binary mode
# and print the result.
# NOTE(review): the file object passed to ``load`` is never closed.
data = arff.load(open('chronic_kidney_disease_full.arff', 'rb'))
print(data)
# Load the same file again through ``arff.loadarff``; this rebinds ``data``
# and also yields ``meta``.
# NOTE(review): ``loadarff`` looks like the scipy.io.arff API, while ``load``
# belongs to the stand-alone ``arff`` package imported above - presumably
# only one of the two calls can work with a given module; confirm which
# library is intended.
data, meta = arff.loadarff('chronic_kidney_disease_full.arff')
print(data)
print(meta)


class LogReg:

    def __init__(self, loops, a=0.01):
        self.lr = a
        self.loops = loops



    def cost(self, h, y):
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def sigmoid(z):
        y = 1 / (1 + np.exp(-z))
        return y

    def fit(self, X, y):

        # weights vector
        self.theta = np.zeros(X.shape[1])
        # weights training
        for i in range(self.loops):