def create_rules():

    fname = './Sample_Data/sample_variants.txt'
    dataset = []
    with open(fname, 'r') as fhandle:
        for line in fhandle:
            line = line.rstrip('\n')
            if line:
                # each line is: patient id <TAB> comma-separated gene list
                patientid, genes = line.split('\t')
                dataset.append(genes.split(','))
    print(dataset)
    oht = OnehotTransactions()
    oht_ary = oht.fit(dataset).transform(dataset)
    df = pd.DataFrame(oht_ary, columns=oht.columns_)
    frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
    # first pass (confidence-based rules); the result is not kept here
    association_rules(frequent_itemsets,
                      metric="confidence",
                      min_threshold=0.5)
    rules = association_rules(frequent_itemsets,
                              metric="lift",
                              min_threshold=1.0)
    return rules
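A quick way to exercise create_rules() (assuming pandas, OnehotTransactions, apriori, and association_rules are imported as in the other examples, and that ./Sample_Data/sample_variants.txt exists):

rules = create_rules()
# inspect the strongest rules first
print(rules.sort_values('lift', ascending=False).head())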
Example #2
def get_itemsets(inputdata):
    oht = OnehotTransactions()
    oht_ary = oht.fit(inputdata).transform(inputdata)
    df = pd.DataFrame(oht_ary, columns=oht.columns_)
    frequent_itemsets = apriori(df,
                                min_support=0.1,
                                max_len=4,
                                use_colnames=True)
    #rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)
    frequent_itemsets.to_csv('AANFrequentItemsets.csv')
def test_cloning():

    oht = OnehotTransactions()
    oht.fit(dataset)
    oht2 = clone(oht)

    msg = ("'OnehotTransactions' object has no attribute 'columns_'")
    assert_raises(AttributeError, msg, oht2.transform, dataset)

    trans = oht2.fit_transform(dataset)
    np.testing.assert_array_equal(expect, trans)
    def get_oht_dataframe(self):
        # cap the transaction list at 15000 entries to keep the encoding manageable
        self.short_list = self.get_processed_list()[:15000]
        print(len(self.short_list))
        oht = OnehotTransactions()
        oht_ary = oht.fit(self.short_list).transform(self.short_list)
        data_frame = pd.DataFrame(oht_ary, columns=oht.columns_)
        return data_frame
Example #5
def main():
    oht = OnehotTransactions()
    oht_ary = oht.fit(dataset).transform(dataset)
    df = pd.DataFrame(oht_ary, columns=oht.columns_)

    frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
    print(frequent_itemsets)

    rules = association_rules(frequent_itemsets,
                              metric="confidence",
                              min_threshold=0.6)
    print(rules)
Example #6
def define_segments(QLINK_URLS, UNKNOWN_URLS, QUOTA):
    global OH, lr, quota, i, km, part
    quota = QUOTA
    list_q = []
    obj_q = []
    for url in QLINK_URLS:
        prop = get_prop(url)
        list_q += prop
        obj_q.append(prop)


    params, count = np.unique(list_q, return_counts=True)
    params = np.array(params, dtype='object')
    count = np.array(count, dtype='object')
    feat_q_num = np.hstack((params.reshape(-1, 1), count.reshape(-1, 1)))
    feat_q_num = feat_q_num[np.argsort(feat_q_num[:, 1])]
    feat_q_num = feat_q_num[::-1]

    obj_g = []

    for url in UNKNOWN_URLS:
        prop = get_prop(url)
        obj_g.append(prop)

    imp_features = (feat_q_num[:100][:, 0]).reshape(-1, 1)

    OH = OnehotTransactions()
    OH.fit(imp_features)

    q_matrix = OH.transform(obj_q)
    g_matrix = OH.transform(obj_g)

    X = np.vstack((q_matrix, g_matrix))
    y = [1] * 500 + [0] * 500
    y = np.array(y)

    lr = LinearRegression()
    lr.fit(X, y)
    i = 0

    # split the data into n clusters and record the share of qlink examples in each one
    n = 15
    km = KMeans(n_clusters=n)
    a = np.hstack((km.fit_predict(X).reshape(-1, 1), y.reshape(-1, 1)))
    part = np.zeros(n)
    for i in range(n):
        part[i] = a[a[:, 0] == i][:, 1].sum()
    part /= part.sum()
def test_cloning():

    oht = OnehotTransactions()
    oht.fit(dataset)
    oht2 = clone(oht)

    msg = ("'OnehotTransactions' object has no attribute 'columns_'")
    assert_raises(AttributeError,
                  msg,
                  oht2.transform,
                  dataset)

    trans = oht2.fit_transform(dataset)
    np.testing.assert_array_equal(expect, trans)
def findAssociationWord(sentenceList):
    transactionID = list()
    for sentence in sentenceList:
        word_list = word_tokenize(sentence, engine='mm')
        # drop stopwords and purely non-word tokens
        word_list = [word for word in word_list
                     if word not in stopword and not re.match(r'\W+', word)]
        transactionID.append(word_list)
    oht = OnehotTransactions()
    oht_ary = oht.fit(transactionID).transform(transactionID)
    df = pd.DataFrame(oht_ary, columns=oht.columns_)
    frequent_itemsets = apriori(df, min_support=0.01, use_colnames=True)
    result = association_rules(frequent_itemsets,
                               metric="lift",
                               min_threshold=0.5)
    print(result)
Example #9
def define_segments(QLINK_URLS, UNKNOWN_URLS, QUOTA):
    global OH, lr, quota, i, km, part
    quota = QUOTA
    list_q = []
    obj_q = []
    for url in QLINK_URLS:
        prop = get_prop(url)
        list_q += prop
        obj_q.append(prop)

    params, count = np.unique(list_q, return_counts=True)
    params = np.array(params, dtype='object')
    count = np.array(count, dtype='object')
    feat_q_num = np.hstack((params.reshape(-1, 1), count.reshape(-1, 1)))
    feat_q_num = feat_q_num[np.argsort(feat_q_num[:, 1])]
    feat_q_num = feat_q_num[::-1]

    obj_g = []

    for url in UNKNOWN_URLS:
        prop = get_prop(url)
        obj_g.append(prop)

    imp_features = (feat_q_num[:100][:, 0]).reshape(-1, 1)

    OH = OnehotTransactions()
    OH.fit(imp_features)

    q_matrix = OH.transform(obj_q)
    g_matrix = OH.transform(obj_g)

    X = np.vstack((q_matrix, g_matrix))
    y = [1] * 500 + [0] * 500
    y = np.array(y)

    lr = LinearRegression()
    lr.fit(X, y)
    i = 0

    # split the data into n clusters and record the share of qlink examples in each one
    n = 15
    km = KMeans(n_clusters=n)
    a = np.hstack((km.fit_predict(X).reshape(-1, 1), y.reshape(-1, 1)))
    part = np.zeros(n)
    for i in range(n):
        part[i] = a[a[:, 0] == i][:, 1].sum()
    part /= part.sum()
Example #10
def experiment3():
    print('Experiment 3 Association Rules of Actors and Directors')
    print('-------------------------------------------------------')
    
    # read in data as datafile in pandas
    df = pd.read_csv('./data/assocRules_withDirector.csv')
    df = df.dropna()
    # get just the values, without the header
    df_values = df.values
    # transform the data with a onehot transform, to vectorize categorical data
    oht = OnehotTransactions()
    df_processed = oht.fit(df_values).transform(df_values)
    # rebuild the dataframe with the transformed data
    df = pd.DataFrame(df_processed, columns=oht.columns_)

    # find frequencies with apriori algorithm
    frequent_combinations = apriori(df, min_support=0.001, use_colnames=True)
    # create tabular ruleset
    rules = association_rules(frequent_combinations, metric="lift", min_threshold=1)
    rules.to_csv('./results/association_rules.csv', sep='\t')
    print(rules)
def Apriori(id, dataset, principal):
    pasos = "Dataset cargado" + '\n'
    dataset = pickdataset(int(id), dataset)
    oht = OnehotTransactions()
    oht_ary = oht.fit(dataset).transform(dataset)
    df = pd.DataFrame(oht_ary, columns=oht.columns_)
    frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
    # first pass (confidence-based rules); the result is not kept here
    association_rules(frequent_itemsets,
                      metric="confidence",
                      min_threshold=0.7)
    rules = association_rules(frequent_itemsets,
                              metric="lift",
                              min_threshold=1.2)
    pasos += "Dataset Procesado: " + '\n'
    pasos += str(df) + '\n'
    pasos += "Item Set: " + '\n'
    pasos += str(frequent_itemsets) + '\n'
    avgReal = str(np.mean(rules.as_matrix(columns=['support'])) *
                  100) + "% soporte promedio"
    reglas = rules[['antecedents', 'consequents', 'support']]
    if principal:
        context = {
            'algoritmoPrincipal': 'Apriori',
            'resultado': avgReal,
            'pasos': pasos,
            'reglas': reglas,
            'img': 'No aplica'
        }
    else:
        context = {
            'algoritmoComparar': 'Regresión Lineal',
            'resultado2': avgReal,
            'pasos2': pasos,
            'reglas2': reglas,
            'img2': 'No aplica'
        }
    return context
def test_fit():
    oht = OnehotTransactions()
    oht.fit(dataset)
    assert (oht.columns_ == [
        'Apple', 'Bananas', 'Beer', 'Chicken', 'Milk', 'Rice'
    ])
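These tests rely on module-level dataset and expect fixtures that are not shown in this excerpt. An illustrative pair that is consistent with the column order asserted above (not necessarily the original fixtures):

import numpy as np

dataset = [['Apple', 'Beer', 'Rice', 'Chicken'],
           ['Apple', 'Beer', 'Rice'],
           ['Apple', 'Beer'],
           ['Apple', 'Bananas'],
           ['Milk', 'Beer', 'Rice', 'Chicken'],
           ['Milk', 'Beer', 'Rice'],
           ['Milk', 'Beer'],
           ['Apple', 'Bananas']]

# one-hot matrix with columns Apple, Bananas, Beer, Chicken, Milk, Rice
expect = np.array([[1, 0, 1, 1, 0, 1],
                   [1, 0, 1, 0, 0, 1],
                   [1, 0, 1, 0, 0, 0],
                   [1, 1, 0, 0, 0, 0],
                   [0, 0, 1, 1, 1, 1],
                   [0, 0, 1, 0, 1, 1],
                   [0, 0, 1, 0, 1, 0],
                   [1, 1, 0, 0, 0, 0]])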
Example #13
from mlxtend.preprocessing import OnehotTransactions
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# reading file as lines of string
with open("categories.txt") as file:
    default=file.read().splitlines()

# converting the strings into list type
data=[]
for s in default:
    data.append(list(map(str,s.split(';'))))
# dataNew = pd.get_dummies(data)

# converting the dataset into one-hot transactions (1 if the item is present, 0 if not)
oht = OnehotTransactions()
dataNew = oht.fit(data).transform(data)
dataNew=pd.DataFrame(dataNew,columns=oht.columns_)


# print(dataNew.head())
# run the apriori algorithm with the given parameters
frequent_itemsets = apriori(dataNew, min_support=0.01, use_colnames=True)
frequent_itemsets=np.array(frequent_itemsets)

# for i in range(len(frequent_itemsets)):
# 	print("{}:{}".format(int(frequent_itemsets[i][0]*len(data)),frequent_itemsets[i][1][0]))

# writing in the text file
# write_file=open("new_pattern.txt","w")
# for i in range(len(frequent_itemsets)):
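The commented-out writing loop above is cut off; a minimal sketch of one way to finish it, keeping the count:items format suggested by the commented print (a frozenset cannot be indexed, so the items are joined instead):

with open("new_pattern.txt", "w") as write_file:
    for support, itemset in frequent_itemsets:
        # convert relative support back into an absolute transaction count
        count = int(support * len(data))
        write_file.write("{}:{}\n".format(count, ";".join(sorted(itemset))))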
Example #14
import pandas as pd
from mlxtend.preprocessing import OnehotTransactions
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

data = pd.read_csv('./inout_flow_data.csv')
data = data.loc[(data['station_id'] == 519) & ((data['in_flow_count'] != 0) |
                                               (data['out_flow_count'] != 0)),
                ['in_flow_count', 'out_flow_count']]

lst = []
for value in data.itertuples():
    # bin in/out flow counts into buckets of width 5, prefixed 'i'/'o'
    lst.append(['i' + str(int(value[1] / 5)), 'o' + str(int(value[2] / 5))])

oht = OnehotTransactions()
oht_ary = oht.fit(lst).transform(lst)
df = pd.DataFrame(oht_ary, columns=oht.columns_)
frequent_itemsets = apriori(df, min_support=0.03, use_colnames=True)
print(frequent_itemsets)
rules = association_rules(frequent_itemsets,
                          metric="confidence",
                          min_threshold=0.4)
print(rules)
Example #15
def test_inverse_transform():
    oht = OnehotTransactions()
    oht.fit(dataset)
    np.testing.assert_array_equal(np.array(data_sorted),
                                  np.array(oht.inverse_transform(expect)))
Example #16
def test_fit_transform():
    oht = OnehotTransactions()
    trans = oht.fit_transform(dataset)
    np.testing.assert_array_equal(expect, trans)
Example #17
File: ar01.py Project: mrok88/es
#            ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
#            ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
#            ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
#            ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]
from datetime import timedelta, date
from es02 import es02 
dataset = []
if __name__ == "__main__":
    global dataset
    es = es02()
    es.set_service("display")
    es.load_datas2(date(2017,12,1),date(2018,1,8))
    dataset2 = es.dset

dataset = [ item for item in dataset2  if len(item) > 1 ]
for item in dataset:
    print(item)

import pandas as pd
from mlxtend.preprocessing import OnehotTransactions

oht = OnehotTransactions()
oht_ary = oht.fit(dataset).transform(dataset)
df = pd.DataFrame(oht_ary, columns=oht.columns_)
from mlxtend.frequent_patterns import apriori
frequent_itemsets = apriori(df, min_support=0.3, use_colnames=True)
print(frequent_itemsets)

from mlxtend.frequent_patterns import association_rules
arule = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print(arule)
Example #18
@author: AliOthman
"""

import pandas as pd
from mlxtend.preprocessing import OnehotTransactions
from mlxtend.frequent_patterns import apriori

#dataset = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
#           ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
#           ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
#           ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
#           ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]

data = pd.read_csv('bakery.csv')
process = OnehotTransactions()
matrix = process.fit(data).transform(data)

df = pd.DataFrame(matrix, columns=process.columns_)

#apriori(df, min_support=0.6)
#apriori(df, min_support=0.6, use_colnames=True)

frequent_itemsets = apriori(df, min_support=0.0123, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(
    lambda x: len(x))

#frequent_itemsets[ (frequent_itemsets['length'] == 2) &
#                  (frequent_itemsets['support'] >= 0.8) ]

print(frequent_itemsets)
Example #19
            'register': u_r,
            'trannum': u_t,
            'basket': basket_SKUs
        }
        list_of_baskets.append(d)
    else:
        pass

basket_df = pd.DataFrame(list_of_baskets)
all_baskets = basket_df['basket'].tolist()

from mlxtend.preprocessing import OnehotTransactions
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

oht = OnehotTransactions()
oht_array = oht.fit(all_baskets).transform(all_baskets)

oht_df = pd.DataFrame(oht_array, columns=oht.columns_)
print('One Hot Encoding Successful')
print('-----------------------------')

frequent_SKUs = apriori(oht_df, min_support=0.0004, use_colnames=True)
print('Frequent SKUs Successful')
print('----------------------------------------------------------')

assoc_rules = association_rules(frequent_SKUs,
                                metric="confidence",
                                min_threshold=0.5)

# pickle the association rules dataframe for easy reference
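# (sketch) the pickling step itself is not shown above; a minimal way to do it,
# with an assumed output path:
assoc_rules.to_pickle('assoc_rules.pkl')  # hypothetical file name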
# In[19]:

numpyMatrix = df.as_matrix()

# In[21]:

numpyMatrix

# In[22]:

from mlxtend.preprocessing import OnehotTransactions

# In[23]:

oht = OnehotTransactions()
oht_ary = oht.fit(numpyMatrix).transform(numpyMatrix)
dataframe = pd.DataFrame(oht_ary, columns=oht.columns_)

# In[35]:

# note: drop() returns a new DataFrame; dataframe itself is not modified here
dataframe.drop(dataframe.columns[0], axis=1)

# In[56]:

from mlxtend.frequent_patterns import apriori

frequent_itemsets = apriori(dataframe, min_support=0.05, use_colnames=True)
frequent_itemsets

# In[57]:
Example #21
Data = [['Power Bank', 'Screen Guard', 'Travel Charger'],
        ['Screen Guard', 'Bluetooth Headset', 'Mobile Cover'],
        ['Screen Guard', 'Arm Band', 'Mobile Cover'],
        ['Power Bank', 'Screen Guard', 'Leather Pouch'],
        ['Bluetooth Headset', 'Power Bank', 'Mobile Cover']]

import pandas as pd
from mlxtend.preprocessing import OnehotTransactions
from mlxtend.frequent_patterns import apriori

###Start code here
oht = OnehotTransactions()
oht_ary = oht.fit(Data).transform(Data)
dataFrame = pd.DataFrame(oht_ary, columns=oht.columns_)
frequent_itemsets = apriori(dataFrame, min_support=0.1, use_colnames=True)
###End code(approx 4 lines)

from mlxtend.frequent_patterns import association_rules
###Start code here
association_rule = association_rules(frequent_itemsets,
                                     metric="confidence",
                                     min_threshold=0.7)
print(association_rule)
###End code(approx 2 lines)
''' 
                          antecedants                   consequents  \
0                          (Arm Band)                (Screen Guard)   
1        (Travel Charger, Power Bank)                (Screen Guard)   
2      (Travel Charger, Screen Guard)                  (Power Bank)   
3                    (Travel Charger)    (Power Bank, Screen Guard)   
4                 (Bluetooth Headset)                (Mobile Cover)   
Example #22
    def get(self, request, *args, **kwargs):
        context = self.get_context_data(**kwargs)
        threshold = request.GET.get('parameter')
        percentage = float(request.GET.get('percentage'))
        support = float(request.GET.get('support'))
        nodes = int(request.GET.get('nodes'))
        # Read working file data and prepare transactions for
        # Apriori algorithm
        file = WFile.objects.get(pk=kwargs['pk'])
        store_data = pd.read_csv(file.file.path)
        columns = request.GET.getlist('selected')
        dataset = []
        for _, row in store_data.iterrows():
            aux = []
            for c in columns:
                aux.append(c + '=' + row[c])
            dataset.append(aux)

        # A priori algorithm apply: Extraction of frequent itemsets
        # and association rules
        oht = OnehotTransactions()
        oht_ary = oht.fit(dataset).transform(dataset)
        df = pd.DataFrame(oht_ary, columns=oht.columns_)

        frequent_itemsets = apriori(df, min_support=support, use_colnames=True)
        context['frequent_itemsets'] = frequent_itemsets

        rules = association_rules(frequent_itemsets,
                                  metric=threshold,
                                  min_threshold=percentage)
        # rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.2)
        context['rules'] = rules

        # Building of scatter plot of support vs. confidence
        support = rules.as_matrix(columns=['support'])
        confidence = rules.as_matrix(columns=['confidence'])

        for i in range(len(support)):
            support[i] = support[i] + 0.0025 * (random.randint(1, 10) - 5)
            confidence[i] = confidence[i] + 0.0025 * (random.randint(1, 10) -
                                                      5)

        plt.gcf().clear()
        plt.scatter(support, confidence, alpha=0.5, marker="*")
        plt.xlabel('support')
        plt.ylabel('confidence')
        plt.tight_layout()

        file1 = io.BytesIO()
        plt.savefig(file1)
        file1 = ContentFile(file1.getvalue())

        # Building of histogram
        frequency_array = frequent_itemsets[frequent_itemsets['itemsets'].map(
            len) == 1]
        total_transactions = len(dataset)
        histogram_labels = []
        histogram_frequency = []
        for index, row in frequency_array.iterrows():
            histogram_frequency.append(int(row['support'] *
                                           total_transactions))
            histogram_labels.append(list(row['itemsets'])[0])

        histogram_data = []
        for i in range(len(histogram_labels)):
            histogram_data += [
                histogram_labels[i] for x in range(histogram_frequency[i])
            ]

        #####
        # Create the plot
        #####
        plt.gcf().clear()

        fig, ax = plt.subplots()

        # the histogram of the data
        n, bins, patches = ax.hist(histogram_data)

        # label the y-axis and rotate the x tick labels
        ax.set_ylabel('Frequency')
        plt.xticks(histogram_labels, rotation=90, fontsize='x-small')

        # Tweak spacing to prevent clipping of ylabel
        fig.tight_layout()

        file2 = io.BytesIO()
        plt.savefig(file2)
        file2 = ContentFile(file2.getvalue())

        # Building of heat plot
        # Convert the input into a 2D dictionary
        freqMap = {}
        for line in dataset:
            for item in line:
                if not item in freqMap:
                    freqMap[item] = {}

                for other_item in line:
                    if not other_item in freqMap:
                        freqMap[other_item] = {}

                    freqMap[item][other_item] = freqMap[item].get(
                        other_item, 0) + 1
                    freqMap[other_item][item] = freqMap[other_item].get(
                        item, 0) + 1

        df = DataFrame(freqMap).T.fillna(0)

        #####
        # Create the plot
        #####
        plt.gcf().clear()
        plt.pcolormesh(df, edgecolors='black')
        plt.yticks(np.arange(0.5, len(df.index), 1),
                   df.index,
                   fontsize='x-small')
        plt.xticks(np.arange(0.5, len(df.columns), 1),
                   df.columns,
                   rotation=90,
                   fontsize='x-small')
        plt.tight_layout()

        file3 = io.BytesIO()
        plt.savefig(file3)
        file3 = ContentFile(file3.getvalue())

        # Draw graph for association rules
        file4 = draw_graph(rules, nodes)
        print(file1, file2, file3, file4)
        results = AssociationRules.objects.create()
        results.scatter.save('scatter.png', file1)
        results.histogram.save('histogram.png', file2)
        results.heat_map.save('heat_map.png', file3)
        results.graph.save('graph.png', file4)
        context['results'] = results
        return self.render_to_response(context)
Example #23
import json

recipe_data = pd.read_json('./train.json')
print(recipe_data)

# In[2]:

ingredients = recipe_data['ingredients'].tolist()
print(ingredients[:5])

# In[3]:

from mlxtend.preprocessing import OnehotTransactions
from mlxtend.frequent_patterns import apriori

oht = OnehotTransactions()
oht_ary = oht.fit(ingredients).transform(ingredients)
df_train = pd.DataFrame(oht_ary, columns=oht.columns_)
df_train

# In[4]:

frequent_itemsets_train = apriori(df_train,
                                  min_support=0.05,
                                  use_colnames=True)
print(frequent_itemsets_train)

# In[5]:

from mlxtend.frequent_patterns import association_rules
def test_fit_transform():
    oht = OnehotTransactions()
    trans = oht.fit_transform(dataset)
    np.testing.assert_array_equal(expect, trans)
Example #25
import pandas as pd
from mlxtend.preprocessing import OnehotTransactions
from mlxtend.frequent_patterns import apriori, association_rules
import os
import csv

os.chdir('E:\\20.association analysis\\dataset\\75000')

file = open('75000-out1.csv')
rows = csv.reader(file)
rows = list(rows)
for row in rows:
    row.pop(0)

oht = OnehotTransactions()
oht_ary = oht.fit(rows).transform(rows)
df = pd.DataFrame(oht_ary, columns=oht.columns_)
df

frequent_itemsets = apriori(df, min_support=0.05, use_colnames=True)

rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules[(rules['lift'] >= 6) & (rules['confidence'] >= 0.8)]
def test_inverse_transform():
    oht = OnehotTransactions()
    oht.fit(dataset)
    np.testing.assert_array_equal(np.array(data_sorted),
                                  np.array(oht.inverse_transform(expect)))
Example #27
def test_fit():
    oht = OnehotTransactions()
    oht.fit(dataset)
    assert(oht.columns_ == ['Apple', 'Bananas', 'Beer',
                            'Chicken', 'Milk', 'Rice'])
all_cells_list = []
# Loop over each cell in the column and append its parsed contents to all_cells_list
for cell in sheet.col(spreadsheet_column):
    # Sets cell_value to the value found in the cell
    cell_value = cell.value
    # Removes unneeded characters to reduce clutter
    cell_value = cell_value.replace('"', "")
    cell_value = cell_value.replace(r'[', '')
    cell_value = cell_value.replace(r']', '')
    # Splits each value apart based on spaces and appends them to the cell list
    cell_value_parsed = cell_value.split(',')
    all_cells_list.append(cell_value_parsed)
print(all_cells_list)

# Creation of the data frame based on the cell_list
oht = OnehotTransactions()
oht_ary = oht.fit(all_cells_list).transform(all_cells_list)
df = pd.DataFrame(oht_ary, columns=oht.columns_)
print(df)

# ----------------------------------------------------------------------------------------------------------------------
# Configuration for apriori algorithm
# ----------------------------------------------------------------------------------------------------------------------
# minimum support threshold for apriori
min_co = 0.3
# whether to include the column names (item labels) in the output
use_colnames_bool = True
# maximum itemset length (None means no limit)
max_len_value = None
frequent_itemsets = apriori(df,
                            min_support=min_co,
                            use_colnames=use_colnames_bool,
                            max_len=max_len_value)
Example #29
    disease = ""
    print("ENTER")
    with open("rollup_dataset.csv", "rt", encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            row_clean = [i for i in row if i]
            bucket_clean = [i for i in bucket if i]
            if len(row_clean) == (len(bucket_clean) + 1):
                if all(values in row_clean for values in bucket_clean):
                    disease = row_clean[0]
                    break

    return disease


buckets = []

with open("buckets_new.csv") as csvfile:
    reader = csv.reader(csvfile)

    for row in reader:
        buckets.append(row)

oht = OnehotTransactions()
oht_ary = oht.fit(buckets).transform(buckets)
df = pd.DataFrame(oht_ary, columns=oht.columns_)
frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)
frequent_itemsets = frequent_itemsets[frequent_itemsets['support'] >= 0.7]
# keep only the itemsets column so list() below yields the itemsets themselves
frequent_itemsets = frequent_itemsets['itemsets']
print(list(frequent_itemsets))
#get_disease(frequent_itemsets[0]['itemsets'], buckets)
Example #30
import csv

import pandas as pd
from IPython.display import display
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import OnehotTransactions

dataset = []
with open('supermarket.csv', newline='') as f:
    transactions = csv.reader(f)
    dataset = list(transactions)

    oht = OnehotTransactions()
    oht_ary = oht.fit(dataset).transform(dataset)
    data_frame = pd.DataFrame(oht_ary, columns=oht.columns_)

    frequent_itemsets = apriori(data_frame, min_support=0.3, use_colnames=True)

    # get the association rules
    rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.84).round(2)

display(rules)
        f.write("%s " % content2)
        f.write("%s\n" % content3)
        total_eval_count = total_eval_count + 1

print "Total Number of evaluations",
print total_eval_count

# Make a list of all the Side Effects, so that we can convert it to One Hot Notation
side_effect_list = []
for patient in patient_data.keys():
    for drug in patient_data[patient]["Treatments List"].keys():
        side_effect_list.append(
            patient_data[patient]["Treatments List"][drug]["Side Effects"])

# Converting to One Hot Vector
convert = OnehotTransactions()
side_effect_list = convert.fit(side_effect_list).transform(side_effect_list)

# Making the Librec Dictionary i.e. the vectorised feature dataset
side_effect_counter = 0
librec_dict = {}

for patient in patient_data.keys():
    patient_counter = patient_id[patient]
    librec_dict[patient_counter] = {}

    for drug in patient_data[patient]["Treatments List"].keys():
        key = int(drug)
        librec_dict[patient_counter][key] = {}
        librec_dict[patient_counter][key]["Side Effect Rating"] = patient_data[
            patient]["Treatments List"][drug]["Side Effect Rating"]