Пример #1
0
def assign_servers_test_output(df_train, df_test, percentile, confidence,
                               apps_server):
    """Mine rules on the training frame and apply them to the test frame.

    The FP-growth support threshold is the ``percentile`` quantile of the
    per-``pairs2`` row counts of ``df_train``.  The mined rules are formatted
    by ``format_rules`` and then passed, together with ``df_test``, to
    ``server_association``.

    Side effect: an ``hour`` column (hour component of ``Date``) is written
    into ``df_train`` in place.

    Args:
        df_train: DataFrame read here for the columns ``Date``, ``pairs``,
            ``pairs2``, ``norm_latency``, ``Duration`` and ``Packets``.
        df_test: DataFrame forwarded unchanged to ``server_association``.
        percentile: Quantile handed to ``Series.quantile`` to pick the
            support threshold from the pair frequencies.
        confidence: Confidence threshold for rule generation.
        apps_server: Forwarded to ``format_rules`` and ``server_association``.

    Returns:
        The six values unpacked from ``server_association``:
        ``(server_df, server_assignments, total_latency,
        total_latency_model, avg_latency, avg_latency_model)``.
    """
    df_train['hour'] = None
    df_train['hour'] = pd.DatetimeIndex(df_train['Date']).hour

    transactions = list(df_train['pairs'])

    # One row per 'pairs2' value with its row count and aggregate metrics.
    aggregations = {
        'Date': 'count',
        'norm_latency': 'mean',
        'Duration': 'sum',
        'Packets': 'sum',
    }
    pairs_count = df_train.groupby('pairs2').agg(aggregations).reset_index()
    pairs_count.columns = [
        'pairs',
        'frequency',
        'avg_norm_latency',
        'total_duration',
        'total_packets',
    ]

    # NOTE(review): the divisor is the packet total over ALL pairs, although
    # the original comment described a per-pair ratio. The column is not read
    # again in this function, so the computation is kept exactly as it was.
    packets_all_pairs = pairs_count['total_packets'].sum()
    pairs_count['norm_latency'] = (
        pairs_count['total_duration'] / packets_all_pairs) * 100

    support_threshold = pairs_count['frequency'].quantile(percentile)
    patterns = pyfpgrowth.find_frequent_patterns(transactions,
                                                 support_threshold)
    rules = pyfpgrowth.generate_association_rules(patterns, confidence)

    # Enrich the raw rules with the extra information format_rules derives
    # from the training data.
    formated_rules = format_rules(rules, df_train, apps_server)

    # Apply the training rules to the test data to get server assignments.
    (server_df, server_assignments, total_latency, total_latency_model,
     avg_latency, avg_latency_model) = server_association(
         formated_rules, df_test, apps_server)

    return (server_df, server_assignments, total_latency, total_latency_model,
            avg_latency, avg_latency_model)
Пример #2
0
def upload():
    """Store an uploaded CSV, mine association rules from it and save them.

    On a POST request that carries files, the ``inputFile`` upload is saved
    under ``app.config["FILE_UPLOADS"]``, parsed as CSV (one transaction per
    row), mined with FP-growth, and a ``File`` record holding the file name,
    the submitted ``support`` / ``confidence`` form values and the rules is
    committed.  The rules are then rendered with ``data.html``.

    ``support`` and ``confidence`` are interpreted as integer percentages.

    Returns:
        The rendered ``data.html`` response for a successful upload.

    NOTE(review): for non-POST requests, or a POST without files, the
    function still falls through and returns ``None`` as before; decide what
    the view should render in those cases.
    """
    if request.method == "POST":

        if request.files:
            file = request.files['inputFile']
            support = request.form['support']
            confidence = request.form['confidence']

            # The client controls the file name: keep only its last path
            # component so it cannot point outside the upload directory.
            filename = os.path.basename(file.filename)
            saved_path = os.path.join(app.config["FILE_UPLOADS"], filename)
            file.save(saved_path)

            # Read back from the exact path that was written (previously a
            # hard-coded './uploads/' prefix was used for reading).
            with open(saved_path, newline='') as f:
                transactions = list(csv.reader(f))

            patterns = pyfpgrowth.find_frequent_patterns(
                transactions,
                len(transactions) * int(support) / 100)
            rules = pyfpgrowth.generate_association_rules(
                patterns,
                int(confidence) / 100)

            res = rules
            newFile = File(name=filename,
                           sup=support,
                           con=confidence,
                           result=res)

            # Persist and render only when a record was actually built;
            # previously this ran for every POST and referenced undefined
            # names (``newFile`` without files, ``result`` always).
            db.session.add(newFile)
            db.session.commit()
            return render_template("data.html", result=res)
Пример #3
0
def popupmsg():
    """Show a popup with the suggestion mined for the selected goal.

    Reads the module-level names ``s`` (frequent patterns), ``list1_goal``
    (the list widget whose selection is used) and ``a`` (sequence indexed by
    the selection).  Rules are generated from ``s`` with a confidence
    threshold of 0.33; the rule whose key equals the selected entry supplies
    the suggestion and its probability.

    Raises:
        IndexError: if nothing is selected in ``list1_goal``.
        KeyError: if no rule exists for the selected entry.
    """
    rules = pyfpgrowth.generate_association_rules(s, 0.33)
    ranked = sorted(rules.items(), key=operator.itemgetter(1), reverse=True)
    k = dict(ranked)

    selection = list1_goal.curselection()
    selected_index = int(selection[0])
    gid = a[selected_index - 1]
    print("gid:" + str(gid))

    print("s :" + str(s))
    print("rules : " + str(rules))

    goal_text = str(gid[0])
    matched = k[gid]
    sug = "".join([
        "goal id :", goal_text,
        " suggestion: ", str(matched[0]),
        "probabilty :", str(matched[1]),
    ])

    popup = tk.Tk()
    popup.wm_title("suggestion")
    tk.Label(popup, text=sug).pack(side="top", fill="x", pady=10)
    tk.Button(popup, text="Okay", command=popup.destroy).pack()
    popup.mainloop()
Пример #4
0
def generate_rules(dataframe, information=None, minsupport=25, minconfidence=0.55, satisfied_value=3):
    '''
    Build per-reviewer transactions from ratings and mine association rules.

    Every distinct ``reviewerID`` yields one transaction containing the
    ``productID`` values that reviewer rated at ``satisfied_value`` or above
    (reviewers with no such rating yield an empty transaction).

    :param dataframe: rows must expose ``reviewerID``, ``productID`` and
        ``rating`` attributes via ``itertuples()``
    :param information: optional extra transactions appended to the ones
        derived from ``dataframe``; the argument itself is never modified
    :param minsupport: an itemset must appear at least this many times
    :param minconfidence: minimum confidence a rule must reach
    :param satisfied_value: a product counts as liked only when its rating,
        converted to float, is at least this value
    :return: rules: the association rules returned by
        ``pyfpgrowth.generate_association_rules``
    '''
    # reviewerID -> {productID: rating}; a dict keeps products unique per
    # reviewer while preserving first-seen order.
    liked_by_reviewer = {}
    for row in dataframe.itertuples():
        liked = liked_by_reviewer.setdefault(row.reviewerID, {})
        if float(row.rating) >= satisfied_value:
            liked[row.productID] = row.rating

    transaction = [list(liked) for liked in liked_by_reviewer.values()]
    if information:
        transaction.extend(information)

    # generate the rules
    patterns = pyfpgrowth.find_frequent_patterns(transaction, minsupport)
    rules = pyfpgrowth.generate_association_rules(patterns, minconfidence)
    return rules
Пример #5
0
    def categorize_queries(self, **data):
        """
        Mine categories from the queries and label every row of ``self.df``.

        Transactions are built with ``self.create_transactions(self.df,
        self.col)`` and mined with the algorithm named by ``self.alg``
        (``apriori`` or ``fpgrowth``, case-insensitive).  The result is
        stored on the instance: ``self.categories`` (space-joined item
        sets), the ``match_queries`` and ``category`` columns of
        ``self.df``, and ``self.counts`` (value counts of the categories).

        Keyword arguments:
            min_support -- Minimum number of transactions (default 10); for
                           apriori it is divided by the transaction count.
            min_lift -- Minimum lift, apriori only (default 1).
            min_probability -- Confidence threshold, fpgrowth only
                               (default 0.5).
            min_confidence -- Minimum confidence, apriori only (default 0).

        Raises:
            Exception: if ``self.alg`` names neither algorithm.
        """
        min_support = data.get('min_support', 10)
        min_lift = data.get('min_lift', 1)
        min_probability = data.get('min_probability', 0.5)
        min_confidence = data.get('min_confidence', 0)

        print("Converting to transactions.")
        transactions, match_queries = self.create_transactions(
            self.df, self.col)

        algorithm = self.alg.lower()
        if algorithm not in ("apriori", "fpgrowth"):
            raise Exception(
                "{} is not one of the available algorithms (`apriori`, `fpgrowth`)"
                .format(self.alg))

        if algorithm == "apriori":
            print("Running Apriori")
            # apriori expects support as a fraction of all transactions.
            min_support = float(min_support / len(transactions))
            relations = apriori(transactions,
                                min_support=min_support,
                                min_confidence=min_confidence,
                                min_lift=min_lift,
                                max_length=None)
            results = list(relations)
            print("Making Categories")
            self.categories = [' '.join(record.items) for record in results]
        else:
            print("Running FPGrpwth")
            patterns = pg.find_frequent_patterns(transactions, min_support)
            rules = pg.generate_association_rules(patterns, min_probability)
            # Iterating the rules mapping yields its keys.
            results = list(rules)
            print("Making Categories")
            self.categories = [' '.join(key) for key in results]

        print('Total Categories: {}'.format(len(set(self.categories))))

        self.df['match_queries'] = match_queries
        self.df['category'] = self.df.match_queries.map(
            lambda x: self.match_labels(x, self.categories))

        self.counts = pd.DataFrame(self.df.category.value_counts())
Пример #6
0
def find_rules(data, support_threshold, confidence_threshold):
    """Return the association rules mined from ``data``.

    Frequent patterns are found with ``support_threshold`` and turned into
    rules that reach ``confidence_threshold``.
    """
    return generate_association_rules(
        patterns=find_frequent_patterns(
            transactions=data, support_threshold=support_threshold),
        confidence_threshold=confidence_threshold,
    )
def fpgrouth(id, dataset, principal):
    """Run FP-growth on the selected dataset and build a template context.

    The dataset is resolved with ``pickdataset(int(id), dataset)``, mined
    with a support threshold of 3 and a confidence threshold of 0.6, and the
    mean rule confidence is reported as a percentage string.

    Args:
        id: Dataset identifier; converted with ``int``.
        dataset: Passed through to ``pickdataset``.
        principal: Truthy to fill the primary-algorithm keys, falsy to fill
            the comparison keys (suffix ``2``).

    Returns:
        dict with the algorithm name, the average confidence
        (``'<value>% Confianza'``), the step log, the rules as text and the
        image placeholder.  When no rule is mined the average is reported as
        ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    dataset = pickdataset(int(id), dataset)
    patterns = pyfpgrowth.find_frequent_patterns(dataset, 3)
    rules = pyfpgrowth.generate_association_rules(patterns, 0.6)

    pasos = "".join([
        "Dataset Cargado" + '\n',
        "Encuentros: " + '\n',
        str(patterns) + '\n',
    ])

    # Each rule value is (consequent, confidence); read the confidence
    # directly instead of parsing it out of the tuple's string form.
    confidences = [float(value[1]) for value in rules.values()]
    if confidences:
        mean_confidence = sum(confidences) / len(confidences)
    else:
        mean_confidence = 0.0
    avgReal = str(mean_confidence * 100) + '% Confianza'

    reglas = str(rules)
    img = 'No aplica'
    if principal:
        context = {
            'algoritmoPrincipal': 'FP-growth',
            'resultado': avgReal,
            'pasos': pasos,
            'reglas': reglas,
            'img': img
        }
    else:
        context = {
            'algoritmoComparar': 'FP-growth',
            'resultado2': avgReal,
            'pasos2': pasos,
            'reglas2': reglas,
            'img2': img
        }
    return context
Пример #8
0
 def run_rule_manually(self):
     """Mine rules for the first record and return the action showing them.

     Sales transactions come from ``self.get_sale_data()``.  Depending on
     ``record.rule_type`` they are mined with apriori (``min_supp`` and
     ``min_conf`` passed as-is) or with FP-growth (``min_supp`` multiplied
     by the number of transactions).  The formatted result is stored via
     ``self.update_rule`` and ``self.update_on_web()`` is called.

     NOTE(review): the ``return`` sits inside the loop, so only the first
     record of ``self`` is processed; an empty ``self`` returns ``None``.
     """
     for record in self:
         transactions = self.get_sale_data()
         if record.rule_type != 'apriori':
             min_count = len(transactions) * record.min_supp
             patterns = pyfpgrowth.find_frequent_patterns(
                 transactions, min_count)
             mined = pyfpgrowth.generate_association_rules(
                 patterns, record.min_conf)
             results = self.format_rules_fp(mined)
             self.update_rule(results, 'fpgrowth')
         else:
             relations = list(
                 apriori(transactions,
                         min_support=record.min_supp,
                         min_confidence=record.min_conf))
             results = self.format_rules(relations)
             self.update_rule(results, 'apriori')
         self.update_on_web()
         action = {
             'type': 'ir.actions.act_window',
             'name': 'View Rules',
             'view_type': 'form',
             'view_mode': 'tree,form',
             'res_model': 'data.mining.show',
             'target': 'current',
         }
         return action
Пример #9
0
def fpGrowth(transactions):
    """Mine patterns (support 1) and rules (confidence 0.2) and report them.

    Both results are printed and also returned as one text block with a
    "Frecuencias" section followed by a "Reglas" section.
    """
    patterns = pyfpgrowth.find_frequent_patterns(transactions, 1)
    rules = pyfpgrowth.generate_association_rules(patterns, 0.2)
    for mined in (patterns, rules):
        print(mined)
    sections = ["\nFrecuencias", str(patterns), "Reglas", str(rules)]
    return "\n".join(sections)
def demo():
    """Print the rules mined from a small hard-coded transaction list.

    Uses a support threshold of 2 and a confidence threshold of 0.7.
    """
    transactions = [
        [1, 2, 5],
        [2, 4],
        [2, 3],
        [1, 2, 4],
        [1, 3],
        [2, 3],
        [1, 3],
        [1, 2, 3, 5],
        [1, 2, 3],
    ]
    frequent = pyfpgrowth.find_frequent_patterns(transactions, 2)
    print(pyfpgrowth.generate_association_rules(frequent, 0.7))
Пример #11
0
def fpgrowth(seq, **kwargs):
    """Mine association rules from ``seq`` and return them as rows.

    Required keyword arguments:
        sup: support threshold, converted with ``int``.
        conf: confidence threshold, converted with ``float``.

    Returns:
        A list of ``[key, value[0], value[1]]`` rows, one per mined rule.

    Raises:
        KeyError: if ``sup`` or ``conf`` is missing.
    """
    import pyfpgrowth
    sup = int(kwargs.pop('sup'))
    conf = float(kwargs.pop('conf'))
    rules = pyfpgrowth.generate_association_rules(
        pyfpgrowth.find_frequent_patterns(seq, sup), conf)
    ret = []
    for antecedent, outcome in rules.items():
        ret.append([antecedent, outcome[0], outcome[1]])
    return ret
Пример #12
0
def pattern_mine(trans, support, confidence):
    """
    Mine frequent patterns and association rules from ``trans``.

    input: [[term1, term2, ...], [term1, term2, ...] ...]
    output: ``(pattern, rule)`` -- the results of
        ``pyfpgrowth.find_frequent_patterns`` and
        ``pyfpgrowth.generate_association_rules`` respectively
    """
    frequent = pyfpgrowth.find_frequent_patterns(trans, support)
    return frequent, pyfpgrowth.generate_association_rules(frequent, confidence)
Пример #13
0
def AssociationRule(Voicing,support,confidence):
    """Mine patterns and rules separately for every entry of ``Voicing``.

    For each ``(name, transactions)`` item the support threshold is
    ``len(transactions) * support``.  Results are stored under ``name`` in
    ``Voicing_patterns`` and ``Voicing_rules``, which are then returned.

    NOTE(review): ``Voicing_patterns`` and ``Voicing_rules`` are not created
    in this function, so they must already exist in an enclosing scope;
    entries accumulate there across calls.
    """
    for name, transactions in Voicing.items():
        min_count = len(transactions) * support
        mined_patterns = pyfpgrowth.find_frequent_patterns(transactions, min_count)
        Voicing_patterns[name] = mined_patterns
        Voicing_rules[name] = pyfpgrowth.generate_association_rules(
            mined_patterns, confidence)
    return Voicing_patterns, Voicing_rules
Пример #14
0
def generate_rules(data: pd.DataFrame,
                   support_threshold: int = 1,
                   confidence_threshold: float = 0.3) -> tuple:
    """Mine the ``items`` column of ``data``.

    Returns:
        ``(patterns, rules)`` -- the frequent patterns found with
        ``support_threshold`` and the rules derived from them that reach
        ``confidence_threshold``.
    """
    transactions = data['items']
    frequent = fp.find_frequent_patterns(
        transactions, support_threshold=support_threshold)
    derived = fp.generate_association_rules(
        frequent, confidence_threshold=confidence_threshold)
    return frequent, derived
Пример #15
0
def proses(id):
    """Re-mine the stored upload ``id`` and return its rules as text.

    The ``File`` record supplies the file name (read from ``./uploads/``)
    and the support percentage ``sup``.

    NOTE(review): the confidence threshold is fixed at 0.5 here; confirm
    that ignoring any confidence stored on the record is intended.
    """
    my_data = File.query.get(id)
    with open('./uploads/' + my_data.name, newline='') as f:
        transactions = list(csv.reader(f))
    # Stored support is a percentage of the number of transactions.
    min_count = len(transactions) * my_data.sup / 100
    patterns = pyfpgrowth.find_frequent_patterns(transactions, min_count)
    return str(pyfpgrowth.generate_association_rules(patterns, 0.5))
Пример #16
0
def dev_association(data):
    """Mine co-occurrence rules between the layers of a 3-D indicator array.

    Every ``(i, j)`` position of ``data`` becomes one transaction holding
    the first-axis indices ``z`` for which ``data[z, i, j] == 1``.  The
    transactions are mined twice: with FP-growth (support 50, confidence
    0.2) and with apriori.

    Args:
        data: Array indexed as ``data[z, i, j]`` and exposing ``.shape``.
            The code reads layers 1 and 4 explicitly and keeps six
            counters, so the first axis must have 5 or 6 entries.

    Returns:
        Tuple ``(patterns, rules, count, association_results,
        dev_rules_no_0)``: the FP-growth patterns and rules, the number of
        ones per layer, the raw apriori records, and the apriori rules
        whose first item is not 0, each as ``[all items but the last,
        [last item], statistic]``.
    """
    test_14 = 0
    count = [0, 0, 0, 0, 0, 0]
    itemset = []
    for i in range(data.shape[1]):
        for j in range(data.shape[2]):
            active = []
            for z in range(data.shape[0]):
                if data[z, i, j] == 1:
                    active.append(z)
                    count[z] = count[z] + 1
            if data[1, i, j] == data[4, i, j] == 1:
                test_14 += 1
            itemset.append(active)

    print(test_14)
    patterns = fpgrowth.find_frequent_patterns(itemset, 50)
    rules = fpgrowth.generate_association_rules(patterns, .2)

    association_rules = apriori(itemset,
                                min_support=0.002,
                                min_confidence=0.4,
                                min_lift=2,
                                min_length=2.5)
    association_results = list(association_rules)

    # Flatten every apriori statistic into [base items + added items, stat].
    # The loop variable must not be called ``rules``: that name holds the
    # FP-growth rules returned below and used to be overwritten here.
    associ_rules = []
    for record in association_results:
        for stat in record[2]:
            associ_rules.append([list(stat[0]) + list(stat[1]), stat[2]])

    # dtype=object keeps the ragged [list, number] rows intact; recent NumPy
    # versions refuse to infer that layout on their own.
    dev_rules = np.array(associ_rules, dtype=object)
    if len(dev_rules):
        # Debug output of the first rule; skipped when nothing was mined.
        print(dev_rules[0])
        print(dev_rules[0, 0])
        print(dev_rules[0, 0][0])

    dev_rules_no_0 = []
    for item in dev_rules:
        if item[0][0] == 0:
            continue
        dev_rules_no_0.append([item[0][0:-1], [item[0][-1]], item[1]])

    return patterns, rules, count, association_results, dev_rules_no_0
Пример #17
0
def get_scores(transactions, resume_words):
    """Suggest words that frequently accompany the words already present.

    Patterns must occur in at least ``len(transactions) / 1.5``
    transactions and rules need a confidence of 0.5.  A rule contributes
    ``support(antecedent) * confidence`` to its consequent when all of its
    antecedent items are in ``resume_words``.  Consequents whose total
    score reaches 3 are suggested.

    Args:
        transactions: Sequence of item collections, one per resume.
        resume_words: Container of words already present; must support
            ``in`` and be acceptable to ``set.issubset``.

    Returns:
        Set of suggested words not in ``resume_words``; an empty set when
        no consequent reaches the score threshold (previously this case
        raised ``TypeError`` from ``set.union`` with no arguments).
    """
    num_resumes = len(transactions)
    suggestion_scores = collections.Counter()
    patterns = fpg.find_frequent_patterns(transactions, num_resumes / 1.5)
    rules = fpg.generate_association_rules(patterns, 0.5)
    for antecedent, consequent in rules.items():
        if set(antecedent).issubset(resume_words) and antecedent in patterns:
            suggestion_scores[consequent[0]] += (
                patterns[antecedent] * consequent[1])

    # Counter keys are item tuples; collect their individual items.
    suggestions = set()
    for items, score in suggestion_scores.items():
        if score >= 3:
            suggestions.update(items)
    return {x for x in suggestions if x not in resume_words}
Пример #18
0
def main():
    """Run both apriori variants and FP-growth on the market-basket CSV.

    The CSV is loaded without a header row and converted by
    ``transcationGenerator`` into the "efficient" and "non-efficient"
    transaction forms.  FP-growth uses a support threshold of 20 and a
    confidence threshold of 0.3, and its rules are printed.
    """
    rule = '-' * 20

    df = pd.read_csv(r"MarketBasket/Market_Basket_Optimisation.csv",
                     header=None)
    transcation_efficient = transcationGenerator(df, "efficient")
    transcation_non_efficient = transcationGenerator(df, "non-efficient")

    print(rule, 'Apriori', rule)
    apriori_one(transcation_efficient, support=0.05, confidence=0.3)

    print(rule, 'Apriori', rule)
    apriori_two(transcation_non_efficient, 0.05, "confidence", 0.3)

    print(rule, 'FP-GROWTH', rule)
    frequent = fp.find_frequent_patterns(transcation_efficient, 20)
    mined = fp.generate_association_rules(frequent, 0.3)
    print('关联规则:', '\n', mined)
def botnet():
    """Mine rules from the ``ip,ua,target`` triples in ``sample7.txt``.

    Each line must split into exactly three comma-separated fields; every
    triple is echoed and becomes one transaction.  Rules are mined with a
    support threshold of 3 and a confidence threshold of 0.9 and printed.
    """
    transactions = []

    with open("../data/KnowledgeGraph/sample7.txt") as f:
        for raw in f:
            ip, ua, target = raw.strip('\n').split(',')
            print("Add (%s %s %s)" % (ip, ua, target))
            transactions.append([ip, ua, target])

    frequent = pyfpgrowth.find_frequent_patterns(transactions, 3)
    print(pyfpgrowth.generate_association_rules(frequent, 0.9))
Пример #20
0
    def fit(self, transactions):
        """Map every item that appears in a mined rule to its co-items.

        Rules are mined from ``transactions`` using ``self.min_support`` and
        ``self.min_confidence``.  For each rule, the items of its key and of
        ``value[0]`` are pooled, and every pooled item is linked to all the
        other (non-equal) items of that rule.

        Returns:
            dict mapping each item to the set of items it shares a rule with.
        """
        patterns = pyfpgrowth.find_frequent_patterns(transactions, self.min_support)
        rules = pyfpgrowth.generate_association_rules(patterns, self.min_confidence)

        item_sim_sets = {}
        for antecedent, outcome in rules.items():
            rule_items = antecedent + outcome[0]
            for item in rule_items:
                related = item_sim_sets.setdefault(item, set())
                related.update(
                    other for other in rule_items if other != item)

        return item_sim_sets
Пример #21
0
def get_rules(transactions):
    """Mine rules (support 400, confidence 0.7) and report the timings.

    Progress messages with the wall-clock time of each phase, rounded to
    four decimals, are printed.

    Returns:
        The rules from ``pyfpgrowth.generate_association_rules``.
    """
    print('generating patterns...')
    started = time.time()
    patterns = pyfpgrowth.find_frequent_patterns(transactions, 400)
    elapsed = round(time.time() - started, 4)
    print(f'patterns generated (time taken : {elapsed} seconds)')

    print('generating rules...')
    started = time.time()
    rules = pyfpgrowth.generate_association_rules(patterns, 0.7)
    elapsed = round(time.time() - started, 4)
    print(f'rules generated (time taken : {elapsed} seconds)')

    return rules
Пример #22
0
def fpgrowth(dataset):
    """Print the frequent itemsets and rules mined from ``dataset``.

    Each row becomes one transaction made of the string form of its first
    20 cells, skipping cells whose string form is ``'nan'``.  Itemsets need
    a frequency of at least 100 and rules a confidence of 0.4.

    Args:
        dataset: Object exposing ``.shape`` and a 2-D indexable ``.values``
            with at least 20 columns.
    """
    # Collect the rows into transactions.
    cells = dataset.values
    transactions = [
        [str(cells[i, j]) for j in range(20) if str(cells[i, j]) != 'nan']
        for i in range(dataset.shape[0])
    ]

    # Mine frequent itemsets (frequency >= 100) and the rules among them.
    itemsets = pyfpgrowth.find_frequent_patterns(transactions, 100)
    rules = pyfpgrowth.generate_association_rules(itemsets, 0.4)
    print("频繁项集:", itemsets)
    print("关联规则:", rules)
Пример #23
0
def get_rules():
    """Mine and print the rules found in ``NewRulesForME.csv``.

    Every CSV row is one transaction.  Patterns need a support of 3 and
    rules a confidence of 0.60.  The whole rule mapping is printed first,
    then each rule key on its own line.
    """
    # The context manager closes the file; it used to be left open.
    with open("NewRulesForME.csv") as file:
        transactions = list(csv.reader(file, delimiter=','))

    patterns = pyfpgrowth.find_frequent_patterns(transactions, 3)

    rules = pyfpgrowth.generate_association_rules(patterns, 0.60)

    print(rules)
    for i in rules:
        print(i)
    '''
Пример #24
0
def rules(request):
    """Render ``rules.html`` with the rules mined from ``data_set()``.

    Patterns need a support of 2; only rules with a confidence of 1 are
    kept.  The rules are also printed.
    """
    # data_set() supplies the transactions, e.g. lists of item names such as
    # ['chicken', 'eggs', 'oil'] (shape taken from the sample the original
    # author left commented out here).
    transactions = data_set()

    frequent = pyfpgrowth.find_frequent_patterns(transactions, 2)
    mined = pyfpgrowth.generate_association_rules(frequent, 1)
    print(mined)
    context = {'rules': mined}
    return render(request, 'rules.html', context)
Пример #25
0
def AssociationRule(ip_urls, support, minConf):
    """Mine rules between IPs and URL paths encoded as integers.

    Distinct IPs are mapped to negative ids (-1, -2, ...) and distinct URL
    paths to positive ids (1, 2, ...), so the sign of an id tells which
    kind of item it is.  Each ``(ip, url)`` pair becomes a two-item
    transaction.

    Args:
        ip_urls: Re-iterable collection of pairs whose element 0 is the IP
            and element 1 the URL path.
        support: Support threshold for the frequent patterns.
        minConf: Confidence threshold for the rules.

    NOTE(review): the ``ip`` and ``url`` lists built at the end are not
    returned; the function appears to be cut short in this file. Behaviour
    is kept (returns ``None``).
    """
    ips = {items[0] for items in ip_urls}
    url_paths = {items[1] for items in ip_urls}
    ips_map = {ip: -i - 1 for i, ip in enumerate(ips)}
    urls_map = {ulrpath: i + 1 for i, ulrpath in enumerate(url_paths)}
    # The URL id must come from urls_map; the original called .get() on the
    # url_paths set, which raises AttributeError.
    transactions = [[ips_map.get(items[0]),
                     urls_map.get(items[1])] for items in ip_urls]
    patterns = pyfpgrowth.find_frequent_patterns(transactions, support)
    rules = pyfpgrowth.generate_association_rules(patterns, minConf)
    ip = []
    url = []
    # Rule keys are expected to be single-item tuples here.
    for (i, ) in rules.keys():
        if i < 0:
            ip.append(i)
        else:
            url.append(i)
def printFP_GrowthResult(transactions, support, confidence):
    """Mine ``transactions`` with FP-growth and print the result.

    ``support`` is multiplied by the number of transactions to obtain the
    threshold passed to ``find_frequent_patterns``; ``confidence`` is passed
    unchanged to ``generate_association_rules``.

    Prints the two parameters, the number of frequent patterns and their
    keys, and each rule as ``key -> rules[key][0], confidence: rules[key][1]``.

    NOTE(review): uses Python 2 print statements; this block does not parse
    under Python 3.
    """
    patterns = pyfpgrowth.find_frequent_patterns(transactions,
                                                 support * len(transactions))
    rules = pyfpgrowth.generate_association_rules(patterns, confidence)

    print '\n\nFP Growth algorithm:'
    print 'min_support: ', support
    print 'min_confidence: ', confidence
    print '\nFrequent item set:( size:', len(patterns), ')'
    # Trailing commas keep the whole pattern listing on one output line.
    print '[',
    for key in patterns:
        print key, ',',
    print ']'

    print '\nRules:'
    for key in rules:
        print key, '->', rules[key][0], ', confidence:', rules[key][1]
    def solve(self):
        """Mine rules from ``A.csv`` and write them to ``result.txt``.

        The rows of the CSV (as ``DataFrame.values``) are the transactions.
        Patterns need a support of 150 and rules a confidence of 0.9.  The
        patterns and the result list are printed; ``result.txt`` receives
        one line per rule.

        Returns:
            List of ``[list(key), list(value[0])]`` pairs, one per rule.
        """
        vote_data = pd.read_csv('A.csv')
        patterns = pyfpgrowth.find_frequent_patterns(vote_data.values, 150)
        print(patterns)
        rules = pyfpgrowth.generate_association_rules(patterns, 0.9)

        result_list = [[list(antecedent), list(outcome[0])]
                       for antecedent, outcome in rules.items()]
        print(result_list)

        text = ''.join(str(entry) + '\n' for entry in result_list)
        with open('result.txt', 'w') as out:
            out.write(text)
        return result_list
Пример #28
0
def get_patterns_rules(txn_list,
					   FPOF_flag=0,
					   OutliernessDegree_flag=0,
					   min_sup=np.nan,
					   min_conf=np.nan):
	"""Mine frequent patterns, or rules, from ``txn_list`` with FP-growth.

	The support threshold is ``min_sup * len(modelinput)``.

	Args:
		txn_list: Transactions to mine.
		FPOF_flag: When equal to 1, return the frequent patterns. Takes
			precedence over ``OutliernessDegree_flag``.
		OutliernessDegree_flag: When equal to 1 (and ``FPOF_flag`` is not),
			return the rules that reach ``min_conf``.
		min_sup: Support as a fraction of ``len(modelinput)``.
		min_conf: Confidence threshold for the rules.

	Returns:
		The patterns, the rules, or ``None`` when neither flag equals 1.

	NOTE(review): ``modelinput`` is not defined in this function and must
	exist in an enclosing scope; confirm it is meant to differ from
	``txn_list``.
	"""
	print("FPGrowth rule-mining in progress ......")
	minsup, minconf = min_sup, min_conf
	print("Min Support: ", minsup, ", Min Confidence: ", minconf)
	leninput = len(modelinput)

	want_patterns = FPOF_flag == 1
	want_rules = OutliernessDegree_flag == 1
	if not (want_patterns or want_rules):
		return None

	patterns = pyfpgrowth.find_frequent_patterns(txn_list, minsup*leninput)
	if want_patterns:
		print("Number of patterns (Frequent Itemsets):  ", len(patterns))
		print("FPGrowth rule-mining completed")
		return patterns

	rules = pyfpgrowth.generate_association_rules(patterns, minconf)
	print("Number of patterns (Frequent Itemsets):  ", len(patterns), ", Number of high-confidence rules:  ", len(rules))
	print("FPGrowth rule-mining completed")
	return rules
Пример #29
0
def generate_C2C_rules(data_list, support_ratio=0.00003, confidence_ratio=0.3):
    """Collapse mined rules into pairwise trigger -> target confidences.

    Rules are mined from ``data_list`` with a support threshold of
    ``support_ratio * len(data_list)`` and a confidence threshold of
    ``confidence_ratio``.  Every rule contributes its confidence to each
    (item of its key, item of its consequent) pair; the contributions of a
    pair are averaged.

    Returns:
        ``defaultdict`` mapping each trigger item to a dict of
        ``{target item: average confidence}``.
    """
    support = support_ratio * len(data_list)
    patterns = pyfpgrowth.find_frequent_patterns(data_list, support)
    rules = pyfpgrowth.generate_association_rules(patterns, confidence_ratio)

    confidence_totals = defaultdict(lambda: 0)
    occurrences = Counter()
    for pattern, r in rules.items():
        confidence = r[1]
        for triggerC in pattern:
            for targetC in r[0]:
                pair = (triggerC, targetC)
                confidence_totals[pair] += confidence
                occurrences[pair] += 1

    new_rules = defaultdict(lambda: dict())
    for pair, total in confidence_totals.items():
        c1, c2 = pair
        new_rules[c1][c2] = total / occurrences[pair]

    return new_rules
def on_input(data1, data2):
    """Mine product association rules from two CSV payloads and send them.

    ``data1`` and ``data2`` are CSV texts that are merged on ``PRODUCTID``;
    the merged frame must provide ``SALESORDERID`` and ``PRODUCTNAME``.
    Product names are grouped per sales order and mined with FP-growth
    (support 8, confidence 0.8).  Rules whose key has at most two items and
    whose consequent has at most one item are sent as text on the
    ``"output"`` port via ``api.send``.

    Side effect: writes ``itemSet.csv`` in the working directory.

    NOTE(review): ``iteritems()`` and the ``'rU'`` open mode are Python 2
    idioms; this block will not run unchanged on current Python 3.
    """
    soid_data = {}
    final = ""
    df1 = pandas.read_csv(io.StringIO(data1), low_memory=False)
    df2 = pandas.read_csv(io.StringIO(data2), low_memory=False)
    df1 = df1.merge(df2, on='PRODUCTID')
    df3 = df1[['SALESORDERID', 'PRODUCTNAME']]
    # Round-trip through a CSV file; every value is read back as a string.
    df3.to_csv('itemSet.csv', index=False)
    with open('itemSet.csv', mode='rU') as f:
        reader = csv.reader(f)
        for n, row in enumerate(reader):
            if not n:
                # Skip header row (n = 0).
                continue
            soid, pname = row
            # Group product names by sales order id: one transaction each.
            if soid not in soid_data:
                soid_data[soid] = list()
            soid_data[soid].append((pname))
    patterns = pyfpgrowth.find_frequent_patterns(soid_data.values(), 8)
    rules = pyfpgrowth.generate_association_rules(patterns, 0.8)
    for key, value in rules.iteritems():
        # Keep only short rules: key of <= 2 items, consequent of <= 1 item.
        if (len(key) <= 2 and len(value[0]) <= 1):
            final = final + str(key) + ":" + str(value[0]) + "\n"
    api.send("output", "Rules :" + final)
Пример #31
0

import pyfpgrowth


# In[15]:


# Frequent patterns of `dataset` with a support threshold of 1000.
# `dataset` is not defined in this part of the script.
patterns = pyfpgrowth.find_frequent_patterns(dataset, 1000)
patterns


# In[18]:


# Rules derived from the support-1000 patterns, confidence threshold 0.5.
rules5 = pyfpgrowth.generate_association_rules(patterns, 0.5)
rules5


# In[19]:


# Same mining with the support threshold lowered to 500.
patterns500 = pyfpgrowth.find_frequent_patterns(dataset, 500)
patterns500


# In[20]:


# Rules derived from the support-500 patterns, confidence threshold 0.5.
rules500_5 = pyfpgrowth.generate_association_rules(patterns500, 0.5)
rules500_5
Пример #32
0
import pyfpgrowth


# One transaction per input line, made of its three comma-separated fields.
# NOTE(review): Python 2 script (print statements).
transactions=[]

with open("../data/KnowledgeGraph/sample7.txt") as f:
    for line in f:
        line=line.strip('\n')
        # Each line must contain exactly three comma-separated fields.
        ip,ua,target=line.split(',')
        print "Add (%s %s %s)" % (ip,ua,target)
        transactions.append([ip,ua,target])



# Support threshold of 3, confidence threshold of 0.9.
patterns = pyfpgrowth.find_frequent_patterns(transactions, 3)
rules = pyfpgrowth.generate_association_rules(patterns, 0.9)

print rules