def compute_outputs(self):
    """Binarise the node outputs: the first ``psize`` share of the ranking gets 1.

    Each node's total is first copied into ``self.outputs``. The outputs are
    then passed to ``sort`` and the entries named by the first
    ``int(len * self.psize)`` elements of its result are set to 1, all others
    to 0.
    """
    for idx in range(len(self.nodes)):
        self.outputs[idx] = self.totals[idx]

    # presumably ``sort`` returns node indices in ranked order (its elements
    # are used as keys into self.outputs) -- verify against its definition
    ranked = sort(self.outputs)
    cutoff = int(len(ranked) * self.psize)
    for rank, node in enumerate(ranked):
        self.outputs[node] = 1 if rank < cutoff else 0
def home():
    """Dispatch the JSON request body on its ``command`` field and reply as JSON.

    Supported commands are ``ping``, ``sample`` (needs ``size``, ``begin`` and
    ``end``) and ``sort`` (receives the whole payload). Any other command
    yields an empty JSON object.
    """
    payload = request.get_json()
    command = payload['command']

    if command == 'ping':
        res = functions.ping_pong()
    elif command == 'sample':
        res = functions.sample(payload['size'], payload['begin'],
                               payload['end'])
    elif command == 'sort':
        res = functions.sort(payload)
    else:
        res = {}

    logging.info("Response: {}".format(res))
    print(jsonify(res))
    return jsonify(res)
# NOTE(review): this excerpt starts inside a loop whose header is not visible
# here (the first two statements index `parents` with `i`), so the code is left
# untouched and only this header is added.
# What the visible code does: for each of `n_sims` simulations and each of
# `generations` generations it builds offspring with partially_mapped(), joins
# them with the current parents via np.append, ranks the joined population
# with sort(), takes the first N_pop individuals as the new parents, refines
# each one with hillclimb2(), and records np.mean(path_lengths) in `means` and
# path_lengths[0] in `best`.
# NOTE(review): np.random.shuffle() needs a mutable sequence; on Python 3
# `range(n_cities)` is immutable, so this presumably targets Python 2 (or
# needs list(range(...))) -- confirm.
# NOTE(review): `strd` is assigned but not used in the visible part.
parents[i] = range(n_cities) np.random.shuffle(parents[i]) best = [] #this boy picks the best of each simulation strd = 0 #this guy is for standard error measurements means = [] #Action for j in range(n_sims): for gen in range(generations): offspring = partially_mapped(parents, distances, n_cities, N_pop) new_population = np.append([offspring], [parents], axis=1) path_lengths, population = sort(new_population, distances) removables = len(path_lengths) - N_pop for i in range(removables): path_lengths = np.delete(path_lengths, len(path_lengths) - 1) parents = [] for i in range(N_pop): parents.append(population[i]) for populant in range(N_pop): parents[populant], path_lengths[populant] = hillclimb2( parents[populant], distances) means.append(np.mean(path_lengths)) best.append(path_lengths[0]) #collecting shit for standard deviation
def run_functions(curr_spreadsheet, query_string, query_parameters):
    """Run every '.'-separated command of ``query_string`` on the sheet.

    A command is a name followed by space-separated placeholder tokens, e.g.
    ``ADD <ENTRY_1> <ENTRY_2>``; each token is resolved through
    ``query_parameters``. The outcome of the n-th command (1-based) is written
    back into ``query_parameters`` under ``"<RES_n>"``. Commands that are not
    recognised (or a SORT on a non-column entry) store ``None``.
    """
    print("Query string: " + query_string)
    sheet = curr_spreadsheet.sheet

    for position, command in enumerate(query_string.split("."), start=1):
        command_name, *tokens = command.split(" ")
        args = [query_parameters[token] for token in tokens]
        result = None

        if command_name == "ADD":
            result = functions.add(sheet, args[0], args[1])

        elif command_name in ("INSERTAFT", "INSERTBEF"):
            # "after" targets the following row/column, "before" the entry itself
            shift = 1 if command_name == "INSERTAFT" else 0
            entry = args[0]
            if entry.rowOrCol == 'col':
                where = chr(ord(entry.value) + shift)
            else:
                where = int(entry.value) + shift
            result = functions.insert_entry(sheet, where, entry.rowOrCol)

        elif command_name == "SET":
            new_value = args[0]
            if not isinstance(new_value, float):
                new_value = new_value.value
            result = functions.cell_update(sheet, args[1].cell_str, new_value)

        elif command_name == "AVG":
            entry = args[0]
            result = functions.average_entry(sheet, entry.value, entry.rowOrCol)

        elif command_name == "BOLD":
            target = args[0]
            if isinstance(target, Cell):
                single = str(target.cell_str) + ":" + str(target.cell_str)
                result = functions.format_bold(sheet, single)
            else:
                result = functions.format_bold_entry(sheet, target)

        elif command_name == "SET_BG":
            if len(args) != 2:
                # colour followed by the two corner cells of a range
                span = str(args[1].cell_str) + ":" + str(args[2].cell_str)
                result = functions.set_background(sheet, span,
                                                  args[0].color_str)
            elif isinstance(args[1], Cell):
                single = args[1].cell_str + ":" + args[1].cell_str
                result = functions.set_background(sheet, single,
                                                  args[0].color_str)
            else:
                result = functions.set_background_entry(sheet, args[1],
                                                        args[0].color_str)

        elif command_name == "MULTIPLY":
            target = args[1]
            if isinstance(target, Entry):
                result = functions.multiply_entry(sheet, int(args[0].value),
                                                  target.value,
                                                  target.rowOrCol == "row")
            else:
                result = functions.multiply_cell(sheet, int(args[0].value),
                                                 target.cell_str)

        elif command_name == "SIN":
            if len(args) != 1:
                span = str(args[0].cell_str) + ":" + str(args[1].cell_str)
                result = functions.sin_range(sheet, span)
            elif isinstance(args[0], Cell):
                result = functions.sin_cell(sheet, args[0].cell_str)
            else:
                # Original author's remark: not sure this branch works.
                column_number = ord(args[0].value) - ord("A") + 1
                result = functions.sin_entry(sheet, column_number,
                                             args[0].rowOrCol == "col")

        elif command_name == "COS":
            if len(args) != 1:
                span = str(args[0].cell_str) + ":" + str(args[1].cell_str)
                result = functions.cos_range(sheet, span)
            elif isinstance(args[0], Cell):
                result = functions.cos_cell(sheet, args[0].cell_str)
            else:
                result = functions.cos_entry(sheet, args[0].value,
                                             args[0].rowOrCol == "col")

        elif command_name == "SORT":
            # only column entries are sorted; rows leave the result as None
            if args[0].rowOrCol == 'col':
                column_number = ord(args[0].value) - ord("A") + 1
                result = functions.sort(sheet, (column_number, 'asc'))

        elif command_name == "FILTER_PRIME":
            result = functions.filter_by_prime(sheet)

        # FILTER_EVEN, FILTER_ODD, MAX_VAL and NORMALIZE are not wired up.

        query_parameters["<RES_" + str(position) + ">"] = result
# Let the single cell of the button frame absorb any extra space.
buttons.grid_columnconfigure(0, weight=1)
buttons.grid_rowconfigure(0, weight=1)

# Treeview and its scrollbars live together in one container frame.
container = ttk.Frame()
container.pack(fill='both', expand=True)

treeColumns = ("Item", "ID", "Price", "Available", "Checked out",
               "Description")
tree = ttk.Treeview(columns=treeColumns, show="headings")

# Clicking a heading sorts by that column; the default argument pins the
# column name for each callback.
for column in treeColumns:
    tree.heading(
        column,
        text=column,
        command=lambda c=column: functions.sort(tree, c, 0),
    )
for i in treeColumns:
    tree.column(i, width=130, minwidth=30)

vsb = ttk.Scrollbar(orient="vertical", command=tree.yview)
hsb = ttk.Scrollbar(orient="horizontal", command=tree.xview)
tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set)

# Vertical bar to the right, horizontal bar below, tree fills the rest.
for _widget, _col, _row, _sticky in (
        (vsb, 1, 0, 'ns'),
        (hsb, 0, 1, 'ew'),
        (tree, 0, 0, 'nsew'),
):
    _widget.grid(column=_col, row=_row, sticky=_sticky, in_=container)
from math import sqrt

N = 1000  # number of repetitions per sample size
chars = ['avr', 'med', 'zr', 'zq', 'ztr']  # names of the estimators in `f`
arr = [10, 100, 1000]  # sample sizes

print("Лабораторная работа №2; Выполнила: Фомина Дарья\n")
norm = dst.Normal(1, 0)
print("/************************************************/\n")
print("Normal distribution\n")
for n in arr:
    print(f'n={n}')
    # res[k] = [running mean of estimator k, running mean of its square]
    res = {k: [0, 0] for k in range(5)}
    for i in range(N):
        s = f.sort(norm, n)
        for slot, estimator_name in enumerate(chars):
            z = getattr(f, estimator_name)(s)
            res[slot][0] += z / N
            res[slot][1] += z * z / N
userInput = input("Choose a index: ")
index = int(userInput)

# Only analyse an index that belongs to a known header, is numeric and is not
# column 0. The comparison uses `!=`: `is not 0` tested object identity, which
# only works by accident of CPython's small-int cache and raises a
# SyntaxWarning on Python 3.8+.
# NOTE(review): the original indentation was lost; everything below is assumed
# to belong to this `if` because `header` is only bound inside it -- confirm.
if (index in headers.values() and index not in nonNumericIndexes
        and index != 0):
    # Reverse lookup: find the header name stored under this index.
    for key, value in headers.items():
        if value == index:
            header = key

    # Sort and show all data, functions in external file 'functions.py'.
    # A copy is taken first, presumably because functions.sort may reorder
    # data[index] in place -- verify.
    dataset = copy.copy(data[index])
    print(dataset)
    sortedData = functions.sort(data[index])
    print(header)
    print(f"Average: \n{functions.average(dataset)}")
    _modus = functions.modus(dataset)
    print(f"Modus: \n{_modus[0]}: {_modus[1]} time(s)")
    print(f"Median: \n{functions.median(sortedData)}")
    print(f"Standard deviaton: \n{functions.standardDeviation(dataset)}")

    # Show histogram with confidence interval: 40 equally wide bins spanning
    # the value range.
    fig, ax = plt.subplots()
    binwidth = (max(dataset) - min(dataset)) / 40
    plt.hist(dataset,
             bins=np.arange(min(dataset), max(dataset) + binwidth, binwidth))
def launch_functions_menu():
    """Interactive menu for the "functions" exercises.

    Prints the menu, reads a selection number and runs the matching exercise,
    repeating until 13 (exit) is entered. Numbers outside 1..13 just redisplay
    the menu.
    """
    menu_lines = (
        "exercise #1", "exercise #2", "exercise #3", "exercise #4",
        "exercise #5", "exercise #6", "exercise #7", "exercise #8",
        "exercise #9", "exercise #10", "exercise #11", "exercise #12",
        "exit #13",
    )

    def check_prime():
        number = int(input("Enter a number to see if it's prime "))
        print(functions.is_prime(number))

    def primes_below():
        number = int(input("Get all prime numbers smaller than "))
        functions.prime_numbers_smaller_than(number)

    def fibonacci():
        number = int(input("Fibonacci sequence with elements count = "))
        functions.fibonacci_sequence(number)

    def demo_map():
        print("my_map function")
        functions.exercise_4()

    def demo_filter():
        print("my_filter function")
        functions.exercise_5()

    def demo_reduce():
        print("my_reduce function")
        functions.exercise_6()

    def special_sum():
        number = int(input("Special Sum of numbers smaller than "))
        print(functions.sum_of_special_numbers_smaller_than(number))

    def demo_sort():
        print("sort [4, 2, 3, 1] {0}".format(functions.sort([4, 2, 3, 1])))

    def demo_membership():
        print("Check if 1 in list [1, 4, 5] ")
        print(functions.element_in_list(1, [1, 4, 5]))

    def demo_stateful():
        print("call stateful function")
        for _ in range(3):
            print(functions.stateful_func())
        print("----- new state -----")
        new_state = functions.State()
        for _ in range(2):
            print(functions.stateful_func(new_state))
        print("----- old state -----")
        print(functions.stateful_func())

    def demo_timing():
        print("measure the duration of a function")
        print(functions.my_func())

    def demo_total_timing():
        print("Total execution time for multiple functions")
        print("----- Total Execution Time -------")
        print(functions.my_func())
        print("----- Total Execution Time -------")
        print(functions.my_func2())

    def say_exit():
        print("Exit")

    actions = {
        1: check_prime, 2: primes_below, 3: fibonacci, 4: demo_map,
        5: demo_filter, 6: demo_reduce, 7: special_sum, 8: demo_sort,
        9: demo_membership, 10: demo_stateful, 11: demo_timing,
        12: demo_total_timing, 13: say_exit,
    }

    print("Functions Section - Enter the exercise number you want to run")
    selection = 0
    while selection != 13:
        for line in menu_lines:
            print(line)
        selection = int(input("Insert Selection = "))
        action = actions.get(selection)
        if action is not None:
            action()
def action_statistics(action_path, outputpath):
    """Summarise a user-action log into one feature row per user and save it.

    For every user the rows are ordered with ``sort`` and reduced to: counts
    and shares of action types 1..9, the time of the first row seen for each
    type, a few ratios between the counts, the types/times of the first four
    rows ("tail") and of the last row ("head"), and statistics over the
    differences between consecutive action times.

    Changes from the previous revision:
      * action type 5 was counted twice (duplicated stanza);
      * the total was declared as column ``totalstep`` but written as
        ``totalStep``, leaving one column permanently empty -- only
        ``totalStep`` is emitted now;
      * a user without any action of type 1..9 no longer raises
        ZeroDivisionError (all shares are 0);
      * rows are collected in a list because ``DataFrame.append`` was removed
        in pandas 2.0, and users are visited with ``groupby`` instead of a
        quadratic scan.

    Args:
        action_path: Comma-separated input file with at least the columns
            ``userid``, ``actionType`` and ``actionTime``.
        outputpath: Path of the CSV file to write.

    Returns:
        None.
    """
    cm_data_raw = pd.read_table(action_path, sep=',', encoding='utf-8')

    steps = range(1, 10)
    columns = (
        ['userid', 'totalStep']
        + ['step{}N'.format(k) for k in steps]
        + ['step{}P'.format(k) for k in steps]
        + ['viewProductN']
        + ['last{}time'.format(k) for k in steps]
        + ['buy/viewProductN', 'buy/n5', '(2-4)/(1)']
        + ['tail1', 'tail2', 'tail3', 'tail4', 'head1']
        + ['maxtime', 'mintime', 'averagetime', 'vartime', 'mediantime']
        + ['tailtime1', 'tailtime2', 'tailtime3', 'tailtime4']
        + ['tail3ave', 'tail3var']
    )

    rows = []
    # sort=False keeps users in order of first appearance, as before.
    for uid, group in cm_data_raw.groupby('userid', sort=False):
        # NOTE(review): ordering by actionType before actionTime means the
        # tail/head and interval features follow that order rather than pure
        # time order -- confirm this is intended.
        udata = sort(group, ["actionType", "actionTime"], ascending=False)
        types = list(udata['actionType'])
        times = list(udata['actionTime'])

        # Per-type hit count and the time of the first row of that type.
        counts = dict.fromkeys(steps, 0)
        first_seen = dict.fromkeys(steps, 0)
        for atype, atime in zip(types, times):
            if atype not in counts:
                continue
            if counts[atype] == 0:
                first_seen[atype] = atime
            counts[atype] += 1

        # Types/times of the first four rows, padded with 0 for short logs.
        tail_types = (types + [0] * 4)[:4]
        tail_times = (times + [0] * 4)[:4]
        head1 = types[-1] if types else 0

        # Differences between consecutive rows, in sorted order.
        timeslot = [cur - prev for prev, cur in zip(times, times[1:])]
        maxtime = mintime = averagetime = vartime = mediantime = 0
        tail3ave = tail3var = 0
        if timeslot:
            maxtime = np.max(timeslot)
            mintime = np.min(timeslot)
            averagetime = np.average(timeslot)
            mediantime = np.median(timeslot)
            vartime = np.var(timeslot)
            tail3a = timeslot[-3:]
            tail3ave = np.average(tail3a)
            tail3var = np.var(tail3a)

        view_n = counts[2] + counts[3] + counts[4]
        if view_n:
            buy_view = counts[9] / view_n
        else:
            # no product views: 1 if a purchase happened anyway, else 0
            buy_view = 1 if counts[9] else 0
        buy_5 = counts[9] / counts[5] if counts[5] else 0
        p24_1 = view_n / counts[1] if counts[1] else 0
        total_steps = sum(counts.values())

        row = {'userid': uid, 'totalStep': total_steps}
        for k in steps:
            row['step{}N'.format(k)] = counts[k]
            row['step{}P'.format(k)] = (
                counts[k] / total_steps if total_steps else 0)
            row['last{}time'.format(k)] = first_seen[k]
        for pos in range(4):
            row['tail{}'.format(pos + 1)] = tail_types[pos]
            row['tailtime{}'.format(pos + 1)] = tail_times[pos]
        row.update({
            'viewProductN': view_n,
            'buy/viewProductN': buy_view,
            'buy/n5': buy_5,
            '(2-4)/(1)': p24_1,
            'head1': head1,
            'maxtime': maxtime,
            'mintime': mintime,
            'averagetime': averagetime,
            'vartime': vartime,
            'mediantime': mediantime,
            'tail3ave': tail3ave,
            'tail3var': tail3var,
        })
        rows.append(row)

    newCust = pd.DataFrame(rows, columns=columns)
    newCust.to_csv(outputpath)
    return
def action_statistics(action_path, outputpath):
    """Summarise a user-action log into one feature row per user and save it.

    For every user the rows are ordered with ``sort`` and reduced to the
    number of actions of each type 1..9, the time of the first row seen for
    each type, and a few ratios between those counts. Counts of steps 5..8 are
    back-filled from the following step when they are smaller (the data is
    known to have gaps).

    Changes from the previous revision:
      * action type 5 was counted twice (duplicated stanza);
      * rows are collected in a list because ``DataFrame.append`` was removed
        in pandas 2.0, and users are visited with ``groupby`` instead of a
        quadratic scan.

    Args:
        action_path: Comma-separated input file with at least the columns
            ``userid``, ``actionType`` and ``actionTime``.
        outputpath: Path of the CSV file to write.

    Returns:
        None.
    """
    cm_data_raw = pd.read_table(action_path, sep=',', encoding='utf-8')

    steps = range(1, 10)
    columns = (
        ['userid']
        + ['step{}N'.format(k) for k in steps]
        + ['viewProductN']
        + ['last{}time'.format(k) for k in steps]
        + ['buy/viewProductN', 'buy/n5', '(9)/(1)', '(2-4)/(1)']
    )

    rows = []
    # sort=False keeps users in order of first appearance, as before.
    for uid, group in cm_data_raw.groupby('userid', sort=False):
        udata = sort(group, ["actionType", "actionTime"], ascending=False)

        # Per-type hit count and the time of the first row of that type.
        counts = dict.fromkeys(steps, 0)
        first_seen = dict.fromkeys(steps, 0)
        for atype, atime in zip(udata['actionType'], udata['actionTime']):
            if atype not in counts:
                continue
            if counts[atype] == 0:
                first_seen[atype] = atime
            counts[atype] += 1

        # Missing data: a step cannot have fewer hits than the step after it,
        # so steps 8..5 inherit count and time from their successor if needed.
        for k in (8, 7, 6, 5):
            if counts[k] < counts[k + 1]:
                counts[k] = counts[k + 1]
                first_seen[k] = first_seen[k + 1]

        view_n = counts[2] + counts[3] + counts[4]
        if view_n:
            buy_view = counts[9] / view_n
        else:
            # no product views: 1 if a purchase happened anyway, else 0
            buy_view = 1 if counts[9] else 0
        buy_5 = counts[9] / counts[5] if counts[5] else 0
        p9_1 = counts[9] / counts[1] if counts[1] else 0
        p24_1 = view_n / counts[1] if counts[1] else 0

        row = {
            'userid': uid,
            'viewProductN': view_n,
            'buy/viewProductN': buy_view,
            'buy/n5': buy_5,
            '(9)/(1)': p9_1,
            '(2-4)/(1)': p24_1,
        }
        for k in steps:
            row['step{}N'.format(k)] = counts[k]
            row['last{}time'.format(k)] = first_seen[k]
        rows.append(row)

    newCust = pd.DataFrame(rows, columns=columns)
    newCust.to_csv(outputpath)
    return
def up_oh_statistics(up_path, oh_path, outputpath):
    """Build per-user order-history features, join them to the user profiles
    and write the one-hot encoded result as CSV.

    ``datatonumber(up_path, oh_path)`` supplies the order-history frame at
    index 0 and the user-profile frame at index 1. For every user the orders
    are ordered with ``sort`` and reduced to: the values of the first row
    (time, city, country, continent), the number of distinct order times, the
    number of type-1 orders, the number of distinct cities and countries, and
    the number of rows per continent code.

    Changes from the previous revision:
      * Oceania visits were never counted (``Numberof0`` was assigned instead
        of ``NumberofO``);
      * rows are collected in a list because ``DataFrame.append`` was removed
        in pandas 2.0, and users are visited with ``groupby`` instead of a
        quadratic scan.

    Args:
        up_path: Path handed to ``datatonumber`` for the user profiles.
        oh_path: Path handed to ``datatonumber`` for the order history.
        outputpath: Path of the CSV file to write.

    Returns:
        None.
    """
    converted = datatonumber(up_path, oh_path)
    oh_data = converted[0]
    up_data = converted[1]

    columns = [
        'userid', 'nearestOrderTime', 'nearestCity', 'nearestCountry',
        'nearestContinent', 'totalOrder', 'numberOftype0ne', 'typeOnePer',
        'numberofCity', 'numberofCountry', 'NumberofU', 'NumberofNA',
        'NumberofA', 'NumberofO', 'NumberofAF', 'NumberofSA'
    ]
    continent_codes = ('U', 'NA', 'A', 'O', 'AF', 'SA')

    rows = []
    # sort=False keeps users in order of first appearance, as before.
    for uid, group in oh_data.groupby('userid', sort=False):
        udata = sort(group, ["orderTime", "orderType"], ascending=False)
        order_times = list(udata['orderTime'])
        order_types = list(udata['orderType'])
        cities = list(udata['cityN'])
        countries = list(udata['countryN'])
        continents = list(udata['continentN'])

        # Orders placed at the same time are treated as parent/child orders
        # and counted once.
        # NOTE(review): the original indentation was lost; the type-1 count is
        # assumed to sit inside this de-duplication (which keeps typeOnePer
        # within [0, 1]) -- confirm.
        seen_times = set()
        total_orders = 0
        type_one_orders = 0
        for odtime, ot in zip(order_times, order_types):
            if odtime in seen_times:
                continue
            seen_times.add(odtime)
            total_orders += 1
            if ot == 1:
                type_one_orders += 1

        # Visits per continent are counted per row, without de-duplication.
        visits = dict.fromkeys(continent_codes, 0)
        for cn in continents:
            if cn in visits:
                visits[cn] += 1

        row = {
            # rows are in descending orderTime order (see the sort call), so
            # the first row is taken as the nearest (most recent) order
            'userid': uid,
            'nearestOrderTime': order_times[0],
            'nearestCity': cities[0],
            'nearestCountry': countries[0],
            'nearestContinent': continents[0],
            'totalOrder': total_orders,
            'numberOftype0ne': type_one_orders,
            'typeOnePer': type_one_orders / total_orders,
            'numberofCity': len(set(cities)),
            'numberofCountry': len(set(countries)),
        }
        for code in continent_codes:
            row['Numberof' + code] = visits[code]
        rows.append(row)

    # dtype=object: pd.get_dummies below only expands object/category columns,
    # and the frame previously built row by row presumably had object columns;
    # keeping that dtype keeps the same columns one-hot encoded.
    newCust = pd.DataFrame(rows, columns=columns, dtype=object)

    # Join the statistics to the user profiles (outer join on userid), one-hot
    # encode the categorical columns and drop their raw versions.
    categorical = [
        'genderN', 'provinceN', 'nearestCity', 'nearestCountry',
        'nearestContinent'
    ]
    up_oh_data = pd.merge(up_data, newCust, how='outer', on='userid')
    data_2 = pd.get_dummies(up_oh_data[categorical])
    result_1 = pd.concat([up_oh_data, data_2], axis=1)
    result_1.drop(categorical, axis=1, inplace=True)
    result_1.to_csv(outputpath)
    return
def action_statistics(action_path, outputpath):
    """Summarise a user-action log into one feature row per user and save it.

    For every user the rows are ordered with ``sort`` (by time, then type,
    descending) and reduced to: counts and shares of action types 1..9, the
    time and row position of the first row seen for each type, ratios between
    the counts, the types/times of the first four rows ("tail") and of the
    last row ("head"), statistics over the differences between consecutive
    action times, and statistics over the differences that precede the first
    row of each type.

    Changes from the previous revision:
      * action type 5 was counted twice (the duplicated stanza also targeted
        the wrong interval list);
      * ``near1``..``near9`` were computed but never written, and the column
        list misspelt ``near8`` as ``nsar8``;
      * the total was declared as column ``totalstep`` but written as
        ``totalStep`` -- only ``totalStep`` is emitted now;
      * a user without any action of type 1..9 no longer raises
        ZeroDivisionError (all shares are 0);
      * rows are collected in a list because ``DataFrame.append`` was removed
        in pandas 2.0; users are visited with ``groupby`` and time gaps are
        computed once instead of being re-read through ``iloc``.

    Args:
        action_path: Comma-separated input file with at least the columns
            ``userid``, ``actionType`` and ``actionTime``.
        outputpath: Path of the CSV file to write.

    Returns:
        None.
    """
    cm_data_raw = pd.read_table(action_path, sep=',', encoding='utf-8')

    steps = range(1, 10)
    stat_suffixes = ('a', 'v', 'mini', 'max')
    columns = (
        ['userid', 'totalStep']
        + ['step{}N'.format(k) for k in steps]
        + ['step{}P'.format(k) for k in steps]
        + ['viewProductN']
        + ['last{}time'.format(k) for k in steps]
        + ['buy/viewProductN', 'buy/n5', '(2-4)/(1)']
        + ['tail1', 'tail2', 'tail3', 'tail4', 'head1']
        + ['maxtime', 'mintime', 'averagetime', 'vartime', 'mediantime']
        + ['tailtime1', 'tailtime2', 'tailtime3', 'tailtime4']
        + ['tail3ave', 'tail3var']
        + ['near{}'.format(k) for k in steps]
        + ['t{}{}'.format(k, sfx) for k in steps for sfx in stat_suffixes]
        + ['t9av']
    )

    rows = []
    # sort=False keeps users in order of first appearance, as before.
    for uid, group in cm_data_raw.groupby('userid', sort=False):
        udata = sort(group, ["actionTime", "actionType"], ascending=False)
        types = list(udata['actionType'])
        times = list(udata['actionTime'])

        # Differences between consecutive rows, in sorted order.
        timeslot = [cur - prev for prev, cur in zip(times, times[1:])]

        counts = dict.fromkeys(steps, 0)
        first_seen = dict.fromkeys(steps, 0)
        nearest = dict.fromkeys(steps, 0)
        lead_gaps = {k: [] for k in steps}
        for j, atype in enumerate(types):
            if atype not in counts:
                continue
            if counts[atype] == 0:
                # First row of this type: remember its time and position, and
                # the gaps between the rows that come before it.
                # NOTE(review): as in the original (range(1, j)), the gap
                # leading directly into row j is not included -- confirm.
                first_seen[atype] = times[j]
                nearest[atype] = j
                lead_gaps[atype] = timeslot[:max(j - 1, 0)]
            counts[atype] += 1

        # Types/times of the first four rows, padded with 0 for short logs.
        tail_types = (types + [0] * 4)[:4]
        tail_times = (times + [0] * 4)[:4]
        head1 = types[-1] if types else 0

        maxtime = mintime = averagetime = vartime = mediantime = 0
        tail3ave = tail3var = 0
        if timeslot:
            maxtime = np.max(timeslot)
            mintime = np.min(timeslot)
            averagetime = np.average(timeslot)
            mediantime = np.median(timeslot)
            vartime = np.var(timeslot)
            tail3a = timeslot[-3:]
            tail3ave = np.average(tail3a)
            tail3var = np.var(tail3a)

        view_n = counts[2] + counts[3] + counts[4]
        if view_n:
            buy_view = counts[9] / view_n
        else:
            # no product views: 1 if a purchase happened anyway, else 0
            buy_view = 1 if counts[9] else 0
        buy_5 = counts[9] / counts[5] if counts[5] else 0
        p24_1 = view_n / counts[1] if counts[1] else 0
        total_steps = sum(counts.values())

        row = {'userid': uid, 'totalStep': total_steps}
        for k in steps:
            row['step{}N'.format(k)] = counts[k]
            row['step{}P'.format(k)] = (
                counts[k] / total_steps if total_steps else 0)
            row['last{}time'.format(k)] = first_seen[k]
            row['near{}'.format(k)] = nearest[k]

            # Average / variance / min / max of the gaps before step k;
            # all 0 when there are none.
            gaps = lead_gaps[k]
            if gaps:
                stats = (np.average(gaps), np.var(gaps), np.min(gaps),
                         np.max(gaps))
            else:
                stats = (0, 0, 0, 0)
            for sfx, value in zip(stat_suffixes, stats):
                row['t{}{}'.format(k, sfx)] = value
        row['t9av'] = row['t9a'] * row['t9v']

        for pos in range(4):
            row['tail{}'.format(pos + 1)] = tail_types[pos]
            row['tailtime{}'.format(pos + 1)] = tail_times[pos]
        row.update({
            'viewProductN': view_n,
            'buy/viewProductN': buy_view,
            'buy/n5': buy_5,
            '(2-4)/(1)': p24_1,
            'head1': head1,
            'maxtime': maxtime,
            'mintime': mintime,
            'averagetime': averagetime,
            'vartime': vartime,
            'mediantime': mediantime,
            'tail3ave': tail3ave,
            'tail3var': tail3var,
        })
        rows.append(row)

    newCust = pd.DataFrame(rows, columns=columns)
    newCust.to_csv(outputpath)
    return
def rating_stantistics(inputpath, outputpath):
    """Summarise a ratings file into one row of rating statistics per user.

    For every user the output holds the sum, number and average of the
    ratings, how often each of the values 1, 2, 3, 3.67, 4, 4.33 and 5 was
    given, the number of low ratings (1, 2 or 3) and the number of high
    ratings (4, 4.33 or 5).

    Changes from the previous revision:
      * a 4.33 rating overwrote the 3.67 counter instead of incrementing the
        4.33 counter, so ``numberof367``, ``numberof433`` and ``highrate``
        were wrong;
      * rows are collected in a list because ``DataFrame.append`` was removed
        in pandas 2.0, and users are visited with ``groupby`` instead of a
        quadratic scan.

    Args:
        inputpath: Comma-separated input file with at least the columns
            ``userid``, ``orderid`` and ``rating``.
        outputpath: Path of the CSV file to write.

    Returns:
        None.
    """
    cm_data_raw = pd.read_table(inputpath, sep=',', encoding='utf-8')

    columns = [
        'userid', 'totalrate', 'totalnumber', 'averate', 'numberof1',
        'numberof2', 'numberof3', 'numberof367', 'numberof433', 'numberof4',
        'numberof5', 'lowrate', 'highrate'
    ]
    # rating value -> name of the counter column it feeds
    counter_for = {
        1: 'numberof1',
        2: 'numberof2',
        3: 'numberof3',
        3.67: 'numberof367',
        4.33: 'numberof433',
        4: 'numberof4',
        5: 'numberof5',
    }

    rows = []
    # sort=False keeps users in order of first appearance, as before.
    for uid, group in cm_data_raw.groupby('userid', sort=False):
        udata = sort(group, ["orderid"], ascending=False)
        ratings = list(udata['rating'])

        tally = dict.fromkeys(counter_for.values(), 0)
        totalrate = 0
        for rating in ratings:
            totalrate = totalrate + rating
            counter = counter_for.get(rating)
            if counter is not None:
                tally[counter] += 1
        totalnumber = len(ratings)

        row = {
            'userid': uid,
            'totalrate': totalrate,
            'totalnumber': totalnumber,
            'averate': totalrate / totalnumber,
            # 3 and below is a low rating; 4 and above is a high rating
            'lowrate': (tally['numberof1'] + tally['numberof2']
                        + tally['numberof3']),
            'highrate': (tally['numberof4'] + tally['numberof5']
                         + tally['numberof433']),
        }
        row.update(tally)
        rows.append(row)

    newCust = pd.DataFrame(rows, columns=columns)
    newCust.to_csv(outputpath)
    return
# List every file found in the data directory.
csv_files = os.listdir(path)

frequency = functions.namesList(csv_files)  # Independent variable

# Per-file results, appended in the order given by functions.sort(csv_files).
rangeList1, rangeList2 = [], []
meanList1, meanList2 = [], []

# For each file, take column 0 (analog 1) and column 1 (analog 2) and record
# the range and the mean of each.
for file in functions.sort(csv_files):
    _csv_path = path + file

    analog1Range = functions.getRange(
        functions.convertToFloat(functions.columnList(_csv_path, 0)))
    analog2Range = functions.getRange(
        functions.convertToFloat(functions.columnList(_csv_path, 1)))
    rangeList1.append(analog1Range)
    rangeList2.append(analog2Range)

    analog1Mean = functions.mean(
        functions.convertToFloat(functions.columnList(_csv_path, 0)))
    analog2Mean = functions.mean(
        functions.convertToFloat(functions.columnList(_csv_path, 1)))
    meanList1.append(analog1Mean)
    meanList2.append(analog2Mean)

    print("File: " + file)