示例#1
0
        def save_key_attributes():
            """Build the key-attribute table from ``event_log`` and parse its timestamps.

            Reads the case, event, activity and timestamp column names chosen
            in the UI, copies those columns out of ``event_log`` under the fixed
            names ``Case``, ``Event``, ``Activity`` and ``Timestamp``, drops
            rows without a case id and sorts by case and timestamp.  When no
            event column was chosen ("-- no attribute --"), a running row
            number is used as ``Event``.

            The timestamp format is either one of the preset examples in
            ``form_dict`` or a ``strptime`` format string typed by the user.
            If ``iserror`` reports that the first timestamp does not parse with
            that format, an error dialog is shown and the function returns.
            Otherwise ``Timestamp`` is replaced by the parsed datetimes, a
            ``unixtime`` column (seconds since 1970-01-01) is added, the table
            is stored on ``PageTwo.extracted_data`` and
            ``parent.show_frame1()`` is called.

            Side effects: rebinds the global ``extracted_data`` and sets
            ``PageTwo.firstpreprocess`` to the NaN-filtered, unsorted table.
            If ``event_log`` is empty only an error dialog is shown.
            """
            global extracted_data  # event_log restricted to the key attributes

            if len(event_log) == 0:
                messagebox.showinfo("Error", "Error")
                return

            selected_caseid = caseid_value_chosen.get()
            selected_eventid = eventid_value_chosen.get()
            selected_activity = activity_value_chosen.get()
            selected_timestamp = timestamp_value_chosen.get()

            # Always work on a copy: the column insert / rename below must not
            # be written into a slice of event_log (pandas SettingWithCopy).
            if selected_eventid == "-- no attribute --":
                extracted_data = event_log[
                    [selected_caseid, selected_activity, selected_timestamp]].copy()
                # No event id column chosen: number the rows and put that
                # column second so the order is Case, Event, Activity, Timestamp.
                extracted_data.insert(
                    1, "Event", list(range(len(extracted_data))),
                    allow_duplicates=True)
            else:
                extracted_data = event_log[
                    [selected_caseid, selected_eventid,
                     selected_activity, selected_timestamp]].copy()

            extracted_data.columns = ["Case", "Event", "Activity", "Timestamp"]
            extracted_data = extracted_data.dropna(subset=["Case"])
            PageTwo.firstpreprocess = extracted_data
            extracted_data = extracted_data.sort_values(
                ["Case", "Timestamp"], ascending=[True, True])

            # Preset choices offered in the UI: example text -> strptime format.
            form_dict = {
                "2020-01-02 03:04:05.006": "%Y-%m-%d %H:%M:%S.%f",
                "2020-01-02 03:04:05": "%Y-%m-%d %H:%M:%S",
                "2020-01-02 03:04:05.006 PM": "%Y-%m-%d %I:%M:%S.%f %p",
                "2020-01-02 03:04:05 PM": "%Y-%m-%d %I:%M:%S %p",
                "20-01-02 03:04:05.006": "%y-%m-%d %H:%M:%S.%f",
                "20-01-02 03:04:05": "%y-%m-%d %H:%M:%S",
                "20-01-02 03:04:05.006 PM": "%y-%m-%d %I:%M:%S.%f %p",
                "20-01-02 03:04:05 PM": "%y-%m-%d %I:%M:%S %p",
                "2020/01/02 03:04:05.006": "%Y/%m/%d %H:%M:%S.%f",
                "2020/01/02 03:04:05": "%Y/%m/%d %H:%M:%S",
                "2020/01/02 03:04:05.006 PM": "%Y/%m/%d %I:%M:%S.%f %p",
                "2020/01/02 03:04:05 PM": "%Y/%m/%d %I:%M:%S %p",
                "20/01/02 03:04:05.006": "%y/%m/%d %H:%M:%S.%f",
                "20/01/02 03:04:05": "%y/%m/%d %H:%M:%S",
                "20/01/02 03:04:05.006 PM": "%y/%m/%d %I:%M:%S.%f %p",
                "20/01/02 03:04:05 PM": "%y/%m/%d %I:%M:%S %p",
            }

            if extracted_data.empty:
                # Every row lacked a case id, so there is no sample timestamp
                # to validate the format against.
                messagebox.showinfo("Error", "No rows with a case id were found")
                return

            chosen_form = timeform_value_chosen.get()
            is_preset = chosen_form in form_dict
            # Anything that is not a preset is treated as a raw strptime format.
            form = form_dict[chosen_form] if is_preset else chosen_form

            # Positional access: after dropna/sort_values the label 0 may be
            # missing or may no longer be the first row.
            first_timestamp = extracted_data["Timestamp"].iloc[0]

            # iserror is defined elsewhere; presumably it is truthy when
            # calling dt.strptime(first_timestamp, form) raises.
            if iserror(dt.strptime, first_timestamp, form):
                if is_preset:
                    messagebox.showinfo(
                        "Error",
                        "{0} is not matched with {1}".format(first_timestamp, form))
                elif "\"" in form or "'" in form:
                    messagebox.showinfo(
                        "Error", "Type timestamp format without ' or \"")
                else:
                    messagebox.showinfo("Error", "Wrong input for timestamp format: use format in 'datetime' packages. (ex: %Y-%m-%d %H:%M:%S)")
                return

            # Shared success path for preset and custom formats.  Nothing is
            # called on the tk.Frame class itself here: grid_forget() needs a
            # widget instance and raises TypeError when called on the class.
            parsed = extracted_data["Timestamp"].apply(
                lambda x: dt.strptime(x, form))
            epoch = dt(1970, 1, 1)
            extracted_data["Timestamp"] = parsed
            extracted_data["unixtime"] = parsed.apply(
                lambda x: (x - epoch).total_seconds())
            PageTwo.extracted_data = extracted_data
            messagebox.showinfo("Message", "Loaded")
            parent.show_frame1()
        s = '00'
    bp = h + ':' + m + ':' + s
    return bp


# Load the training table once and take the raw duration column from it,
# rather than parsing "train.csv" a second time.
train = pd.read_csv("train.csv")
mp = train["duration"]
# Run every raw duration through checki (defined elsewhere in this file);
# the result is later split on ':' by func_sec.
time = mp.apply(checki)


def func_sec(time_string):
    """Convert an ``H:M:S`` string into its total number of seconds.

    ``time_string`` must contain exactly three ':'-separated integer
    fields; anything else raises ``ValueError``.
    """
    hours, minutes, seconds = (int(field) for field in time_string.split(':'))
    return (hours * 60 + minutes) * 60 + seconds


# Convert each "H:M:S" duration string into a number of seconds.
time1 = time.apply(func_sec)

# Store the durations in seconds on data_train (data_train is defined
# elsewhere, not in this part of the file).
data_train["duration"] = time1
# The return value is discarded here; it is only displayed when run in an
# interactive session or notebook.
data_train.head()

# Visualization: individual plots
# Histogram of the "category" column.
plt.hist(data_train["category"])
plt.show()
# Line plot of the "adview" column in row order.
plt.plot(data_train["adview"])
plt.show()

# Treat videos with adview of 2000000 or more as outliers: keep only rows
# whose adview is strictly below 2000000.
data_train = data_train[data_train["adview"] < 2000000]

# Heatmap
import seaborn as sns
示例#3
0
def convert_time(time):
	"""Parse every entry of ``time`` and convert it relative to entry 0.

	``_parse_time`` is applied to each element first.  Then
	``_convert_delta_to_hours`` is applied to each parsed element, with the
	parsed element at key ``0`` passed as its extra positional argument.
	``time`` is presumably a pandas Series (``apply`` is called with
	``args=``) -- confirm against callers.
	"""
	parsed = time.apply(_parse_time)
	reference = parsed[0]
	# NOTE: rounding the result to 2 decimals (``.round(2)``) is currently disabled.
	return parsed.apply(_convert_delta_to_hours, args=(reference,))