示例#1
0
def get_p_window_given_task(tasks, windows, window_names, unique_tasks=TASKS):
    """
    Estimate the conditional probability of a window title given the task.

    This is the 1D case for window titles, used when exe name and window
    title are judged to be independent.

    Because the task is the quantity being estimated, the conditional
    distribution is computed for every candidate task in ``unique_tasks``.

    Parameters
    ----------
    tasks : task label of each observation. It is compared element-wise
        with ``==``, so it is assumed to be a numpy array aligned with
        ``windows`` (TODO confirm against callers).
    windows : window title of each observation; the empty string ``''``
        stands for an unknown title.
    window_names : the known window titles to tabulate.
    unique_tasks : the candidate tasks (defaults to the module-level TASKS).

    Returns
    -------
    The table created by ``dtools.init_dic_matrix``, indexed as
    ``[task][title]``, where each task row has been passed through
    ``dtools.normalize`` (presumably rescaling the counts so they sum to
    one -- verify against dtools).
    """

    # + class for unknown titles (the '' entry filled below)
    p_window_given_task = dtools.init_dic_matrix(unique_tasks, window_names)

    for task in unique_tasks:
        # The task mask does not depend on the title, so build it once.
        task_mask = tasks == task

        # first unknown titles and known tasks
        p_window_given_task[task][''] = np.sum(
            np.logical_and(windows == '', task_mask))

        # then known titles and known tasks
        for w in window_names:
            p_window_given_task[task][w] = np.sum(
                np.logical_and(windows == w, task_mask))

        # Normalize each task row exactly once, and only after all of its
        # raw counts are in place. Normalizing earlier (or only for the
        # last task) would mix raw counts with already-normalized values.
        p_window_given_task[task] = dtools.normalize(p_window_given_task[task])

    return p_window_given_task
示例#2
0
def get_p_rclicks_given_task(tasks, rclicks, n_clicks, unique_tasks=TASKS):
    """
    Estimate the conditional probability of a right-click count given the task.

    For every candidate task, count the observations that carry that task
    label and show exactly ``i`` right clicks, for ``i`` in
    ``0 .. n_clicks - 1``, then pass the task's row through
    ``dtools.normalize``.

    The distribution is computed for each possible task because the task
    itself is what is being estimated.

    Returns the table created by ``dtools.init_dic_matrix``, indexed as
    ``[task][i]``.
    """

    click_values = np.arange(n_clicks)
    table = dtools.init_dic_matrix(unique_tasks, click_values)

    for current_task in unique_tasks:
        # Observations labelled with the current task.
        is_current_task = tasks == current_task

        for click_count in range(n_clicks):
            both = np.logical_and(rclicks == click_count, is_current_task)
            table[current_task][click_count] = np.sum(both)

        table[current_task] = dtools.normalize(table[current_task])

    return table
示例#3
0
def computeCramers(data1, dataName1, data2, dataName2):
    """
    Print Cramer's V between two categorical variables.

    In our case the two variables are a task and a descriptor. A cross
    tabulation of the unique values of ``data1`` against the unique values
    of ``data2`` is built and handed to ``cramersV`` and
    ``cramersV_bias_corrected``; the two resulting V values are printed
    under a header naming the variables (``dataName1`` and ``dataName2``).

    Cramer's V is symmetric, so it does not matter which variable is passed
    first. Nothing is returned.
    """
    levels1 = np.unique(data1)
    levels2 = np.unique(data2)
    contingency = dtools.init_dic_matrix(levels1, levels2)

    for level1 in levels1:
        matches1 = data1 == level1
        for level2 in levels2:
            # Element-wise product of the two masks marks the observations
            # where both values occur together.
            joint = np.multiply(matches1, data2 == level2)
            contingency[level1][level2] = np.sum(joint)

    # Only the V statistic of each result pair is reported.
    _, v = cramersV(contingency)
    _, v_cor = cramersV_bias_corrected(contingency)

    print('******************************')
    print('Correlation between {} and {}\n'.format(dataName1, dataName2))
    print('Cramers V: {:.3f}'.format(v))
    print('Cramers V: {:.3f} (bias corrected)\n'.format(v_cor))
示例#4
0
def get_p_keyst_given_task(tasks, keystrokes_quan, n_tot_keyst, unique_tasks=TASKS):
    """
    Estimate the conditional probability of a keystroke quantile given the task.

    For every candidate task, count the observations that carry that task
    label and fall in keystroke quantile ``q``, for ``q`` in
    ``0 .. n_tot_keyst - 1``, then pass the task's row through
    ``dtools.normalize``.

    Since the task is what we are trying to estimate, it is still unknown
    at this point, and therefore the distribution is computed for each
    possible value of task.

    Returns the table created by ``dtools.init_dic_matrix``, indexed as
    ``[task][q]``.
    """

    quantile_ids = np.arange(n_tot_keyst)
    counts = dtools.init_dic_matrix(unique_tasks, quantile_ids)

    for label in unique_tasks:
        # Observations belonging to this task.
        has_label = tasks == label

        for quantile in range(n_tot_keyst):
            in_quantile = keystrokes_quan == quantile
            counts[label][quantile] = np.sum(
                np.logical_and(in_quantile, has_label))

        counts[label] = dtools.normalize(counts[label])

    return counts
示例#5
0
def get_p_duration_given_task(tasks, duration, unique_tasks=TASKS):
    """
    Estimate the conditional probability of a duration bin given the task.

    This is very similar to the 1D case relating exe name or window title.
    For every candidate task, count the observations that carry that task
    label and fall in duration bin ``i``, then pass the task's row through
    ``dtools.normalize``.

    The distribution is computed for each possible task because the task
    itself is what is being estimated.

    Parameters
    ----------
    tasks : task label of each observation (compared element-wise with
        ``==``; assumed to be a numpy array aligned with ``duration`` --
        TODO confirm).
    duration : duration bin code of each observation.
    unique_tasks : the candidate tasks (defaults to the module-level TASKS).

    Returns
    -------
    The table created by ``dtools.init_dic_matrix``, indexed as
    ``[task][i]`` for ``i`` in ``0 .. n_bins`` inclusive, where ``n_bins``
    is the number of distinct values in ``duration``.
    """
    n_bins = len(np.unique(duration))

    # The counting loop visits bins 0..n_bins inclusive, so the table is
    # created with exactly those bins instead of relying on the container
    # to grow an extra entry on assignment.
    # NOTE(review): this assumes duration codes are integers within
    # 0..n_bins; confirm whether the codes are 0- or 1-based.
    n_cols = n_bins + 1
    p_duration_given_task = dtools.init_dic_matrix(unique_tasks, np.arange(n_cols))

    for task in unique_tasks:
        # The task mask does not depend on the bin, so build it once.
        task_mask = tasks == task
        for i in range(n_cols):
            p_duration_given_task[task][i] = np.sum(
                np.logical_and(duration == i, task_mask))
        p_duration_given_task[task] = dtools.normalize(p_duration_given_task[task])

    return p_duration_given_task
示例#6
0
def get_p_exe_given_task(tasks, exes, exe_names, unique_tasks=TASKS):
    """
    Estimate the conditional probability of an exe name given the task.

    This is the 1D case relating application (i.e. exe) names. Namely, in
    case exe name and window title are judged to be independent, we treat
    each of them individually (i.e. as 1D).

    Because the task is the quantity being estimated, the conditional
    distribution is computed for every candidate task in ``unique_tasks``.

    Parameters
    ----------
    tasks : task label of each observation (compared element-wise with
        ``==``; assumed to be a numpy array aligned with ``exes`` --
        TODO confirm).
    exes : exe name of each observation.
    exe_names : the exe names to tabulate.
    unique_tasks : the candidate tasks (defaults to the module-level TASKS).

    Returns
    -------
    The table created by ``dtools.init_dic_matrix``, indexed as
    ``[task][exe]``, where each task row has been passed through
    ``dtools.normalize`` (presumably rescaling the counts so they sum to
    one -- verify against dtools).
    """
    p_exe_given_task = dtools.init_dic_matrix(unique_tasks, exe_names)

    for task in unique_tasks:
        # The task mask does not depend on the exe, so build it once.
        task_mask = tasks == task

        for e in exe_names:
            p_exe_given_task[task][e] = np.sum(
                np.logical_and(exes == e, task_mask))

        # Normalize each task row exactly once, after all of its raw counts
        # are in place. Normalizing inside the exe loop would touch only one
        # task and would mix raw counts with already-normalized values.
        p_exe_given_task[task] = dtools.normalize(p_exe_given_task[task])

    return p_exe_given_task
    n_task = params.N_TASKS  # I include '0' for unknown task (not labeled by Shimizu)
    all_exe = (np.unique(exe_code_mat))  # number of all exe
    all_title = (
        np.unique(title_code_mat)
    )  # all window titles used in the Shimizu's rules and +1 for the titles which do  not include keyword from his rules
    n_all_time = 2  # for lunch break and not-lunch break
    """    
    Correlation between exe and task
    CramersV is symmetric (so it does not matter which one comes first)
    """
    cV = []
    for exe in all_exe:
        """
        Two rows in cross tabulation:  one for exeE positive, other for exeE negative
        """
        crosstab_exeE_vs_task = dtools.init_dic_matrix(
            ['Positive', 'Negative'])
        tempE = (exe_code_mat == exe)
        for task in params.TASKS:
            # exeE negative
            crosstab_exeE_vs_task['Negative'][task] = np.sum( np.logical_and(tempE == False, \
                                 task_code_mat == task))
            # exeE positive
            crosstab_exeE_vs_task['Positive'][task] = np.sum( np.logical_and(tempE == True, \
                                 task_code_mat == task))

        (x_sq_task_vs_exe,
         v_task_vs_exe) = Vtools.cramersV(crosstab_exeE_vs_task)
        (x_sq_task_vs_exe_cor, v_task_vs_exe_cor
         ) = Vtools.cramersV_bias_corrected(crosstab_exeE_vs_task)

        cV.append(v_task_vs_exe)