Пример #1
0
 def extract(self, session, info, meta):
     """Emit the feedback features gathered so far, then absorb this session.

     The feature values are read from the running totals *before* the
     current session's events are passed to ``update_feedback``, so a
     session never contributes to its own features.

     Args:
         session: Session rows; each value of ``session.event_data`` is
             parsed with ``json.loads``.
         info: Session descriptor; only ``should_include`` is read here.
         meta: Not used by this method.

     Returns:
         The result of ``U.prefix_keys(features, 'fb_')``. ``features`` is
         empty when ``info.should_include`` is false.
     """
     features = OrderedDict()

     if info.should_include:
         pos = self.pos_feedback
         neg = self.neg_feedback
         other = self.other_feedback
         total = pos + neg + other
         features['pos_feedback'] = pos
         features['neg_feedback'] = neg
         features['other_feedback'] = other
         # 9999 is handed to savediv as its third argument -- presumably the
         # value returned for a zero denominator (TODO confirm in U.savediv).
         features['pos_neg_ratio'] = U.savediv(pos, neg, 9999)
         features['pos_all_ratio'] = U.savediv(pos, total, 9999)
         features['total_feedback'] = total
         features.update(U.prefix_keys(self.cnt_char_feedback, 'char_'))

     # ``pd.io.json.json_normalize`` was deprecated in pandas 1.0 in favour
     # of the top-level ``pd.json_normalize``; prefer the new name and fall
     # back only when running on an older pandas.
     try:
         normalize = pd.json_normalize
     except AttributeError:
         normalize = pd.io.json.json_normalize

     # A plain list of dicts is the input form documented for every pandas
     # version, so the resulting frame does not depend on Series handling.
     events = normalize(session.event_data.apply(json.loads).tolist())
     self.update_feedback(events)
     return U.prefix_keys(features, 'fb_')
Пример #2
0
 def extract(self, session, info, meta):
     """Summarise the accumulated cyclic timestamp encodings.

     For every name in ``CYCLIC_FEATURES`` and each of its ``sin``/``cos``
     columns, the mean and standard deviation of the values stored in
     ``self.acc`` are emitted (0 when nothing is stored yet), after which
     the current session's column values are appended to the accumulator.

     NOTE(review): the accumulator is extended only inside the
     ``should_include`` branch, so sessions with ``should_include`` false
     never contribute history. Confirm that this is intended.

     Args:
         session: Session rows providing the ``ts_<name>_<sin|cos>`` columns.
         info: Session descriptor; only ``should_include`` is read here.
         meta: Not used by this method.

     Returns:
         The result of ``U.prefix_keys(features, 'cycl_')``.
     """
     features = OrderedDict()
     if not info.should_include:
         return U.prefix_keys(features, 'cycl_')

     for dt in CYCLIC_FEATURES:
         for angle in ('sin', 'cos'):
             key = f'ts_{dt}_{angle}'
             history = self.acc[key]
             if history:
                 mean_value = np.mean(history)
                 std_value = np.std(history)
             else:
                 mean_value = 0
                 std_value = 0
             features[f'{key}_mean'] = mean_value
             features[f'{key}_std'] = std_value
             # Statistics above are taken before this session is appended.
             self.acc[key] += session[key].tolist()

     return U.prefix_keys(features, 'cycl_')
Пример #3
0
 def extract(self, session, info, meta):
     """Emit event-data counter features, then absorb this session's events.

     When ``info.should_include`` is true the features contain, for each of
     the media/source/level/weight/size counters, the number of entries with
     a truthy value (``var_*``) followed by the counters themselves under
     their own prefixes. All values are read before the current session is
     passed to the ``update_*`` methods.

     Args:
         session: Session rows; each value of ``session.event_data`` is
             parsed with ``json.loads``.
         info: Session descriptor; only ``should_include`` is read here.
         meta: Not used by this method.

     Returns:
         The result of ``U.prefix_keys(features, 'event_')``. ``features``
         is empty when ``info.should_include`` is false.
     """

     def count_truthy(counter):
         # Number of entries whose value is truthy (e.g. a non-zero count).
         return sum(1 for value in counter.values() if value)

     features = OrderedDict()

     if info.should_include:
         features['var_media'] = count_truthy(self.cnt_media)
         features['var_source'] = count_truthy(self.cnt_source)
         features['var_level'] = count_truthy(self.cnt_level)
         features['var_weight'] = count_truthy(self.cnt_weight)
         features['var_size'] = count_truthy(self.cnt_size)
         # Key order below (size before weight) is kept exactly as it was,
         # since it determines the order of the emitted feature columns.
         features.update(U.prefix_keys(self.cnt_media.copy(), 'media_'))
         features.update(U.prefix_keys(self.cnt_source.copy(), 'source_'))
         features.update(U.prefix_keys(self.cnt_level.copy(), 'level_'))
         features.update(U.prefix_keys(self.cnt_size.copy(), 'size_'))
         features.update(U.prefix_keys(self.cnt_weight.copy(), 'weight_'))

     # ``pd.io.json.json_normalize`` was deprecated in pandas 1.0 in favour
     # of the top-level ``pd.json_normalize``; prefer the new name and fall
     # back only when running on an older pandas.
     try:
         normalize = pd.json_normalize
     except AttributeError:
         normalize = pd.io.json.json_normalize

     # A plain list of dicts is the input form documented for every pandas
     # version, so the resulting frame does not depend on Series handling.
     data = normalize(session.event_data.apply(json.loads).tolist())

     self.update_round(data)
     self.update_coord(data)
     self.update_media(data)
     self.update_source(data)
     self.update_levels(data)
     self.update_sizes(data)
     self.update_weights(data)

     return U.prefix_keys(features, 'event_')
Пример #4
0
    def extract(self, session, info, meta):
        """Report how many entries of each counter are non-zero so far.

        For each of the ``cnt_title_event_code``, ``cnt_title``,
        ``cnt_event_code`` and ``cnt_event_id`` counters the number of
        non-zero values is emitted (only when ``info.should_include`` is
        true); afterwards every counter is updated from the current session
        via ``update_counters``.

        Args:
            session: Session rows handed to ``update_counters``.
            info: Session descriptor; only ``should_include`` is read here.
            meta: Not used by this method.

        Returns:
            The result of ``U.prefix_keys(features, 'var_')``.
        """
        tracked = ('title_event_code', 'title', 'event_code', 'event_id')
        features = OrderedDict()

        if info.should_include:
            for column in tracked:
                counter = getattr(self, f'cnt_{column}')
                observed = list(counter.values())
                features[column] = np.count_nonzero(observed)

        # Counters are refreshed only after the features were read.
        for column in tracked:
            self.update_counters(getattr(self, f'cnt_{column}'), session, column)

        return U.prefix_keys(features, 'var_')
Пример #5
0
    def extract(self, session, info, meta):
        """Emit min/max/mean/std of the timestamp parts collected so far.

        Each of the ``months``, ``dows``, ``doms`` and ``hours`` lists is
        reduced with ``min``, ``max``, ``np.mean`` and ``np.std`` through
        ``U.guard_false`` (only when ``info.should_include`` is true). The
        current session's values are appended to those lists afterwards, on
        every call.

        Args:
            session: Session rows providing the ``ts_Month``,
                ``ts_Dayofweek``, ``ts_Day`` and ``ts_Hour`` columns.
            info: Session descriptor; only ``should_include`` is read here.
            meta: Not used by this method.

        Returns:
            The result of ``U.prefix_keys(features, 'ts_')``.
        """
        # (accumulator attribute, session column that feeds it)
        sources = (
            ('months', 'ts_Month'),
            ('dows', 'ts_Dayofweek'),
            ('doms', 'ts_Day'),
            ('hours', 'ts_Hour'),
        )
        reducers = (min, max, np.mean, np.std)

        features = OrderedDict()
        if info.should_include:
            for attr, _ in sources:
                history = getattr(self, attr)
                for reducer in reducers:
                    feature_name = f'{reducer.__name__}_{attr}'
                    features[feature_name] = U.guard_false(reducer, history)

        for attr, column in sources:
            getattr(self, attr).extend(session[column].tolist())

        return U.prefix_keys(features, 'ts_')
Пример #6
0
 def extract(self, session, info, meta):
     """Emit the running counters and their most/least frequent keys.

     When ``info.should_include`` is true the features contain every entry
     of the ten counters, followed by the key with the highest and lowest
     count for the title/world/type counters and their pairwise
     combinations. All values are read before the current session is
     counted.

     An empty counter yields ``None`` for its most/least frequent key
     instead of raising ``ValueError``.

     Args:
         session: Session rows handed to ``update_counters``.
         info: Session descriptor; ``should_include`` and ``session_type``
             are read here.
         meta: Not used by this method.

     Returns:
         The result of ``U.prefix_keys(features, 'cnt_')``. ``features`` is
         empty when ``info.should_include`` is false.
     """

     def most_freq(cnt):
         # ``default`` keeps max() from raising on an empty counter.
         return max(cnt.items(), key=itemgetter(1), default=(None, 0))[0]

     def least_freq(cnt):
         # ``default`` keeps min() from raising on an empty counter.
         return min(cnt.items(), key=itemgetter(1), default=(None, 0))[0]

     features = OrderedDict()
     if info.should_include:
         emitted = (
             self.cnt_title_event_code,
             self.cnt_title,
             self.cnt_event_code,
             self.cnt_event_id,
             self.cnt_activities,
             self.cnt_worlds,
             self.cnt_types,
             self.cnt_title_worlds,
             self.cnt_title_types,
             self.cnt_world_types,
         )
         # On a key shared by several counters the first occurrence fixes
         # the position and the last one the value, as before.
         for counter in emitted:
             features.update(counter.items())

         ranked = (
             ('title', self.cnt_title),
             ('world', self.cnt_worlds),
             ('type', self.cnt_types),
             ('title_world', self.cnt_title_worlds),
             ('title_type', self.cnt_title_types),
             ('world_type', self.cnt_world_types),
         )
         for label, counter in ranked:
             features[f'most_freq_{label}'] = most_freq(counter)
             features[f'least_freq_{label}'] = least_freq(counter)

     # (counter, session column) pairs, updated after the features were read.
     updates = (
         (self.cnt_title_event_code, 'title_event_code'),
         (self.cnt_title, 'title'),
         (self.cnt_event_code, 'event_code'),
         (self.cnt_event_id, 'event_id'),
         (self.cnt_worlds, 'world'),
         (self.cnt_types, 'type'),
         (self.cnt_title_worlds, 'title_world'),
         (self.cnt_title_types, 'title_type'),
         (self.cnt_world_types, 'world_type'),
     )
     for counter, column in updates:
         self.update_counters(counter, session, column)

     # An activity is counted only when the session type differs from the
     # previous call, so consecutive sessions of one type count once.
     if self.last_activity is None or self.last_activity != info.session_type:
         self.cnt_activities[info.session_type] += 1
         self.last_activity = info.session_type

     return U.prefix_keys(features, 'cnt_')
Пример #7
0
    def extract(self, session, info, meta):
        """Build performance-history features plus the ``accuracy_group`` label.

        For a session with ``info.should_include`` true, each feature is read
        from the running state *before* that state is updated with the
        current session, so the features describe earlier included sessions
        only. The label ``accuracy_group`` is derived from the current
        session's outcomes and is added to the result without the ``perf_``
        prefix so that it is not mistaken for a feature.

        For any other session only ``self.acc_actions`` is increased by the
        number of session rows and an empty mapping is returned.

        Args:
            session: Session rows; only ``len(session)`` is used.
            info: Session descriptor; ``should_include``, ``outcomes``
                (``pos``, ``neg``, ``total``), ``session_title`` and
                ``duration_seconds`` are read here.
            meta: Not used by this method.

        Returns:
            An empty ``OrderedDict`` for excluded sessions; otherwise the
            ``perf_``-prefixed features with ``accuracy_group`` appended.
        """
        if not info.should_include:
            self.acc_actions += len(session)
            return OrderedDict()

        features = OrderedDict()
        outcomes = info.outcomes

        # Attempt totals so far, then add this session's attempts.
        features['acc_attempts_pos'] = self.acc_correct_attempts
        features['acc_attempts_neg'] = self.acc_incorrect_attempts
        self.acc_correct_attempts += outcomes.pos
        self.acc_incorrect_attempts += outcomes.neg

        # Accumulated accuracy divided by the number of rows emitted so far.
        features['acc_accuracy'] = U.savediv(self.acc_accuracy, self.n_rows)
        accuracy = U.savediv(outcomes.pos, outcomes.total)
        self.acc_accuracy += accuracy

        # Last recorded accuracy per title, before this title is overwritten.
        features.update(self.last_accuracy_title)
        self.last_accuracy_title[f'acc_{info.session_title}'] = accuracy

        # The label is kept in a local rather than in ``features`` so it is
        # never prefixed; it is appended to the result at the very end.
        accuracy_group = to_accuracy_group(accuracy)
        self.accuracy_groups[accuracy_group] += 1

        features['acc_accuracy_group'] = U.savediv(self.acc_accuracy_group, self.n_rows)
        self.acc_accuracy_group += accuracy_group

        features['acc_actions'] = self.acc_actions

        features['duration_mean'] = np.mean(self.durations) if self.durations else 0
        features['total_duration'] = sum(self.durations)
        self.durations.append(info.duration_seconds)

        features['clip_duration_mean'] = U.guard_false(np.mean, self.clip_durations)
        features['clip_total_duration'] = sum(self.clip_durations)
        # Titles missing from duration.CLIPS contribute a duration of 0.
        self.clip_durations.append(duration.CLIPS.get(info.session_title, 0))

        features['clip_ratio'] = U.savediv(
            features['clip_total_duration'], features['total_duration'])

        self.n_rows += 1
        self.acc_actions += len(session)

        features = U.prefix_keys(features, 'perf_')
        features['accuracy_group'] = accuracy_group
        return features
Пример #8
0
 def extract(self, session, info, meta):
     """Emit the timestamp-part counters, then count the current session.

     When ``info.should_include`` is true the month, day-of-week,
     day-of-month, hour and minute counters are copied into the features
     under their own prefixes. Every counter is then updated from the
     matching ``ts_*`` column of the session, on every call.

     Args:
         session: Session rows handed to ``update_counters``.
         info: Session descriptor; only ``should_include`` is read here.
         meta: Not used by this method.

     Returns:
         The result of ``U.prefix_keys(features, 'ts_')``.
     """
     # (counter, feature prefix, session column that feeds the counter)
     parts = (
         (self.cnt_month, 'month_', 'ts_Month'),
         (self.cnt_dayofweek, 'dow_', 'ts_Dayofweek'),
         (self.cnt_dayofmonth, 'dom_', 'ts_Day'),
         (self.cnt_hour, 'hour_', 'ts_Hour'),
         (self.cnt_minute, 'minute_', 'ts_Minute'),
     )

     features = OrderedDict()
     if info.should_include:
         for counter, prefix, _ in parts:
             features.update(U.prefix_keys(counter, prefix))

     for counter, _, column in parts:
         self.update_counters(counter, session, column)

     return U.prefix_keys(features, 'ts_')