Пример #1
0
    def setUp(self):
        """Load the raw-ink fixture and build per-user, per-label samples.

        Sets two attributes on the test case:

        * ``self.user_ink_data`` -- dict mapping each selected user id to a
          dict of ``label -> list`` of processed ink examples, capped at
          ``max_examples`` entries per label.
        * ``self.label_ink_pairs`` -- flat list of ``(label, ink)`` tuples
          covering every sampled example of every selected user.

        Each example is passed through ``filter_bad_ink``, ``json2array``,
        ``normalize_ink`` and ``np.nan_to_num`` before being stored.
        """
        fn = os.path.join(os.path.dirname(__file__),
                          'fixtures/raw_ink.p')
        # Use a context manager so the fixture file handle is closed
        # immediately rather than left to the garbage collector.
        with open(fn, "rb") as fixture:
            user_raw_ink = pickle.load(fixture)

        # Fix the random seed so the sub-sampling below is reproducible.
        random.seed(12345)

        all_users = ['user_1', 'user_32', 'user_6',
                     'user_29', 'user_9', 'user_35']
        labels = ['a', 'q', 'u', 'v']

        # Upper bound on the number of examples kept per (user, label).
        max_examples = 10

        user_ink_data = {}
        label_ink_pairs = []
        for userid in all_users:
            raw_ink = user_raw_ink[userid]
            normalized_ink = {}
            for label in labels:
                temp = [np.nan_to_num(normalize_ink(json2array(ink)))
                        for ink in filter_bad_ink(raw_ink[label])]
                # Only sample when there are more examples than the cap;
                # otherwise keep everything in its original order.
                if len(temp) > max_examples:
                    sampled_data = random.sample(temp, max_examples)
                else:
                    sampled_data = temp
                normalized_ink[label] = sampled_data
                label_ink_pairs += [(label, ink) for ink in sampled_data]
            user_ink_data[userid] = normalized_ink

        self.user_ink_data = user_ink_data
        self.label_ink_pairs = label_ink_pairs
Пример #2
0
    def setUp(self):
        """Load the clustered-data fixture and build weighted clusters.

        Sets two attributes on the test case:

        * ``self.clustered_data`` -- dict mapping each label to a list of
          clusters; every cluster is a list of ``(ink, weight)`` tuples with
          all weights equal to 1 and at most ``max_examples`` entries.
        * ``self.label_ink_pairs`` -- flat list of ``(label, ink)`` tuples
          covering every sampled example of every cluster.

        Each example is passed through ``filter_bad_ink``, ``json2array``,
        ``normalize_ink`` and ``np.nan_to_num`` before being stored.
        """
        fn = os.path.join(os.path.dirname(__file__),
                          'fixtures/clustered_data.p')
        # Use a context manager so the fixture file handle is closed
        # immediately rather than left to the garbage collector.
        with open(fn, "rb") as fixture:
            candidate_proto = pickle.load(fixture)

        # Fix the random seed so the sub-sampling below is reproducible.
        random.seed(12345)

        # Upper bound on the number of examples kept per cluster.
        max_examples = 20
        clustered_data = {}
        label_ink_pairs = []
        for label in ['a', 'u', 'v']:
            clustered_data[label] = []
            # The first element of each entry is not used here.
            for _, examples in candidate_proto[label]:
                data = [np.nan_to_num(normalize_ink(json2array(ink)))
                        for ink in filter_bad_ink(examples)]

                # Only sample when there are more examples than the cap;
                # otherwise keep everything in its original order.
                if len(data) > max_examples:
                    sampled_data = random.sample(data, max_examples)
                else:
                    sampled_data = data

                weights = [1] * len(sampled_data)
                # Materialise the pairs: on Python 3 ``zip`` returns a
                # one-shot iterator, which would be empty after the first
                # pass and does not support ``len()`` or indexing.
                clustered_data[label].append(list(zip(sampled_data, weights)))
                label_ink_pairs += [(label, ink) for ink in sampled_data]

        self.clustered_data = clustered_data
        self.label_ink_pairs = label_ink_pairs
Пример #3
0
 def setUp(self):
     """Load the clustered-data fixture and select one cluster for testing.

     Sets two attributes on the test case:

     * ``self.label`` -- the label under test (``'u'``).
     * ``self.ink_data`` -- the list of processed ink examples belonging to
       the second cluster (index 1) of that label.

     Each example is passed through ``filter_bad_ink``, ``json2array``,
     ``normalize_ink`` and ``np.nan_to_num`` before being stored.
     """
     fn = os.path.join(os.path.dirname(__file__),
                       'fixtures/clustered_data.p')
     # Use a context manager so the fixture file handle is closed
     # immediately rather than left to the garbage collector.
     with open(fn, "rb") as fixture:
         candidate_proto = pickle.load(fixture)
     # set test label
     self.label = 'u'
     # One list of processed examples per cluster; the first element of
     # each fixture entry is not used here.
     clustered_data = [
         [np.nan_to_num(normalize_ink(json2array(ink)))
          for ink in filter_bad_ink(examples)]
         for _, examples in candidate_proto[self.label]
     ]
     # set test cluster
     self.ink_data = clustered_data[1]