def as_vector(self):
    '''
    convert this record into a (features, label) pair for a prediction model

    The record has to be encoded already: the tag is compared against the
    literal 'enc' and the assertion fails otherwise.

    return:
    @X: numpy array holding, in order, the instance id, the instance's
        family / cpu / memory settings, the scale id, self.ts[0] and
        self.ts[1], with self.metrics concatenated behind them (axis 0)
    @Y: self.jct
    '''
    assert self.tag == 'enc', 'metrics un-encoded, unable to vectorize'

    lumos_conf = LumosConf()
    inst_type = self.inst_type
    inst_id = lumos_conf.get_inst_id(inst_type)
    detail = lumos_conf.get_inst_detailed_conf(inst_type)

    # Fixed-size header describing where and at which scale the record ran.
    header = np.array([
        inst_id,
        detail['family'],
        detail['cpu'],
        detail['memory'],
        lumos_conf.get_scale_id(self.scale),
        self.ts[0],
        self.ts[1],
    ])
    features = np.concatenate((header, self.metrics), axis=0)
    return features, self.jct
def get_train_test_data(self, train_scale='tiny', test_wl='', flag='single'):
    '''
    split the rankized records into training and testing samples

    Within every round and workload, each record profiled at `train_scale`
    is paired with each record of the same workload at every scale to be
    predicted. One sample is produced per pair:
        X = profiling instance conf + target instance conf
            + [target scale id] + encoded metrics of the profiling record
        Y = rank of the target record if self.ordinal, else its jct
    Samples of held-out workloads go to the testing set, all others go to
    the training set.

    param:
    @train_scale: the scale whose records supply the profiled metrics;
        when it is 'small', the 'tiny' scale is not predicted
    @test_wl: the workload that is to be used for testing; has to be a key
        of self.__data['1'] or one of 'HiBench' / 'BigBench'
    @flag: 'single' holds out the workload named exactly `test_wl`;
        'multi' holds out a whole suite: 'BigBench' selects workloads whose
        name contains 'hive', 'HiBench' selects those that do not. With
        'multi' and any other `test_wl`, the exact name is matched.

    return:
    @train_data: train_data[rnd][inst_type] -> {'X': [...], 'Y': [...]}
    @test_data: test_data[wl][rnd][inst_type][scale]
        -> {'X': [...], 'Y': [...]}
    (inst_type is the instance type of the profiling record)
    '''
    rankize_data = self.get_data_rankize()
    assert test_wl in self.__data['1'] or test_wl in (
        'HiBench', 'BigBench'), 'invalid test workload'
    assert flag in ('single',
                    'multi'), 'indicating single/multi testing workloads'

    def is_test_wl(wl):
        # Decide whether workload `wl` is held out for testing.
        if flag == 'multi':
            if test_wl == 'BigBench':
                return 'hive' in wl
            if test_wl == 'HiBench':
                return 'hive' not in wl
        # 'single' mode, or 'multi' mode with a concrete workload name.
        # The latter used to fall through and return None, which silently
        # put the requested test workload into the training set.
        return wl == test_wl

    conf = LumosConf()
    truncate = conf.get('dataset', 'truncate')
    fft_stat_encoder = FFTStatEncoder(truncate=truncate)

    train_data = defaultdict(lambda: defaultdict(lambda: {'X': [], 'Y': []}))
    test_data = defaultdict(lambda: defaultdict(lambda: defaultdict(
        lambda: defaultdict(lambda: {'X': [], 'Y': []}))))

    predict_scales = ['tiny', 'small', 'large', 'huge']
    if train_scale == 'small':
        predict_scales.remove('tiny')

    def iter_samples(wl_data):
        # Yield (profiling inst type, target scale, X, Y) for every
        # (profiling record, target record) pair of one workload.
        for record1 in wl_data[train_scale]:
            t_inst_type = record1.inst_type
            test_conf = conf.get_inst_detailed_conf(t_inst_type,
                                                    format='list')
            test_metrics_vec = fft_stat_encoder.encode(
                record1.metrics,
                record1.raw_metrics,
                sampling_interval=self.sampling_interval)
            for scale in predict_scales:
                target_scale = conf.get_scale_id(scale)
                for record2 in wl_data[scale]:
                    target_conf = conf.get_inst_detailed_conf(
                        record2.inst_type, format='list')
                    # Copy so every sample owns its own feature list.
                    X = test_conf.copy()
                    X.extend(target_conf)
                    X.append(target_scale)
                    X.extend(test_metrics_vec)
                    Y = record2.rank if self.ordinal else record2.jct
                    yield t_inst_type, scale, X, Y

    for rnd, rnd_data in rankize_data.items():
        for wl, wl_data in rnd_data.items():
            held_out = is_test_wl(wl)
            for t_inst_type, scale, X, Y in iter_samples(wl_data):
                if held_out:
                    bucket = test_data[wl][rnd][t_inst_type][scale]
                else:
                    bucket = train_data[rnd][t_inst_type]
                bucket['X'].append(X)
                bucket['Y'].append(Y)

    return train_data, test_data