def __init__(self, number_of_kernels_per_dim, number_of_actions, gamma, learning_rate, hard_theta=False):
    # Set max value for normalization of inputs
    self._max_normal = 1
    # get state / action information
    self.data_transformer = DataTransformer()
    state_mean = [-3.00283763e-01, 5.61618575e-05]
    state_std = [0.51981243, 0.04024895]
    self.data_transformer.set(state_mean, state_std)
    self._actions = number_of_actions
    # create RBF features:
    self.feature_extractor = RadialBasisFunctionExtractor(number_of_kernels_per_dim)
    self.number_of_features = self.feature_extractor.get_number_of_features()
    # the weights of the q learner
    if hard_theta:
        self.theta = np.random.uniform(-10, 10, size=number_of_actions * self.number_of_features)
    else:
        self.theta = np.random.uniform(-0.001, 0, size=number_of_actions * self.number_of_features)
    # discount factor for the solver
    self.gamma = gamma
    self.learning_rate = learning_rate
def preprocess(train=None, data=None):
    params = TransformationParameter()
    params.stride = 8
    params.crop_size_x = 368
    params.crop_size_y = 368
    params.target_dist = 0.6
    params.scale_prob = 1
    params.scale_min = 0.5
    params.scale_max = 1.1
    params.max_rotate_degree = 40
    params.center_perterb_max = 40
    params.do_clahe = False
    params.num_parts_in_annot = 17
    params.num_parts = 56
    params.mirror = True

    dataTransformer = DataTransformer(params)
    np = 2 * (params.num_parts + 1)
    stride = params.stride
    grid_x = params.crop_size_x / stride
    grid_y = params.crop_size_y / stride
    channelOffset = grid_y * grid_x
    vec_channels = 38
    heat_channels = 19
    ch = vec_channels + heat_channels
    start_label_data = (params.num_parts + 1) * channelOffset
    transformed_data = []   # size: params.crop_size_x * params.crop_size_y * 3
    transformed_label = []  # size: grid_x * grid_y * np

    # Transformation
    print("Transforming...")
    data_img, mask_img, label = dataTransformer.transform(data)
    return data_img, mask_img, label
def testTransform(self):
    data_transformer = DataTransformer(mapping=test_data.transformer_mapping)
    transformed_data = data_transformer.transform(test_data.data_from_dataset)
    print(transformed_data)
    self.assertDictEqual(transformed_data, test_data.transformed_data)
def testAddToDataDict(self):
    key = 'demographic.age'
    value = 60
    data_type = "string"
    data_dict = {}
    DataTransformer.add_to_data_dict(key, value, data_type, data_dict)
    print(data_dict)
    self.assertEqual(data_dict, {'demographic': {'age': 60}})
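# A minimal sketch of the dotted-key behaviour implied by testAddToDataDict above.
# This is an assumption for illustration, not the project's DataTransformer code;
# data_type is accepted only to match the call signature and is ignored here.
def add_to_data_dict(key, value, data_type, data_dict):
    parts = key.split('.')
    node = data_dict
    for part in parts[:-1]:
        node = node.setdefault(part, {})
    node[parts[-1]] = value

# reproduces the assertion from the test above
d = {}
add_to_data_dict('demographic.age', 60, 'string', d)
assert d == {'demographic': {'age': 60}}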
class Solver:
    def __init__(self, number_of_kernels_per_dim, number_of_actions, gamma, learning_rate):
        # Set max value for normalization of inputs
        self._max_normal = 1
        # get state / action information
        self.data_transformer = DataTransformer()
        state_mean = [-3.00283763e-01, 5.61618575e-05]
        state_std = [0.51981243, 0.04024895]
        self.data_transformer.set(state_mean, state_std)
        self._actions = number_of_actions
        # create RBF features:
        self.feature_extractor = RadialBasisFunctionExtractor(number_of_kernels_per_dim)
        self.number_of_features = self.feature_extractor.get_number_of_features()
        # the weights of the q learner
        self.theta = np.random.uniform(-0.001, 0, size=number_of_actions * self.number_of_features)
        # discount factor for the solver
        self.gamma = gamma
        self.learning_rate = learning_rate

    def _normalize_state(self, s):
        return self.data_transformer.transform_states(np.array([s]))[0]

    def get_features(self, state):
        normalized_state = self._normalize_state(state)
        features = self.feature_extractor.encode_states_with_radial_basis_functions([normalized_state])[0]
        return features

    def get_q_val(self, features, action):
        theta_ = self.theta[action * self.number_of_features: (1 + action) * self.number_of_features]
        return np.dot(features, theta_)

    def get_all_q_vals(self, features):
        all_vals = np.zeros(self._actions)
        for a in range(self._actions):
            all_vals[a] = self.get_q_val(features, a)
        return all_vals

    def get_max_action(self, state):
        sparse_features = self.get_features(state)
        q_vals = self.get_all_q_vals(sparse_features)
        return np.argmax(q_vals)

    def get_state_action_features(self, state, action):
        state_features = self.get_features(state)
        all_features = np.zeros(len(state_features) * self._actions)
        all_features[action * len(state_features): (1 + action) * len(state_features)] = state_features
        return all_features

    def update_theta(self, state, action, reward, next_state, done):
        # compute the new weights and set in self.theta. also return the bellman error (for tracking).
        assert False, "implement update_theta"
        return 0.0
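# Hedged sketch of the update_theta stub above, assuming a standard semi-gradient
# Q-learning update on the linear approximation Q(s, a) = phi(s, a) . theta.
# This is an illustration, not the exercise's reference solution; the free-function
# name update_theta_sketch and passing the solver in explicitly are my own choices.
import numpy as np

def update_theta_sketch(solver, state, action, reward, next_state, done):
    features = solver.get_state_action_features(state, action)
    q_sa = np.dot(features, solver.theta)
    if done:
        target = reward
    else:
        next_state_features = solver.get_features(next_state)
        target = reward + solver.gamma * np.max(solver.get_all_q_vals(next_state_features))
    bellman_error = target - q_sa
    # the gradient of Q(s, a) w.r.t. theta is just phi(s, a) for a linear approximator
    solver.theta = solver.theta + solver.learning_rate * bellman_error * features
    return bellman_error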
def test_transform_from_file(self):
    data_transformer = DataTransformer.from_mapping_file(
        "../../resources/mapping/colaus_cineca_mapping_csv")
    transformed_data = data_transformer.transform(test_data.data_from_dataset)
    print(transformed_data)
    self.assertDictEqual(transformed_data, test_data.transformed_data)
def run(
        self,
        x,
        y,
        labels,
        figname='',
        figsize=(15, 5),
        bands=3,
        colors=("#8BBCD4", "#2B7ABD", "#0050A0", "#EF9483", "#E02421", "#A90E0A")
):  # dark blue, medium blue, light blue, dark red, medium red, light red
    """
    Return the entire graph and its plt object
    Look at DataTransformer.transform to see how the data is transformed.

    Keyword arguments:
    x: single array with x values. Distance between neighboring entries has to be the same
    y: two-dimensional array with y values for each entry.
    labels: array with strings, shown as the labels on the y-axis.
    figsize: (a,b) used when creating the figure (optional)
    bands: default is 3
    colors: array with the colors used for the bands, from dark to light blue, then from dark red to light red.

    Requirements:
    len(y[i]) == len(x) for all 0 <= i < len(y)
    len(y[0]) == len(labels)
    len(colors) == 2*bands

    RETURN: plt object
    """
    self.check_valid_params(x, y, labels, figsize, bands, colors)
    n = len(y)
    F = self.create_figure(figname, figsize)
    df = DataTransformer(y, bands)
    for i in range(n):
        ax = F.add_subplot(n, 1, i + 1)
        transformed_x, bands = df.transform(y[i], x)
        for idx, band in enumerate(bands):
            ax.fill_between(transformed_x[idx], 0, band, color=colors[idx])
        self.adjust_visuals_line(x, df, ax, i, labels)
    return plt
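# Hedged usage sketch for the run() method above. The HorizonGraph class name and
# the data values are assumptions; the shapes follow the docstring requirements
# (len(y[i]) == len(x) for all i, len(y[0]) == len(labels), len(colors) == 2 * bands).
x = [1, 2, 3]
y = [[4, 1, -9],
     [0, -2, -1],
     [2, 5, 3]]
labels = ['series A', 'series B', 'series C']

grapher = HorizonGraph()  # hypothetical class exposing the run() method above
plt = grapher.run(x, y, labels, figname='demo', bands=3)
plt.savefig('horizon_demo.png')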
def preprocessing(train=None):
    params = TransformationParameter()
    params.stride = 8
    params.crop_size_x = 368
    params.crop_size_y = 368
    params.target_dist = 0.6
    params.scale_prob = 1
    params.scale_min = 0.5
    params.scale_max = 1.1
    params.max_rotate_degree = 40
    params.center_perterb_max = 40
    params.do_clahe = False
    params.num_parts_in_annot = 17
    params.num_parts = 56
    params.mirror = True

    dataTransformer = DataTransformer(params)
    # dataTransformer.initRand()
    np = 2 * (params.num_parts + 1)
    stride = params.stride
    grid_x = params.crop_size_x / stride
    grid_y = params.crop_size_y / stride
    channelOffset = grid_y * grid_x
    vec_channels = 38
    heat_channels = 19
    ch = vec_channels + heat_channels
    start_label_data = (params.num_parts + 1) * channelOffset
    transformed_data = []   # size: params.crop_size_x * params.crop_size_y * 3
    transformed_label = []  # size: grid_x * grid_y * np

    # Dataset
    dataset_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'dataset'))
    if train:
        anno_path = os.path.join(dataset_dir, "annotations/person_keypoints_train2017.json")
    else:
        anno_path = os.path.join(dataset_dir, "annotations/person_keypoints_val2017.json")

    # Transformation
    data_img, mask_img, label = dataTransformer.transform(filename, anno_path)
    return data_img, mask_img, label
def main():
    # Send request
    # See prediction_service.proto for gRPC request/response details.
    X = ["@kg.MutualFund 基金@初始规模 是怎样"]
    # expected output: y = 1
    y = ["class_id_1"]
    data_transformer = DataTransformer(train_data_file)
    X_encoded, y_encoded = data_transformer.fit(X, y)

    # REST
    url = sys.argv[1]
    data = {
        'signature_name': 'textclassified',
        'instances': [{'inputX': X_encoded[0].tolist()}]
    }
    data = json.dumps(data)
    r = requests.post(url, data=data)
    print('test:{}'.format(X[0]))
    print(r.text)
    print('y_true:{}'.format(y_encoded))
def main():
    # CoLaus
    DataPipeline() \
        .with_consumer(CsvDataConsumer(data_dir + "CoLaus_sample_100linesShuffled.csv", "\t")) \
        .with_processor(DataTransformer.from_mapping_file(mapping_dir + "colaus_cineca_mapping_questionnaire.csv")) \
        .with_processor(FieldValueTransformer.from_mapping_file(mapping_dir + "colaus_data_label_mapping.xlsx")) \
        .with_producer(JsonProducer(data_dir + "colaus_cineca.json")) \
        .run()

    # H3Africa
    DataPipeline() \
        .with_consumer(CsvDataConsumer(data_dir + "h3africa_dummy_datasets_for_cineca_demo.csv", ";")) \
        .with_processor(DataTransformer.from_mapping_file(mapping_dir + "h3africa_cineca_mapping_questionnaire.csv")) \
        .with_producer(JsonProducer(data_dir + "h3africa_cineca.json")) \
        .run()

    # CHILD
    DataPipeline() \
        .with_consumer(CsvDataConsumer(data_dir + "child_demo_data.csv", ",")) \
        .with_processor(FieldValueTransformerPre.from_mapping_file("../resources/mapping/child_initial_data_label_mapping.xlsx")) \
        .with_processor(DataTransformer.from_mapping_file(mapping_dir + "child_cineca_mapping_questionnaire.csv")) \
        .with_producer(JsonProducer(data_dir + "child_cineca.json")) \
        .run()
def run(spark, student_file, teacher_file, out_path='report.json'):
    """
    Main driver function of data processor application
    """
    io_handler = IOHandler(spark)
    try:
        student_df = io_handler.spark_read_file(student_file, delim='_')
        logger.info("Successfully loaded student file from %s", student_file)
        teacher_df = io_handler.spark_read_file(teacher_file)
        logger.info("Successfully loaded teacher file from %s", teacher_file)
    except FileNotFoundError as error_message:
        logger.error(error_message)
        return

    joined_df = join_dfs(student_df, teacher_df, 'cid')
    logger.info("Finished joining dataframes")

    transformer = DataTransformer(spark)
    output_df = transformer.fit_output_schema(joined_df)
    logger.info("Fit data to output schema:")
    output_df.show()

    io_handler.write_report(output_df, 'json', out_path)
    logger.info("Processing completed")
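# Hedged sketch of the join_dfs helper called above, assuming it is a plain inner
# join on the shared key column; the project's helper may select or rename columns.
def join_dfs(left_df, right_df, key_column):
    """Join two Spark DataFrames on key_column (inner join assumed)."""
    return left_df.join(right_df, on=key_column, how='inner')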
def run(self, x, y, labels, figsize=(20, 3), bands=3,
        colors=("#8BBCD4", "#2B7ABD", "#0050A0", "#EF9483", "#E02421", "#A90E0A")):
    # dark blue, medium blue, light blue, dark red, medium red, light red
    """
    Return the entire graph and its plt object
    Look at DataTransformer.transform to see how the data is transformed.

    Keyword arguments:
    x: single array with x values. Distance between neighboring entries has to be the same
    y: two-dimensional array with y values for each entry.
    labels: array with strings, shown as the labels on the y-axis.
    figsize: (a,b) used when creating the figure (optional)
    bands: default is 3
    colors: array with the colors used for the bands, from dark to light blue, then from dark red to light red.

    Requirements:
    len(y[i]) == len(x) for all 0 <= i < len(y)
    len(y[0]) == len(labels)
    len(colors) == 2*bands

    RETURN: plt object
    """
    self.check_valid_params(x, y, labels, figsize, bands, colors)
    n = len(y)
    F = self.create_figure(figsize)
    df = DataTransformer(y, bands)
    for i in range(n):
        ax = F.add_subplot(n, 1, i + 1)
        transformed_x, bands = df.transform(y[i], x)
        for idx, band in enumerate(bands):
            ax.fill_between(transformed_x[idx], 0, band, color=colors[idx])
        self.adjust_visuals_line(x, df, ax, i, labels)
    return plt
def training_the_model(samples_to_collect=100000, seed=100):
    number_of_kernels_per_dim = [10, 8]
    gamma = 0.999
    w_updates = 20
    evaluation_number_of_games = 50
    evaluation_max_steps_per_game = 300
    np.random.seed(seed)

    env = MountainCarWithResetEnv()
    # collect data
    states, actions, rewards, next_states, done_flags = DataCollector(env).collect_data(samples_to_collect)
    # get data success rate
    data_success_rate = np.sum(rewards) / len(rewards)
    print(f'Data Success Rate {data_success_rate}')
    # standardize data
    data_transformer = DataTransformer()
    data_transformer.set_using_states(np.concatenate((states, next_states), axis=0))
    states = data_transformer.transform_states(states)
    next_states = data_transformer.transform_states(next_states)
    # process with radial basis functions
    feature_extractor = RadialBasisFunctionExtractor(number_of_kernels_per_dim)
    # encode all states:
    encoded_states = feature_extractor.encode_states_with_radial_basis_functions(states)
    encoded_next_states = feature_extractor.encode_states_with_radial_basis_functions(next_states)
    # set a new linear policy
    linear_policy = LinearPolicy(feature_extractor.get_number_of_features(), 3, True)
    # but set the weights as random
    linear_policy.set_w(np.random.uniform(size=linear_policy.w.shape))
    # start an object that evaluates the success rate over time
    evaluator = GamePlayer(env, data_transformer, feature_extractor, linear_policy)

    success_rate_vs_iteration = list()
    for lspi_iteration in range(w_updates):
        print(f'Starting LSPI iteration {lspi_iteration}')
        new_w = compute_lspi_iteration(encoded_states, encoded_next_states, actions, rewards,
                                       done_flags, linear_policy, gamma)
        norm_diff = linear_policy.set_w(new_w)
        success_rate = evaluator.play_games(evaluation_number_of_games, evaluation_max_steps_per_game)
        success_rate_vs_iteration.append(success_rate)
        if norm_diff < 0.00001:
            break
    print('LSPI Done')
    return success_rate_vs_iteration
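# Hedged sketch of the kind of LSTDQ update compute_lspi_iteration performs,
# assuming block one-hot state-action features and greedy next actions under the
# current weights. The flat weight layout, argument shapes and the ridge term are
# my own assumptions for illustration; this is not the course's reference code.
import numpy as np

def lstdq_step_sketch(encoded_states, encoded_next_states, actions, rewards,
                      done_flags, w_flat, gamma, num_actions=3):
    n_samples, k = encoded_states.shape
    d = k * num_actions

    def phi(encoded, acts):
        # copy each encoded state into the block belonging to its action
        out = np.zeros((len(acts), d))
        for i, a in enumerate(acts):
            out[i, a * k:(a + 1) * k] = encoded[i]
        return out

    # greedy next actions under the current weights
    q_next = encoded_next_states @ w_flat.reshape(num_actions, k).T
    next_actions = np.argmax(q_next, axis=1)

    phi_sa = phi(encoded_states, np.asarray(actions, dtype=int))
    phi_next = phi(encoded_next_states, next_actions)
    phi_next[np.asarray(done_flags, dtype=bool)] = 0.0  # no bootstrap from terminal states

    a_mat = phi_sa.T @ (phi_sa - gamma * phi_next) / n_samples
    b_vec = phi_sa.T @ np.asarray(rewards, dtype=float) / n_samples
    return np.linalg.solve(a_mat + 1e-6 * np.eye(d), b_vec)  # small ridge term for stability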
def get_predictions(net, true_intent, intent_map, slots_map, context, batch_size):
    """Get predictions for every item in the intent.

    It returns a list where the index is the same as in the validation item.
    Each record is of the following format:
    Tuple(Predicted_Intent, List[(List[bits of text], slot)])"""
    result = []

    idx_to_slot = {v: k for k, v in slots_map.items()}
    idx_to_intent = {v: k for k, v in intent_map.items()}

    intent_dev_dataset = NLUBenchmarkDataset(SacreMosesTokenizer(), 'val', intent_map,
                                             slots_map, intent_to_load=true_intent)
    transformer = DataTransformer(ELMoCharVocab())
    transformed_dev_dataset = intent_dev_dataset.transform(transformer, lazy=False)

    batchify_fn = Tuple(Pad(), Stack(), Pad(), Stack())
    dev_dataloader = DataLoader(transformed_dev_dataset,
                                batch_size=batch_size,
                                num_workers=multiprocessing.cpu_count() - 3,
                                batchify_fn=batchify_fn)

    for i, (data, valid_lengths, entities, intent) in enumerate(dev_dataloader):
        items_per_iteration = data.shape[0]
        length = data.shape[1]

        data = data.as_in_context(context)
        hidden_state = net.elmo_container[0].begin_state(mx.nd.zeros,
                                                         batch_size=items_per_iteration,
                                                         ctx=context)
        mask = get_data_mask(length, valid_lengths, items_per_iteration, context)

        intents, slots = net(data, hidden_state, mask)
        score, slots_seq = net.crf(slots.transpose(axes=(1, 0, 2)))

        intents_prediction = intents.argmax(axis=1).asnumpy()
        slots_prediction = slots_seq.asnumpy()

        for rec_id, pred_intent in enumerate(intents_prediction):
            text = intent_dev_dataset[rec_id][0]
            tokens = intent_dev_dataset[rec_id][1]
            slot_prediction = slots_prediction[rec_id]
            prediction_item = get_prediction_item(idx_to_slot, slot_prediction, tokens)
            result.append((idx_to_intent[pred_intent], prediction_item, text, tokens))

    return result
def test_output_files_exist(self):
    warnings.simplefilter("ignore", ResourceWarning)
    d = DataTransformer()
    l1 = 0
    l2 = 0
    try:
        self.s3 = boto3.resource('s3')
    except BotoCoreError as e:
        if isinstance(e, NoCredentialsError):
            print("Invalid credentials")
        else:
            print("Error message -" + str(e))
        sys.exit()

    self.bucketDest = self.s3.Bucket(d.OUTPUT_BUCKET_NAME)
    exists = True
    try:
        self.s3.meta.client.head_bucket(Bucket=d.OUTPUT_BUCKET_NAME)
    except ClientError as e:
        error_code = int(e.response['Error']['Code'])
        print(e)
        self.assertTrue(1 == 0)
        if error_code == 404:
            exists = False

    if exists:
        self.list = self.s3.meta.client.list_objects(Bucket=d.INPUT_BUCKET_NAME)['Contents']
        for s3_key in self.list:
            s3_object = s3_key['Key']
            if not s3_object.endswith("/"):
                l1 += 1
        self.list2 = self.s3.meta.client.list_objects(Bucket=d.OUTPUT_BUCKET_NAME)['Contents']
        for s3_key in self.list2:
            s3_object = s3_key['Key']
            if not s3_object.endswith("/"):
                l2 += 1
        self.assertTrue(l1 == l2)
        help='path to save the final model')
    parser.add_argument(
        '--gpu',
        type=int,
        default=0,
        help='id of the gpu to use. Set it to empty means to use cpu.')
    arg = parser.parse_args()
    return arg


if __name__ == '__main__':
    args = parse_args()
    context = mx.cpu(0) if args.gpu is None else mx.gpu(args.gpu)

    segments = ['train', 'dev']
    transformer = DataTransformer(segments)
    dataloaders = [
        transform_segment(transformer, segment, args) for segment in segments
    ]

    model = get_model(transformer._word_vocab, transformer._char_vocab, args)
    trainer = gluon.Trainer(model.collect_params(), 'ftml',
                            {'learning_rate': args.lr})

    best_model_name = run_training(model, trainer, dataloaders[0],
                                   dataloaders[1], args)
    model.load_parameters(best_model_name, ctx=context)
    avg_L, acc, em, f1, predictions = run_evaluate(model, dataloaders[1], args,
    w_updates = 100
    evaluation_number_of_games = 10
    evaluation_max_steps_per_game = 1000
    np.random.seed(123)
    # np.random.seed(234)

    env = MountainCarWithResetEnv()
    # collect data
    states, actions, rewards, next_states, done_flags = DataCollector(env).collect_data(samples_to_collect)
    # get data success rate
    data_success_rate = np.sum(rewards) / len(rewards)
    print(f'success rate {data_success_rate}')
    # standardize data
    data_transformer = DataTransformer()
    data_transformer.set_using_states(np.concatenate((states, next_states), axis=0))
    states = data_transformer.transform_states(states)
    next_states = data_transformer.transform_states(next_states)
    # process with radial basis functions
    feature_extractor = RadialBasisFunctionExtractor(number_of_kernels_per_dim)
    # encode all states:
    encoded_states = feature_extractor.encode_states_with_radial_basis_functions(states)
    encoded_next_states = feature_extractor.encode_states_with_radial_basis_functions(next_states)
    # set a new linear policy
    linear_policy = LinearPolicy(feature_extractor.get_number_of_features(), 3, True)
    # but set the weights as random
tf.compat.v1.flags.DEFINE_integer("evaluate_every", 1,
                                  "Evaluate model on dev set after this many steps ")
tf.compat.v1.flags.DEFINE_integer("checkpoint_every", 10,
                                  "Save model after this many steps ")
tf.compat.v1.flags.DEFINE_integer("num_checkpoints", 5,
                                  "Number of checkpoints to store ")
tf.compat.v1.flags.DEFINE_float("regulation_rate", 5e-4,
                                "L2 regularization rate ")

FLAGS = tf.compat.v1.flags.FLAGS

export_path_base = FLAGS.export_path_base
export_path = os.path.join(
    compat.as_bytes(export_path_base),
    compat.as_bytes(str(FLAGS.model_version)))
assert not os.path.exists(export_path), \
    'Export directory already exists. Please specify a different export directory: {}'.format(export_path)

data_transformer = DataTransformer(FLAGS.train_data_file)
x_train, y_train = data_transformer.fit_with_file(FLAGS.train_data_file, FLAGS.num_class)
num_labels = FLAGS.num_class
print(x_train.shape, y_train.shape)
x_dev, y_dev = data_transformer.fit_with_file(FLAGS.dev_data_file, FLAGS.num_class)

with tf.Graph().as_default():
    sess = tf.compat.v1.Session()
    with sess.as_default():
        svm = SVM(sequence_length=x_train.shape[1],
                  num_classes=FLAGS.num_class,
                  l2_reg_lambda=FLAGS.regulation_rate)
        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.compat.v1.train.AdamOptimizer(0.1)
def setUp(self):
    self.d = DataTransformer([[9, -9, 0]], 3)
    self.common_x_ret = [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]
class DataTransformerTest(unittest.TestCase):
    def setUp(self):
        self.d = DataTransformer([[9, -9, 0]], 3)
        self.common_x_ret = [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]

    ## mixed positive and negative values
    def test_top_range_mixed_negative_positives(self):
        self.run_me([4, 1, -9],
                    [[1, 2, 2.5, 3], [1, 2, 2.5, 3], [1, 2, 2.5, 3], [1, 2, 2.5, 3], [1, 2, 2.5, 3], [1, 2, 2.5, 3]],
                    [[3, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 3], [0, 0, 0, 3], [0, 0, 0, 3]])

    def test_positive_zero_negative_positive(self):
        self.run_me([4, 0, -9], self.common_x_ret,
                    [[3, 0, 0], [1, 0, 0], [0, 0, 0], [0, 0, 3], [0, 0, 3], [0, 0, 3]])

    def test_mixed_zero_at_end(self):
        self.run_me([4, -1, 0],
                    [[1, 1.5, 2, 3], [1, 1.5, 2, 3], [1, 1.5, 2, 3], [1, 1.5, 2, 3], [1, 1.5, 2, 3], [1, 1.5, 2, 3]],
                    [[3, 0, 0, 0], [1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0], [0, 0, 0, 0]])

    def test_positive_negative_positive(self):
        self.run_me([4, 1, -9],
                    [[1, 2, 2.5, 3], [1, 2, 2.5, 3], [1, 2, 2.5, 3], [1, 2, 2.5, 3], [1, 2, 2.5, 3], [1, 2, 2.5, 3]],
                    [[3, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 3], [0, 0, 0, 3], [0, 0, 0, 3]])

    def test_positive_zero_negative(self):
        self.run_me([4, 1, -9, 3],
                    [[1, 2, 2.5, 3, 3.5, 4], [1, 2, 2.5, 3, 3.5, 4], [1, 2, 2.5, 3, 3.5, 4],
                     [1, 2, 2.5, 3, 3.5, 4], [1, 2, 2.5, 3, 3.5, 4], [1, 2, 2.5, 3, 3.5, 4]],
                    [[3, 1, 0, 0, 0, 3], [1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
                     [0, 0, 0, 3, 0, 0], [0, 0, 0, 3, 0, 0], [0, 0, 0, 3, 0, 0]],
                    x_data=[1, 2, 3, 4])

    ## only positive OR negative values
    def test_positive_bottom_edge(self):
        self.run_me([4.5, 1, 3], self.common_x_ret,
                    [[3, 1, 3], [1.5, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]])

    def test_zero_at_beginning(self):
        self.run_me([0, 3, 0], self.common_x_ret,
                    [[0, 3, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]])

    def test_zero_at_end(self):
        self.run_me([4, 3, 0], self.common_x_ret,
                    [[3, 3, 0], [1, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]])

    def test_positive_zero_positive(self):
        self.run_me([4, 0, 1], self.common_x_ret,
                    [[3, 0, 1], [1, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]])

    def test_top_range_no_negative(self):
        self.run_me([4, 1, 9], self.common_x_ret,
                    [[3, 1, 3], [1, 0, 3], [0, 0, 3], [0, 0, 0], [0, 0, 0], [0, 0, 0]])

    def test_edgecases_medium(self):
        self.assertTrue(self.d.transform([2, 5.99, 3.81], [1, 2, 3]) ==
                        (self.common_x_ret,
                         [[2, 3, 3], [0, 2.99, 0.81], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]))

    def test_choice(self):
        self.assertTrue(self.d.transform([4, 1, 8], [1, 2, 3]) ==
                        (self.common_x_ret,
                         [[3, 1, 3], [1, 0, 3], [0, 0, 2], [0, 0, 0], [0, 0, 0], [0, 0, 0]]))

    def test_edge_top_negative_value(self):
        self.assertTrue(self.d.transform([0, -7.5, -9], [1, 2, 3]) ==
                        (self.common_x_ret,
                         [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 3, 3], [0, 3, 3], [0, 1.5, 3]]))

    def test_edge_middle_negative_value(self):
        self.assertTrue(self.d.transform([-3.5, -6, -4], [1, 2, 3]) ==
                        (self.common_x_ret,
                         [[0, 0, 0], [0, 0, 0], [0, 0, 0], [3, 3, 3], [0.5, 3, 1], [0, 0, 0]]))

    def test_edge_bottom_negative_value(self):
        self.run_me([-0.5, -3, -8], self.common_x_ret,
                    [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0.5, 3, 3], [0, 0, 3], [0, 0, 2]])

    def test_regular_bottom_negative_value(self):
        self.run_me([0, -2, -1], self.common_x_ret,
                    [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 2, 1], [0, 0, 0], [0, 0, 0]])

    def run_me(self, data, x, y, x_data=[1, 2, 3]):
        a, b = self.d.transform(data, x_data)
        print(a)
        print(b)
        self.assertTrue(a == x)
        self.assertTrue(b == y)
    return best_model_path


if __name__ == '__main__':
    args = parse_args()
    context = mx.cpu(0) if args.gpu is None else mx.gpu(args.gpu)

    train_dataset = NLUBenchmarkDataset(SacreMosesTokenizer(), 'train_full')
    print(train_dataset.get_intent_map())
    print(train_dataset.get_slots_map())
    dev_dataset = NLUBenchmarkDataset(SacreMosesTokenizer(), 'val',
                                      train_dataset.get_intent_map(),
                                      train_dataset.get_slots_map())

    transformer = DataTransformer(ELMoCharVocab())
    transformed_train_dataset = train_dataset.transform(transformer, lazy=False)
    transformed_dev_dataset = dev_dataset.transform(transformer, lazy=False)

    batchify_fn = Tuple(Pad(), Stack(), Pad(), Stack())
    train_dataloader = DataLoader(transformed_train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=multiprocessing.cpu_count() - 3,
                                  batchify_fn=batchify_fn)
    dev_dataloader = DataLoader(transformed_dev_dataset,
                                batch_size=args.batch_size,
                                shuffle=True,
                                num_workers=multiprocessing.cpu_count() - 3,
def make_datatransformer(self, dt_args):
    self.X_dt = DataTransformer(**dt_args)
    self.y_dt = DataTransformer(**dt_args)
from data_transformer import DataTransformer
from movie_tables import Movie, Country, Genre, movies_genres_association
from json import loads
from flask import Flask, jsonify, request
from base import session
from auth_provider import token_auth, basic_auth, generate_auth_token

app = Flask('MoviesREST')
db = DataTransformer()


@app.route('/token')
@basic_auth.login_required
def get_auth_token():
    token = generate_auth_token()
    return jsonify({'token': token.decode('ascii')})


@app.route('/movies', methods=['GET'])
@token_auth.login_required
def get_movie():
    result = session.query(Movie.id, Movie.title, Movie.year, Country.name, Genre.name) \
        .join(Country, isouter=True) \
        .join(movies_genres_association, isouter=True) \
        .join(Genre, isouter=True) \
        .all()
    return db.transform_dataset_into_json(result)


@app.route('/movies/<id>', methods=['GET'])
    precision = 1.0 * correct / extract
    recall = 1.0 * correct / standard
    f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1, correct, extract, standard


if __name__ == '__main__':
    args = parse_args()
    context = mx.cpu(0) if args.gpu is None else mx.gpu(args.gpu)

    train_dataset = INSPECDataset('train')
    dev_dataset = INSPECDataset('dev')
    test_dataset = INSPECDataset('test')

    vocab = get_vocab([train_dataset, dev_dataset])
    transformer = DataTransformer(vocab, args.seq_len)

    train_dataloader = DataLoader(train_dataset.transform(transformer),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=multiprocessing.cpu_count() - 3)
    dev_dataloader = DataLoader(dev_dataset.transform(transformer),
                                batch_size=args.batch_size,
                                shuffle=True,
                                num_workers=multiprocessing.cpu_count() - 3)
    test_dataloader = DataLoader(test_dataset.transform(transformer),
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=multiprocessing.cpu_count() - 3)

    model = get_model(len(vocab), args.embedding_dim, args.hidden,
def run(
        self,
        x,
        y,
        labels,
        figsize=(15, 20),
        bands=3,
        colors=("#8BBCD4", "#2B7ABD", "#0050A0", "#EF9483", "#E02421", "#A90E0A")
):  # dark blue, medium blue, light blue, dark red, medium red, light red
    """
    Return the entire graph and its plt object
    Look at DataTransformer.transform to see how the data is transformed.

    Keyword arguments:
    x: single array with x values. Distance between neighboring entries has to be the same
    y: two-dimensional array with y values for each entry.
    labels: array with strings, shown as the labels on the y-axis.
    figsize: (a,b) used when creating the figure (optional)
    bands: default is 3
    colors: array with the colors used for the bands, from dark to light blue, then from dark red to light red.

    Requirements:
    len(y[i]) == len(x) for all 0 <= i < len(y)
    len(y[0]) == len(labels)
    len(colors) == 2*bands

    RETURN: plt object
    """
    self.check_valid_params(x, y, labels, figsize, bands, colors)
    n = len(y[0, :])
    F, axes = plt.subplots(n, 1, figsize=figsize, sharex=True, sharey=True)
    df = DataTransformer(y, bands)
    for i, ax in enumerate(axes.flatten()):
        transformed_x, ybands = df.transform(y[:, i], x)
        for idx, band in enumerate(ybands):
            ax.fill_between(transformed_x[idx], 0, band, color=colors[idx])
        self.adjust_visuals_line(x, df, ax, i, n, labels)

    F.text(0.5, 0.04, 'Time', ha='center', size=30)
    F.text(0.04, 0.5, 'Error to observation ratio', va='center', rotation='vertical', size=30)

    handles = []
    legend_colors = ["#A90E0A", "#E02421", "#EF9483", "#8BBCD4", "#2B7ABD", "#0050A0"]
    for c in legend_colors:
        handles.append(self.patch_creator(c))
    bandwidths = int(df.max) / bands
    lowerbounds = np.arange(int(df.min), int(df.max), bandwidths)
    labels = [str(int(b)) + ' - ' + str(int(b + bandwidths)) for b in lowerbounds]
    F.legend(handles, labels, ncol=bands * 2, loc='upper center', fontsize='xx-large')
    return plt
def run_lspi(seed, w_updates=20, samples_to_collect=100000, evaluation_number_of_games=1,
             evaluation_max_steps_per_game=200, thresh=0.00001, only_final=False):
    """
    This is the main lspi function
    :param seed: random seed for the run
    :param w_updates: how many w updates to do
    :param samples_to_collect: how many samples to collect
    :param evaluation_number_of_games: how many game evaluations to do
    :param evaluation_max_steps_per_game: how many steps to allow the evaluation game to run
    :param thresh: the threshold for the stopping condition
    :param only_final: run evaluation only at the end of the run
    :return: None
    """
    res_dir = './Results/'
    np.random.seed(seed)
    number_of_kernels_per_dim = [12, 10]
    gamma = 0.999

    env = MountainCarWithResetEnv()
    # collect data
    states, actions, rewards, next_states, done_flags = DataCollector(env).collect_data(samples_to_collect)
    # get data success rate
    data_success_rate = np.sum(rewards) / len(rewards)
    print('success rate: {}'.format(data_success_rate))
    # standardize data
    data_transformer = DataTransformer()
    data_transformer.set_using_states(np.concatenate((states, next_states), axis=0))
    states = data_transformer.transform_states(states)
    next_states = data_transformer.transform_states(next_states)
    # process with radial basis functions
    feature_extractor = RadialBasisFunctionExtractor(number_of_kernels_per_dim)
    # encode all states:
    encoded_states = feature_extractor.encode_states_with_radial_basis_functions(states)
    encoded_next_states = feature_extractor.encode_states_with_radial_basis_functions(next_states)
    # set a new linear policy
    linear_policy = LinearPolicy(feature_extractor.get_number_of_features(), 3, True)
    # but set the weights as random
    linear_policy.set_w(np.random.uniform(size=linear_policy.w.shape))
    # start an object that evaluates the success rate over time
    evaluator = GamePlayer(env, data_transformer, feature_extractor, linear_policy)
    # success_rate = evaluator.play_games(evaluation_number_of_games, evaluation_max_steps_per_game)
    # print("Initial success rate: {}".format(success_rate))

    performances = []
    if not only_final:
        performances.append(evaluator.play_games(evaluation_number_of_games,
                                                 evaluation_max_steps_per_game))

    read = False
    if read:
        with open(res_dir + 'weight.pickle', 'rb') as handle:
            new_w = pickle.load(handle)
            linear_policy.set_w(np.expand_dims(new_w, 1))

    for lspi_iteration in range(w_updates):
        print('starting lspi iteration {}'.format(lspi_iteration))
        new_w = compute_lspi_iteration(encoded_states, encoded_next_states, actions, rewards,
                                       done_flags, linear_policy, gamma)
        with open(res_dir + 'weight.pickle', 'wb') as handle:
            pickle.dump(new_w, handle, protocol=pickle.HIGHEST_PROTOCOL)

        norm_diff = linear_policy.set_w(new_w)
        if not only_final:
            performances.append(evaluator.play_games(evaluation_number_of_games,
                                                     evaluation_max_steps_per_game))
        if norm_diff < thresh:
            break
    print('done lspi')

    if not only_final:
        with open(res_dir + 'perf' + str(seed) + '.pickle', 'wb') as handle:
            pickle.dump(performances, handle, protocol=pickle.HIGHEST_PROTOCOL)
    if only_final:
        score = evaluator.play_games(evaluation_number_of_games, evaluation_max_steps_per_game)
        with open(res_dir + 'final_perf' + str(samples_to_collect) + '.pickle', 'wb') as handle:
            pickle.dump(score, handle, protocol=pickle.HIGHEST_PROTOCOL)
    evaluator.play_game(evaluation_max_steps_per_game, render=True)