Example #1
 def __init__(self,
              number_of_kernels_per_dim,
              number_of_actions,
              gamma,
              learning_rate,
              hard_theta=False):
     # Set max value for normalization of inputs
     self._max_normal = 1
     # get state/action information
     self.data_transformer = DataTransformer()
     state_mean = [-3.00283763e-01, 5.61618575e-05]
     state_std = [0.51981243, 0.04024895]
     self.data_transformer.set(state_mean, state_std)
     self._actions = number_of_actions
     # create RBF features:
     self.feature_extractor = RadialBasisFunctionExtractor(
         number_of_kernels_per_dim)
     self.number_of_features = self.feature_extractor.get_number_of_features(
     )
     # the weights of the q learner
     if hard_theta:
         self.theta = np.random.uniform(-10,
                                        10,
                                        size=number_of_actions *
                                        self.number_of_features)
     else:
         self.theta = np.random.uniform(-0.001,
                                        0,
                                        size=number_of_actions *
                                        self.number_of_features)
     # discount factor for the solver
     self.gamma = gamma
     self.learning_rate = learning_rate
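A hypothetical usage sketch for the constructor above, assuming the containing class is named Solver (as in Example #5); the kernel counts and hyperparameters are illustrative only.

solver = Solver(number_of_kernels_per_dim=[10, 8],
                number_of_actions=3,
                gamma=0.999,
                learning_rate=0.01,
                hard_theta=False)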
Example #2
def preprocess(train=None, data=None):
    params = TransformationParameter()
    params.stride = 8
    params.crop_size_x = 368
    params.crop_size_y = 368
    params.target_dist = 0.6
    params.scale_prob = 1
    params.scale_min = 0.5
    params.scale_max = 1.1
    params.max_rotate_degree = 40
    params.center_perterb_max = 40
    params.do_clahe = False
    params.num_parts_in_annot = 17
    params.num_parts = 56
    params.mirror = True

    dataTransformer = DataTransformer(params)
    # Note: this local 'np' (number of label channels) shadows any numpy import.
    np = 2 * (params.num_parts + 1)
    stride = params.stride
    grid_x = params.crop_size_x // stride  # integer grid dimensions
    grid_y = params.crop_size_y // stride
    channelOffset = grid_y * grid_x
    vec_channels = 38
    heat_channels = 19
    ch = vec_channels + heat_channels
    start_label_data = (params.num_parts + 1) * channelOffset

    transformed_data = []  # size: params.crop_size_x * params.crop_size_y * 3
    transformed_label = []  # size: grid_x * grid_y * np

    # Transformation
    print("Transforming...")
    data_img, mask_img, label = dataTransformer.transform(data)

    return data_img, mask_img, label
Example #3
    def testTransform(self):
        data_transformer = DataTransformer(
            mapping=test_data.transformer_mapping)
        transformed_data = data_transformer.transform(
            test_data.data_from_dataset)

        print(transformed_data)
        self.assertDictEqual(transformed_data, test_data.transformed_data)
Example #4
    def testAddToDataDict(self):
        key = 'demographic.age'
        value = 60
        data_type = "string"
        data_dict = {}
        DataTransformer.add_to_data_dict(key, value, data_type, data_dict)

        print(data_dict)
        self.assertEqual(data_dict, {'demographic': {'age': 60}})
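The test above exercises expansion of a dot-separated key into nested dictionaries. A minimal sketch of such a helper, assuming that behaviour (the real DataTransformer.add_to_data_dict may differ, e.g. in how it uses data_type):

def add_to_data_dict(key, value, data_type, data_dict):
    # Split 'demographic.age' into ['demographic', 'age'] and create/walk
    # a nested dictionary for every part except the last.
    parts = key.split('.')
    current = data_dict
    for part in parts[:-1]:
        current = current.setdefault(part, {})
    current[parts[-1]] = value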
Example #5
class Solver:
    def __init__(self, number_of_kernels_per_dim, number_of_actions, gamma, learning_rate):
        # Set max value for normalization of inputs
        self._max_normal = 1
        # get state/action information
        self.data_transformer = DataTransformer()
        state_mean = [-3.00283763e-01,  5.61618575e-05]
        state_std = [0.51981243, 0.04024895]
        self.data_transformer.set(state_mean, state_std)
        self._actions = number_of_actions
        # create RBF features:
        self.feature_extractor = RadialBasisFunctionExtractor(number_of_kernels_per_dim)
        self.number_of_features = self.feature_extractor.get_number_of_features()
        # the weights of the q learner
        self.theta = np.random.uniform(-0.001, 0, size=number_of_actions * self.number_of_features)
        # discount factor for the solver
        self.gamma = gamma
        self.learning_rate = learning_rate

    def _normalize_state(self, s):
        return self.data_transformer.transform_states(np.array([s]))[0]

    def get_features(self, state):
        normalized_state = self._normalize_state(state)
        features = self.feature_extractor.encode_states_with_radial_basis_functions([normalized_state])[0]
        return features

    def get_q_val(self, features, action):
        theta_ = self.theta[action*self.number_of_features: (1 + action)*self.number_of_features]
        return np.dot(features, theta_)

    def get_all_q_vals(self, features):
        all_vals = np.zeros(self._actions)
        for a in range(self._actions):
            all_vals[a] = self.get_q_val(features, a)
        return all_vals

    def get_max_action(self, state):
        sparse_features = self.get_features(state)
        q_vals = self.get_all_q_vals(sparse_features)
        return np.argmax(q_vals)

    def get_state_action_features(self, state, action):
        state_features = self.get_features(state)
        all_features = np.zeros(len(state_features) * self._actions)
        all_features[action * len(state_features): (1 + action) * len(state_features)] = state_features
        return all_features

    def update_theta(self, state, action, reward, next_state, done):
        # compute the new weights and set in self.theta. also return the bellman error (for tracking).
        assert False, "implement update_theta"
        return 0.0
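update_theta above is intentionally left as a stub. A minimal sketch of a semi-gradient Q-learning update for this linear parameterization, assuming only the Solver interface shown above (not a reference solution):

    def update_theta(self, state, action, reward, next_state, done):
        # Semi-gradient Q-learning update for linear function approximation.
        features = self.get_state_action_features(state, action)
        q_sa = np.dot(features, self.theta)
        if done:
            target = reward
        else:
            next_features = self.get_features(next_state)
            target = reward + self.gamma * np.max(self.get_all_q_vals(next_features))
        bellman_error = target - q_sa
        self.theta = self.theta + self.learning_rate * bellman_error * features
        return bellman_error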
Example #6
def test_transform_from_file(self):
    data_transformer = DataTransformer.from_mapping_file(
        "../../resources/mapping/colaus_cineca_mapping_csv")
    transformed_data = data_transformer.transform(test_data.data_from_dataset)

    print(transformed_data)
    self.assertDictEqual(transformed_data, test_data.transformed_data)
Example #7
    def run(
        self,
        x,
        y,
        labels,
        figname='',
        figsize=(15, 5),
        bands=3,
        colors=("#8BBCD4", "#2B7ABD", "#0050A0", "#EF9483", "#E02421",
                "#A90E0A")
    ):  # dark blue, medium blue, light blue, dark red, medium red, light red
        """ Return the entire graph and its plt object

        Look at DataTransformer.transform to see how the data is transformed.

        Keyword arguments:
        x: single array with x values. The distance between neighboring entries has to be the same.
        y: two-dimensional array with y values for each entry.
        labels: array with strings, shown as the labels on the y-axis.
        figsize: (a,b) used when creating the figure (optional)
        bands: default is 3
        colors: array with the colors used for the bands. from dark to light blue, then from dark red to light red.

        Requirements:
        len(y[i]) == len(x) for all 0 <= i < len(y)
        len(y[0]) == len(labels)
        len(colors) == 2*bands

        RETURN: plt object
    """

        self.check_valid_params(x, y, labels, figsize, bands, colors)
        n = len(y)

        F = self.create_figure(figname, figsize)
        df = DataTransformer(y, bands)

        for i in range(n):
            ax = F.add_subplot(n, 1, i + 1)
            transformed_x, bands = df.transform(y[i], x)

            for idx, band in enumerate(bands):
                ax.fill_between(transformed_x[idx], 0, band, color=colors[idx])

            self.adjust_visuals_line(x, df, ax, i, labels)

        return plt
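A hypothetical usage sketch for the run() method above, assuming an instance named grapher of the containing class; the data is illustrative only.

import numpy as np

x = np.arange(0, 100)                     # evenly spaced x values
y = [np.sin(x / 10.0), np.cos(x / 10.0)]  # one series per subplot
labels = ['sine', 'cosine']

plt = grapher.run(x, y, labels, figname='demo')
plt.show()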
Example #8
def preprocessing(train=None):
    params = TransformationParameter()
    params.stride = 8
    params.crop_size_x = 368
    params.crop_size_y = 368
    params.target_dist = 0.6
    params.scale_prob = 1
    params.scale_min = 0.5
    params.scale_max = 1.1
    params.max_rotate_degree = 40
    params.center_perterb_max = 40
    params.do_clahe = False
    params.num_parts_in_annot = 17
    params.num_parts = 56
    params.mirror = True

    dataTransformer = DataTransformer(params)
    # dataTransformer.initRand()
    # Note: this local 'np' (number of label channels) shadows any numpy import.
    np = 2 * (params.num_parts + 1)
    stride = params.stride
    grid_x = params.crop_size_x // stride  # integer grid dimensions
    grid_y = params.crop_size_y // stride
    channelOffset = grid_y * grid_x
    vec_channels = 38
    heat_channels = 19
    ch = vec_channels + heat_channels
    start_label_data = (params.num_parts+1) * channelOffset

    transformed_data = [] # size: params.crop_size_x * params.crop_size_y * 3
    transformed_label = [] # size: grid_x * grid_y * np
    
    # Dataset 
    dataset_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'dataset'))
    if train:
        anno_path = os.path.join(dataset_dir, "annotations/person_keypoints_train2017.json")
    else:
        anno_path = os.path.join(dataset_dir, "annotations/person_keypoints_val2017.json")

    # Transformation ('filename' is expected to be supplied by the surrounding
    # context; it is not defined in this snippet)
    data_img, mask_img, label = dataTransformer.transform(filename, anno_path)

    return data_img, mask_img, label
Example #9
def main():
    # Send request
    # See prediction_service.proto for gRPC request/response details.
    X = ["@kg.MutualFund 基金@初始规模 是怎样"]  # expected output: y = 1
    y = ["class_id_1"]
    data_transformer = DataTransformer(train_data_file)
    X_encoded, y_encoded = data_transformer.fit(X, y)

    # REST
    url = sys.argv[1]
    data = {
        'signature_name': 'textclassified',
        'instances': [{
            'inputX': X_encoded[0].tolist()
        }]
    }
    data = json.dumps(data)
    r = requests.post(url, data=data)
    print('test:{}'.format(X[0]))
    print(r.text)
    print('y_true:{}'.format(y_encoded))
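A hypothetical invocation of the script above, assuming a TensorFlow Serving REST endpoint; the script name, host, port, and model name are placeholders:

python client.py http://localhost:8501/v1/models/textclassified:predict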
Example #10
def main():
    # CoLaus
    DataPipeline() \
        .with_consumer(CsvDataConsumer(data_dir + "CoLaus_sample_100linesShuffled.csv", "\t")) \
        .with_processor(DataTransformer.from_mapping_file(mapping_dir + "colaus_cineca_mapping_questionnaire.csv")) \
        .with_processor(FieldValueTransformer.from_mapping_file(mapping_dir + "colaus_data_label_mapping.xlsx")) \
        .with_producer(JsonProducer(data_dir + "colaus_cineca.json")) \
        .run()

    # H3Africa
    DataPipeline() \
        .with_consumer(CsvDataConsumer(data_dir + "h3africa_dummy_datasets_for_cineca_demo.csv", ";")) \
        .with_processor(DataTransformer.from_mapping_file(mapping_dir + "h3africa_cineca_mapping_questionnaire.csv")) \
        .with_producer(JsonProducer(data_dir + "h3africa_cineca.json")) \
        .run()

    # CHILD
    DataPipeline() \
        .with_consumer(CsvDataConsumer(data_dir + "child_demo_data.csv", ",")) \
        .with_processor(FieldValueTransformerPre.from_mapping_file("../resources/mapping/child_initial_data_label_mapping.xlsx")) \
        .with_processor(DataTransformer.from_mapping_file(mapping_dir + "child_cineca_mapping_questionnaire.csv")) \
        .with_producer(JsonProducer(data_dir + "child_cineca.json")) \
        .run()
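The pipelines above are assembled in a fluent builder style. A minimal sketch of such a builder with hypothetical method names (consume/transform/produce); the real DataPipeline API may differ:

class DataPipeline:
    def __init__(self):
        self._consumer = None
        self._processors = []
        self._producer = None

    def with_consumer(self, consumer):
        self._consumer = consumer
        return self

    def with_processor(self, processor):
        self._processors.append(processor)
        return self

    def with_producer(self, producer):
        self._producer = producer
        return self

    def run(self):
        # Read, apply each processor in order, then write the result.
        data = self._consumer.consume()
        for processor in self._processors:
            data = processor.transform(data)
        self._producer.produce(data)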
Example #11
def run(spark, student_file, teacher_file, out_path='report.json'):
    """ Main driver function of data processor application """

    io_handler = IOHandler(spark)
    try:
        student_df = io_handler.spark_read_file(student_file, delim='_')
        logger.info("Successfully loaded student file from %s", student_file)
        teacher_df = io_handler.spark_read_file(teacher_file)
        logger.info("Successfully loaded teacher file from %s", teacher_file)
    except FileNotFoundError as error_message:
        logger.error(error_message)
        return

    joined_df = join_dfs(student_df, teacher_df, 'cid')
    logger.info("Finished joining dataframes")

    transformer = DataTransformer(spark)
    output_df = transformer.fit_output_schema(joined_df)
    logger.info("Fit data to output schema:")
    output_df.show()

    io_handler.write_report(output_df, 'json', out_path)
    logger.info("Processing completed")
Example #12
  def run(self, x, y, labels, figsize=(20,3), bands=3, colors=("#8BBCD4","#2B7ABD","#0050A0","#EF9483","#E02421", "#A90E0A")): # dark blue, medium blue, light blue, dark red, medium red, light red
    """ Return the entire graph and its plt object

        Look at DataTransformer.transform to see how the data is transformed.

        Keyword arguments:
        x: single array with x values. The distance between neighboring entries has to be the same.
        y: two-dimensional array with y values for each entry.
        labels: array with strings, shown as the labels on the y-axis.
        figsize: (a,b) used when creating the figure (optional)
        bands: default is 3
        colors: array with the colors used for the bands. from dark to light blue, then from dark red to light red.

        Requirements:
        len(y[i]) == len(x) for all 0 <= i < len(y)
        len(y[0]) == len(labels)
        len(colors) == 2*bands

        RETURN: plt object
    """

    self.check_valid_params(x,y,labels,figsize,bands,colors) 
    n = len(y)

    F = self.create_figure(figsize) 
    df = DataTransformer(y, bands)

    for i in range(n):
      ax = F.add_subplot(n, 1, i+1)
      transformed_x, bands = df.transform(y[i], x)

      for idx,band in enumerate(bands):
        ax.fill_between(transformed_x[idx],0,band,color=colors[idx])

      self.adjust_visuals_line(x, df, ax, i, labels)

    return plt
Example #13
def training_the_model(samples_to_collect=100000, seed=100):
    number_of_kernels_per_dim = [10, 8]
    gamma = 0.999
    w_updates = 20
    evaluation_number_of_games = 50
    evaluation_max_steps_per_game = 300
    np.random.seed(seed)

    env = MountainCarWithResetEnv()
    # collect data
    states, actions, rewards, next_states, done_flags = DataCollector(
        env).collect_data(samples_to_collect)
    # get data success rate
    data_success_rate = np.sum(rewards) / len(rewards)
    print(f'Data Success Rate {data_success_rate}')
    # standardize data
    data_transformer = DataTransformer()
    data_transformer.set_using_states(
        np.concatenate((states, next_states), axis=0))
    states = data_transformer.transform_states(states)
    next_states = data_transformer.transform_states(next_states)
    # process with radial basis functions
    feature_extractor = RadialBasisFunctionExtractor(number_of_kernels_per_dim)
    # encode all states:
    encoded_states = feature_extractor.encode_states_with_radial_basis_functions(
        states)
    encoded_next_states = feature_extractor.encode_states_with_radial_basis_functions(
        next_states)
    # set a new linear policy
    linear_policy = LinearPolicy(feature_extractor.get_number_of_features(), 3,
                                 True)
    # but set the weights as random
    linear_policy.set_w(np.random.uniform(size=linear_policy.w.shape))
    # start an object that evaluates the success rate over time
    evaluator = GamePlayer(env, data_transformer, feature_extractor,
                           linear_policy)

    success_rate_vs_iteration = list()

    for lspi_iteration in range(w_updates):
        print(f'Starting LSPI iteration {lspi_iteration}')

        new_w = compute_lspi_iteration(encoded_states, encoded_next_states,
                                       actions, rewards, done_flags,
                                       linear_policy, gamma)
        norm_diff = linear_policy.set_w(new_w)

        success_rate = evaluator.play_games(evaluation_number_of_games,
                                            evaluation_max_steps_per_game)

        success_rate_vs_iteration.append(success_rate)

        if norm_diff < 0.00001:
            break

    print('LSPI Done')
    return success_rate_vs_iteration
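compute_lspi_iteration is called here (and in the LSPI examples below) but not shown. A minimal LSTDQ-style sketch, assuming block one-hot state-action features and a hypothetical linear_policy.get_max_action(encoded_state) that picks the greedy action; the real helper may differ:

def state_action_features(encoded_state, action, num_actions):
    # Block one-hot layout: copy the encoded state into the slice that
    # belongs to the chosen action and leave the other slices at zero.
    k = len(encoded_state)
    phi = np.zeros(k * num_actions)
    phi[action * k:(action + 1) * k] = encoded_state
    return phi


def compute_lspi_iteration(encoded_states, encoded_next_states, actions,
                           rewards, done_flags, linear_policy, gamma,
                           num_actions=3):
    k = encoded_states.shape[1]
    A = np.zeros((k * num_actions, k * num_actions))
    b = np.zeros(k * num_actions)
    for s, s_next, a, r, done in zip(encoded_states, encoded_next_states,
                                     actions, rewards, done_flags):
        phi = state_action_features(s, int(a), num_actions)
        if done:
            phi_next = np.zeros_like(phi)
        else:
            a_next = linear_policy.get_max_action(s_next)  # hypothetical API
            phi_next = state_action_features(s_next, int(a_next), num_actions)
        A += np.outer(phi, phi - gamma * phi_next)
        b += r * phi
    # Solve A w = b for the new weight vector.
    return np.linalg.solve(A, b)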
Example #14
def get_predictions(net, true_intent, intent_map, slots_map, context, batch_size):
    """Get predictions for every item in the intent.
    It returns a list where index is same as in validation item. Each record is of following format:
    Tuple(Predicted_Intent, List[(List[bits of text], slot)]"""
    result = []
    idx_to_slot = {v: k for k, v in slots_map.items()}
    idx_to_intent = {v: k for k, v in intent_map.items()}

    intent_dev_dataset = NLUBenchmarkDataset(SacreMosesTokenizer(), 'val', intent_map,
                                             slots_map, intent_to_load=true_intent)
    transformer = DataTransformer(ELMoCharVocab())
    transformed_dev_dataset = intent_dev_dataset.transform(transformer, lazy=False)
    batchify_fn = Tuple(Pad(), Stack(), Pad(), Stack())
    dev_dataloader = DataLoader(transformed_dev_dataset, batch_size=batch_size,
                                num_workers=multiprocessing.cpu_count() - 3,
                                batchify_fn=batchify_fn)

    for i, (data, valid_lengths, entities, intent) in enumerate(dev_dataloader):
        items_per_iteration = data.shape[0]
        length = data.shape[1]

        data = data.as_in_context(context)

        hidden_state = net.elmo_container[0].begin_state(mx.nd.zeros,
                                                         batch_size=items_per_iteration,
                                                         ctx=context)
        mask = get_data_mask(length, valid_lengths, items_per_iteration, context)

        intents, slots = net(data, hidden_state, mask)
        score, slots_seq = net.crf(slots.transpose(axes=(1, 0, 2)))

        intents_prediction = intents.argmax(axis=1).asnumpy()
        slots_prediction = slots_seq.asnumpy()

        for rec_id, pred_intent in enumerate(intents_prediction):
            text = intent_dev_dataset[rec_id][0]
            tokens = intent_dev_dataset[rec_id][1]
            slot_prediction = slots_prediction[rec_id]

            prediction_item = get_prediction_item(idx_to_slot, slot_prediction, tokens)
            result.append((idx_to_intent[pred_intent], prediction_item, text, tokens))

    return result
Example #15
    def test_output_files_exist(self):
        warnings.simplefilter("ignore", ResourceWarning)
        d = DataTransformer()
        l1 = 0
        l2 = 0
        try:
            self.s3 = boto3.resource('s3')
        except BotoCoreError as e:
            if isinstance(e, NoCredentialsError):
                print("Invalid credentials")
            else:
                print("Error message -" + str(e))
            sys.exit()

        self.bucketDest = self.s3.Bucket(d.OUTPUT_BUCKET_NAME)
        exists = True
        try:
            self.s3.meta.client.head_bucket(Bucket=d.OUTPUT_BUCKET_NAME)
        except ClientError as e:
            error_code = int(e.response['Error']['Code'])
            print(e)
            if error_code == 404:
                exists = False
            self.fail(e)
        if exists:
            self.list = self.s3.meta.client.list_objects(
                Bucket=d.INPUT_BUCKET_NAME)['Contents']

            for s3_key in self.list:
                s3_object = s3_key['Key']

                if not s3_object.endswith("/"):
                    l1 += 1

            self.list2 = self.s3.meta.client.list_objects(
                Bucket=d.OUTPUT_BUCKET_NAME)['Contents']
            for s3_key in self.list2:
                s3_object = s3_key['Key']

                if not s3_object.endswith("/"):
                    l2 += 1
        self.assertEqual(l1, l2)
Example #16
                        help='path to save the final model')
    parser.add_argument(
        '--gpu',
        type=int,
        default=0,
        help='id of the gpu to use. Leave it empty to use the cpu.')
    arg = parser.parse_args()
    return arg


if __name__ == '__main__':
    args = parse_args()
    context = mx.cpu(0) if args.gpu is None else mx.gpu(args.gpu)

    segments = ['train', 'dev']
    transformer = DataTransformer(segments)
    dataloaders = [
        transform_segment(transformer, segment, args) for segment in segments
    ]

    model = get_model(transformer._word_vocab, transformer._char_vocab, args)

    trainer = gluon.Trainer(model.collect_params(), 'ftml',
                            {'learning_rate': args.lr})
    best_model_name = run_training(model, trainer, dataloaders[0],
                                   dataloaders[1], args)

    model.load_parameters(best_model_name, ctx=context)
    avg_L, acc, em, f1, predictions = run_evaluate(model,
                                                   dataloaders[1],
                                                   args,
Example #17
    w_updates = 100
    evaluation_number_of_games = 10
    evaluation_max_steps_per_game = 1000

    np.random.seed(123)
    # np.random.seed(234)

    env = MountainCarWithResetEnv()
    # collect data
    states, actions, rewards, next_states, done_flags = DataCollector(
        env).collect_data(samples_to_collect)
    # get data success rate
    data_success_rate = np.sum(rewards) / len(rewards)
    print(f'success rate {data_success_rate}')
    # standardize data
    data_transformer = DataTransformer()
    data_transformer.set_using_states(
        np.concatenate((states, next_states), axis=0))
    states = data_transformer.transform_states(states)
    next_states = data_transformer.transform_states(next_states)
    # process with radial basis functions
    feature_extractor = RadialBasisFunctionExtractor(number_of_kernels_per_dim)
    # encode all states:
    encoded_states = feature_extractor.encode_states_with_radial_basis_functions(
        states)
    encoded_next_states = feature_extractor.encode_states_with_radial_basis_functions(
        next_states)
    # set a new linear policy
    linear_policy = LinearPolicy(feature_extractor.get_number_of_features(), 3,
                                 True)
    # but set the weights as random
Example #18
    tf.compat.v1.flags.DEFINE_integer("evaluate_every", 1, "Evaluate model on dev set after this many steps ")
    tf.compat.v1.flags.DEFINE_integer("checkpoint_every", 10, "Save model after this many steps ")
    tf.compat.v1.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store ")

    tf.compat.v1.flags.DEFINE_float("regulation_rate", 5e-4, "L2 regularization rate")

    FLAGS = tf.compat.v1.flags.FLAGS

    export_path_base = FLAGS.export_path_base
    export_path = os.path.join(
          compat.as_bytes(export_path_base),
          compat.as_bytes(str(FLAGS.model_version)))
    assert not os.path.exists(export_path), \
        'Export directory already exists. Please specify a different export directory:{}'.format(export_path)

    data_transformer = DataTransformer(FLAGS.train_data_file)
    x_train, y_train  = data_transformer.fit_with_file(FLAGS.train_data_file, FLAGS.num_class)
    num_labels = FLAGS.num_class
    print(x_train.shape, y_train.shape)
    
    x_dev, y_dev = data_transformer.fit_with_file(FLAGS.dev_data_file, FLAGS.num_class)

    with tf.Graph().as_default():
        sess = tf.compat.v1.Session()
        with sess.as_default():
            svm = SVM(sequence_length=x_train.shape[1], 
                        num_classes=FLAGS.num_class, l2_reg_lambda=FLAGS.regulation_rate)
    
            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.compat.v1.train.AdamOptimizer(0.1)
Example #19
 def setUp(self):
   self.d = DataTransformer([[9,-9,0]], 3)
   self.common_x_ret = [[1,2,3],[1,2,3],[1,2,3],[1,2,3],[1,2,3],[1,2,3]]
Example #20
class DataTransformerTest(unittest.TestCase):
  
  def setUp(self):
    self.d = DataTransformer([[9,-9,0]], 3)
    self.common_x_ret = [[1,2,3],[1,2,3],[1,2,3],[1,2,3],[1,2,3],[1,2,3]]

    
  ## mixed positive and negative values
  def test_top_range_mixed_negative_positives(self):
    self.run_me([4,1,-9],[[1,2,2.5, 3],[1,2,2.5,3],[1,2,2.5,3],[1,2,2.5,3],[1,2,2.5,3],[1,2,2.5,3]],[ [3,1,0,0],[1,0,0,0],[0,0,0,0],[0,0,0,3],[0,0,0,3],[0,0,0,3]])

  def test_positive_zero_negative_positive(self):
    self.run_me([4,0,-9],self.common_x_ret,[[3,0,0],[1,0,0],[0,0,0],[0,0,3],[0,0,3],[0,0,3]])

  def test_mixed_zero_at_end(self):
    self.run_me([4,-1,0],[[1,1.5,2,3],[1,1.5,2,3],[1,1.5,2,3],[1,1.5,2,3],[1,1.5,2,3],[1,1.5,2,3]],[[3,0,0,0],[1,0,0,0],[0,0,0,0],[0,0,1,0],[0,0,0,0],[0,0,0,0]])

  def test_positive_negative_positive(self):
    self.run_me([4,1,-9],[[1,2,2.5, 3],[1,2,2.5,3],[1,2,2.5,3],[1,2,2.5,3],[1,2,2.5,3],[1,2,2.5,3]],[ [3,1,0,0],[1,0,0,0],[0,0,0,0],[0,0,0,3],[0,0,0,3],[0,0,0,3]])

  def test_positive_zero_negative(self):
    self.run_me([4,1,-9,3],[[1,2,2.5,3,3.5,4],[1,2,2.5,3,3.5,4],[1,2,2.5,3,3.5,4],[1,2,2.5,3,3.5,4],[1,2,2.5,3,3.5,4],[1,2,2.5,3,3.5,4]],[ [3,1,0,0,0,3],[1,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,3,0,0],[0,0,0,3,0,0],[0,0,0,3,0,0]], x_data=[1,2,3,4])


  ## only positive OR negatives values

  def test_positive_bottom_edge(self):
    self.run_me([4.5,1,3],self.common_x_ret, [ [3,1,3],[1.5,0,0],[0,0,0],[0,0,0],[0,0,0],[0,0,0]])

  def test_zero_at_beginning(self):
    self.run_me([0,3,0],self.common_x_ret,[[0,3,0],[0,0,0],[0,0,0],[0,0,0],[0,0,0],[0,0,0]])

  def test_zero_at_end(self):
    self.run_me([4,3,0],self.common_x_ret,[[3,3,0],[1,0,0],[0,0,0],[0,0,0],[0,0,0],[0,0,0]])

  def test_positive_zero_positive(self):
    self.run_me([4,0,1],self.common_x_ret,[[3,0,1],[1,0,0],[0,0,0],[0,0,0],[0,0,0],[0,0,0]])

  def test_top_range_no_negative(self):
    self.run_me([4,1,9],self.common_x_ret,[ [3,1,3],[1,0,3],[0,0,3],[0,0,0],[0,0,0],[0,0,0]])

  def test_edgecases_medium(self):
    self.assertTrue(self.d.transform([2,5.99,3.81],[1,2,3]) == (self.common_x_ret,[ [2,3,3],[0,2.99,0.81],[0,0,0],[0,0,0],[0,0,0],[0,0,0]]))

  def test_choice(self):
    self.assertTrue(self.d.transform([4,1,8],[1,2,3]) == (self.common_x_ret,[ [3,1,3],[1,0,3],[0,0,2],[0,0,0],[0,0,0],[0,0,0]]))

  def test_edge_top_negative_value(self):
    self.assertTrue(self.d.transform([0,-7.5,-9],[1,2,3]) == (self.common_x_ret,[ [0,0,0],[0,0,0],[0,0,0],[0,3,3],[0,3,3],[0,1.5,3]]))

  def test_edge_middle_negative_value(self):
    self.assertTrue(self.d.transform([-3.5,-6,-4],[1,2,3]) == (self.common_x_ret,[ [0,0,0],[0,0,0],[0,0,0],[3,3,3],[0.5,3,1],[0,0,0]]))

  def test_edge_bottom_negative_value(self):
    self.run_me([-0.5,-3,-8],self.common_x_ret,[[0,0,0],[0,0,0],[0,0,0], [0.5, 3, 3], [0,0,3],[0,0,2]])

  def test_regular_bottom_negative_value(self):
    self.run_me([0,-2,-1],self.common_x_ret,[[0,0,0],[0,0,0], [0,0,0],[0,2,1],[0,0,0],[0,0,0]])

  def run_me(self, data, x, y, x_data=[1, 2, 3]):
    a, b = self.d.transform(data, x_data)
    print(a)
    print(b)
    self.assertTrue(a == x)
    self.assertTrue(b == y)
Example #21
    return best_model_path


if __name__ == '__main__':
    args = parse_args()
    context = mx.cpu(0) if args.gpu is None else mx.gpu(args.gpu)

    train_dataset = NLUBenchmarkDataset(SacreMosesTokenizer(), 'train_full')
    print(train_dataset.get_intent_map())
    print(train_dataset.get_slots_map())
    dev_dataset = NLUBenchmarkDataset(SacreMosesTokenizer(), 'val',
                                      train_dataset.get_intent_map(),
                                      train_dataset.get_slots_map())

    transformer = DataTransformer(ELMoCharVocab())
    transformed_train_dataset = train_dataset.transform(transformer,
                                                        lazy=False)
    transformed_dev_dataset = dev_dataset.transform(transformer, lazy=False)

    batchify_fn = Tuple(Pad(), Stack(), Pad(), Stack())

    train_dataloader = DataLoader(transformed_train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=multiprocessing.cpu_count() - 3,
                                  batchify_fn=batchify_fn)
    dev_dataloader = DataLoader(transformed_dev_dataset,
                                batch_size=args.batch_size,
                                shuffle=True,
                                num_workers=multiprocessing.cpu_count() - 3,
Example #22
 def make_datatransformer(self, dt_args):
     self.X_dt = DataTransformer(**dt_args)
     self.y_dt = DataTransformer(**dt_args)
Example #23
from data_transformer import DataTransformer
from movie_tables import Movie, Country, Genre, movies_genres_association
from json import loads
from flask import Flask, jsonify, request
from base import session
from auth_provider import token_auth, basic_auth, generate_auth_token

app = Flask('MoviesREST')

db = DataTransformer()


@app.route('/token')
@basic_auth.login_required
def get_auth_token():
    token = generate_auth_token()
    return jsonify({'token': token.decode('ascii')})


@app.route('/movies', methods=['GET'])
@token_auth.login_required
def get_movie():
    result = session.query(Movie.id, Movie.title, Movie.year, Country.name, Genre.name) \
        .join(Country, isouter=True) \
        .join(movies_genres_association, isouter=True) \
        .join(Genre, isouter=True) \
        .all()
    return db.transform_dataset_into_json(result)
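
A hypothetical sketch of transform_dataset_into_json, assuming each row of the query above is a tuple (movie id, title, year, country name, genre name); the real DataTransformer method may aggregate genres per movie differently:

def transform_dataset_into_json(rows):
    movies = [
        {'id': row[0], 'title': row[1], 'year': row[2],
         'country': row[3], 'genre': row[4]}
        for row in rows
    ]
    return jsonify(movies)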


@app.route('/movies/<id>', methods=['GET'])
Example #24
    precision = 1.0 * correct / extract
    recall = 1.0 * correct / standard
    f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1, correct, extract, standard


if __name__ == '__main__':
    args = parse_args()
    context = mx.cpu(0) if args.gpu is None else mx.gpu(args.gpu)

    train_dataset = INSPECDataset('train')
    dev_dataset = INSPECDataset('dev')
    test_dataset = INSPECDataset('test')

    vocab = get_vocab([train_dataset, dev_dataset])
    transformer = DataTransformer(vocab, args.seq_len)

    train_dataloader = DataLoader(train_dataset.transform(transformer),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=multiprocessing.cpu_count() - 3)
    dev_dataloader = DataLoader(dev_dataset.transform(transformer),
                                batch_size=args.batch_size,
                                shuffle=True,
                                num_workers=multiprocessing.cpu_count() - 3)
    test_dataloader = DataLoader(test_dataset.transform(transformer),
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=multiprocessing.cpu_count() - 3)

    model = get_model(len(vocab), args.embedding_dim, args.hidden,
Example #25
    def run(
        self,
        x,
        y,
        labels,
        figsize=(15, 20),
        bands=3,
        colors=("#8BBCD4", "#2B7ABD", "#0050A0", "#EF9483", "#E02421",
                "#A90E0A")
    ):  # dark blue, medium blue, light blue, dark red, medium red, light red
        """ Return the entire graph and its plt object

        Look at DataTransformer.transform to see how the data is transformed.

        Keyword arguments:
        x: single array with x values. The distance between neighboring entries has to be the same.
        y: two-dimensional array with y values for each entry.
        labels: array with strings, shown as the labels on the y-axis.
        figsize: (a,b) used when creating the figure (optional)
        bands: default is 3
        colors: array with the colors used for the bands. from dark to light blue, then from dark red to light red.

        Requirements:
        len(y[i]) == len(x) for all 0 <= i < len(y)
        len(y[0]) == len(labels)
        len(colors) == 2*bands

        RETURN: plt object
    """

        self.check_valid_params(x, y, labels, figsize, bands, colors)
        n = len(y[0, :])

        F, axes = plt.subplots(n, 1, figsize=figsize, sharex=True, sharey=True)
        df = DataTransformer(y, bands)

        for i, ax in enumerate(axes.flatten()):
            transformed_x, ybands = df.transform(y[:, i], x)
            for idx, band in enumerate(ybands):
                ax.fill_between(transformed_x[idx], 0, band, color=colors[idx])
            self.adjust_visuals_line(x, df, ax, i, n, labels)

        F.text(0.5, 0.04, 'Time', ha='center', size=30)
        F.text(0.04,
               0.5,
               'Error to observation ratio',
               va='center',
               rotation='vertical',
               size=30)
        handles = []
        legend_colors = [
            "#A90E0A", "#E02421", "#EF9483", "#8BBCD4", "#2B7ABD", "#0050A0"
        ]
        for c in legend_colors:
            handles.append(self.patch_creator(c))
        bandwidths = int(df.max) / bands
        lowerbounds = np.arange(int(df.min), int(df.max), bandwidths)
        labels = [
            str(int(b)) + ' - ' + str(int(b + bandwidths)) for b in lowerbounds
        ]
        F.legend(handles,
                 labels,
                 ncol=bands * 2,
                 loc='upper center',
                 fontsize='xx-large')
        return plt
Example #26
def run_lspi(seed,
             w_updates=20,
             samples_to_collect=100000,
             evaluation_number_of_games=1,
             evaluation_max_steps_per_game=200,
             thresh=0.00001,
             only_final=False):
    """
    This is the main lspi function
    :param seed: random seed for the run
    :param w_updates: how many w updates to do
    :param samples_to_collect: how many samples to collect
    :param evaluation_number_of_games: how many game evaluations to do
    :param evaluation_max_steps_per_game: how many steps to allow the evaluation game to run
    :param thresh: the threshold for the stopping condition
    :param only_final: run evaluation only at the end of the run
    :return: None
    """
    res_dir = './Results/'
    np.random.seed(seed)
    number_of_kernels_per_dim = [12, 10]
    gamma = 0.999
    env = MountainCarWithResetEnv()
    # collect data
    states, actions, rewards, next_states, done_flags = DataCollector(
        env).collect_data(samples_to_collect)
    # get data success rate
    data_success_rate = np.sum(rewards) / len(rewards)
    print('success rate: {}'.format(data_success_rate))
    # standardize data
    data_transformer = DataTransformer()
    data_transformer.set_using_states(
        np.concatenate((states, next_states), axis=0))
    states = data_transformer.transform_states(states)
    next_states = data_transformer.transform_states(next_states)
    # process with radial basis functions
    feature_extractor = RadialBasisFunctionExtractor(number_of_kernels_per_dim)
    # encode all states:
    encoded_states = feature_extractor.encode_states_with_radial_basis_functions(
        states)
    encoded_next_states = feature_extractor.encode_states_with_radial_basis_functions(
        next_states)
    # set a new linear policy
    linear_policy = LinearPolicy(feature_extractor.get_number_of_features(), 3,
                                 True)
    # but set the weights as random
    linear_policy.set_w(np.random.uniform(size=linear_policy.w.shape))
    # start an object that evaluates the success rate over time
    evaluator = GamePlayer(env, data_transformer, feature_extractor,
                           linear_policy)

    # success_rate = evaluator.play_games(evaluation_number_of_games, evaluation_max_steps_per_game)
    # print("Initial success rate: {}".format(success_rate))
    performances = []
    if not only_final:
        performances.append(
            evaluator.play_games(evaluation_number_of_games,
                                 evaluation_max_steps_per_game))
    read = False
    if read:
        with open(res_dir + 'weight.pickle', 'rb') as handle:
            new_w = pickle.load(handle)
            linear_policy.set_w(np.expand_dims(new_w, 1))
    for lspi_iteration in range(w_updates):
        print('starting lspi iteration {}'.format(lspi_iteration))

        new_w = compute_lspi_iteration(encoded_states, encoded_next_states,
                                       actions, rewards, done_flags,
                                       linear_policy, gamma)
        with open(res_dir + 'weight.pickle', 'wb') as handle:
            pickle.dump(new_w, handle, protocol=pickle.HIGHEST_PROTOCOL)

        norm_diff = linear_policy.set_w(new_w)
        if not only_final:
            performances.append(
                evaluator.play_games(evaluation_number_of_games,
                                     evaluation_max_steps_per_game))
        if norm_diff < thresh:
            break
    print('done lspi')
    if not only_final:
        with open(res_dir + 'perf' + str(seed) + '.pickle', 'wb') as handle:
            pickle.dump(performances, handle, protocol=pickle.HIGHEST_PROTOCOL)
    if only_final:
        score = evaluator.play_games(evaluation_number_of_games,
                                     evaluation_max_steps_per_game)
        with open(res_dir + 'final_perf' + str(samples_to_collect) + '.pickle',
                  'wb') as handle:
            pickle.dump(score, handle, protocol=pickle.HIGHEST_PROTOCOL)
    evaluator.play_game(evaluation_max_steps_per_game, render=True)