Exemplo n.º 1
0
 def __init__(self, graph, embeddingDimension=200, walkLength = 3, nbContexts = 1, negativeRate=5):
     """Set up a Node2Vec model on an index-relabelled copy of ``graph``.

     Parameters
     ----------
     graph : weighted graph the model is trained on. The code relies on
         ``graph.nodes`` and ``graph.edges(data=True)`` (networkx API).
     embeddingDimension : dimension of the continuous node representation
     walkLength : length of the random walks used to build the contexts
     nbContexts : number of contexts computed per node
     negativeRate : number of negative samples for each positive sample
     """
     # hyper-parameters and untouched reference to the caller's graph
     self.original_graph = graph
     self.embeddingDimension = embeddingDimension
     self.walkLength = walkLength
     self.nbContexts = nbContexts
     self.negativeRate = negativeRate
     self.contexts = None

     # two-way translation between node names and integer indices
     nodes = list(graph.nodes)
     self.index2node = dict(enumerate(nodes))
     self.node2index = {name: position for position, name in enumerate(nodes)}

     # directed copy of the graph whose nodes are the integer indices
     relabelled = nx.DiGraph()
     for name in prog_bar(nodes):
         relabelled.add_node(self.node2index[name])
     for source, target, attributes in prog_bar(graph.edges(data = True)):
         # edges without an explicit weight count as weight 1
         w = attributes.get('weight', 1)
         if w>0:
             # only strictly positive weights are copied over
             relabelled.add_edge(self.node2index[source], self.node2index[target], weight = w)

     self.graph = relabelled
     self.nb_of_nodes = len(nodes)

     # both layers start uniformly in [-1, 1), one row per node
     layer_shape = (self.nb_of_nodes, self.embeddingDimension)
     self.input2hidden_weights = np.random.uniform(low = -1, high = 1, size = layer_shape)
     self.hidden2output_weights = np.random.uniform(low = -1, high = 1, size = layer_shape)

     # negative-sampling distribution: in-degree (floored at .001) raised
     # to the power 3/4, then normalised so that it sums to 1
     floored_in_degrees = np.array([max(relabelled.in_degree[i], .001) for i in range(self.nb_of_nodes)])
     smoothed = np.power(floored_in_degrees, 3/4)
     self.probabilities = smoothed / np.sum(smoothed)
Exemplo n.º 2
0
def parsing_attributes(list_of_attributes):
    """Tokenize attribute strings and pad them to a common length.

    Each string is tokenized with ``tokenize`` and prefixed with
    ``'<start>'``. Every token list is then right-padded with ``'<pad>'``
    up to the length of the longest one and terminated with ``'<end>'``.

    Args:
        list_of_attributes: iterable of attribute strings.

    Returns:
        ``np.array`` built from the padded token lists, one row per input
        string. An empty input yields an empty array.
    """
    print('tokenizing attributes ...')
    tokenized_attributes = [
        ['<start>'] + tokenize(attributes)
        for attributes in prog_bar(list_of_attributes)
    ]

    # `default=0` keeps max() from raising ValueError when there is no input.
    pad_len = max(map(len, tokenized_attributes), default=0)

    print('padding attributes ...')
    padded_attributes = [
        attributes + ['<pad>'] * (pad_len - len(attributes)) + ['<end>']
        for attributes in prog_bar(tokenized_attributes)
    ]

    return np.array(padded_attributes)
Exemplo n.º 3
0
def _create_reduced_point_cloud(data_path,
                                info_path,
                                save_path=None,
                                back=False):
    """Write a filtered copy of every point cloud listed in an info file.

    For each entry of the pickled info list, the float32 point file is read
    as rows of 6 values, filtered with
    ``box_np_ops.remove_outside_points`` and written back as raw binary.

    Args:
        data_path: directory the ``velodyne_path`` entries are relative to.
        info_path: path of the pickled list of info dicts.
        save_path: output directory. When ``None``, files go to a sibling
            of the source directory named ``<source dir>_reduced``; the
            directory is not created here.
        back: when true, the first column of the points is negated before
            filtering and ``"_back"`` is appended to the output file name.
    """
    with open(info_path, 'rb') as f:
        # NOTE: pickle.load can execute arbitrary code; only feed it info
        # files from a trusted source.
        kitti_infos = pickle.load(f)
    for info in prog_bar(kitti_infos):
        v_path = pathlib.Path(data_path) / info['velodyne_path']
        points_v = np.fromfile(str(v_path), dtype=np.float32,
                               count=-1).reshape([-1, 6])
        rect = info['calib/R0_rect']
        P2 = info['calib/P2']
        Trv2c = info['calib/Tr_velo_to_cam']
        if back:
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
                                                    info["img_shape"])

        if save_path is None:
            save_dir = v_path.parent.parent / (v_path.parent.stem + "_reduced")
        else:
            save_dir = pathlib.Path(save_path)
        # Convert to str before appending the suffix: `Path += str` raises
        # TypeError, which broke the save_path=None / back=True combination.
        save_filename = str(save_dir / v_path.name)
        if back:
            save_filename += "_back"
        # The payload is raw float32 data, so the handle must be binary.
        with open(save_filename, 'wb') as f:
            points_v.tofile(f)
Exemplo n.º 4
0
def parsing_list_of_attributes(list_of_attributes):
    """Run ``parsing_attributes`` on every entry and stack the results.

    Args:
        list_of_attributes: iterable whose items are each passed to
            ``parsing_attributes``.

    Returns:
        ``np.array`` built from the list of per-item results.
    """
    parsed_items = []
    for att in prog_bar(list_of_attributes):
        parsed_items.append(parsing_attributes(att))
    return np.array(parsed_items)
Exemplo n.º 5
0
def main():
    """Train a ``DQNAgent`` on ``GameEnv`` for 500 episodes.

    After every environment step the transition is stored and both the
    model and the target network are trained. When an episode ends with a
    reward of exactly 500, a message is printed and the model is saved.
    """
    env = GameEnv(show=False)
    agent = DQNAgent(env=env)
    episode_count = 500

    for episode in prog_bar(range(episode_count), ascii=False, unit="episodes"):
        state = env.reset().reshape(1, 2)
        done = False

        # play until the environment reports the end of the episode
        while not done:
            action = agent.predict_action(state)
            next_state, reward, done = env.action(action)
            next_state = next_state.reshape(1, 2)

            agent.remember(state, action, reward, next_state, done)
            agent.model_train()
            agent.target_train()

            state = next_state.reshape(1, 2)

        # `reward` is the one returned by the step that ended the episode
        if reward == 500:
            print(f"Completed in episode {episode}")
            agent.save_model(f"final-{episode}.model")
Exemplo n.º 6
0
def building_data_test(path):
    """Load the attribute strings of a test file.

    The first line (header) and every line that contains no ``'['`` are
    skipped. For each remaining line the text before the first ``'",'`` is
    kept, with double quotes and newlines removed.

    Args:
        path: path of the UTF-8 text file to read.

    Returns:
        List of attribute strings, in file order.
    """
    print('loading text ...')
    list_of_attributes = []
    with open(path, 'r', encoding='utf-8') as file:
        for line_number, line in prog_bar(enumerate(file)):
            if line_number == 0:
                continue  # skipping the header line
            if '[' not in line:
                continue  # some lines are fuzzy
            # No name/area/near extraction here: the values were never used,
            # and looking them up raised IndexError whenever one of those
            # words appeared without its "word[" field.
            attributes = line.split('",')[0].replace('"', '').replace('\n', '')
            list_of_attributes.append(attributes)

    return list_of_attributes
Exemplo n.º 7
0
 def train(self,stepsize, epochs):
     """Update both weight matrices by gradient steps with negative sampling.

     Parameters
     ----------
     stepsize : factor applied to every weight update
     epochs : number of passes over ``self.contexts``
     """
     for epoch_index in range(epochs):
         print("Epoch n° : {}/{} - {}".format(epoch_index+1, epochs, str(datetime.datetime.now())))

         for target_word, context in prog_bar(self.contexts):
             # NOTE(review): with a numpy array and an integer index this is a
             # view, so the row update below is visible through it -- confirm
             hidden = self.input2hidden_weights[target_word,:]

             # caveat kept from the original author: a word can end up being
             # both a positive and a negative sample here
             for context_word in context:
                 training_outputs = self.create_negative_samples(context_word)

                 # EH is accumulated from the output weights *before* they
                 # are modified by the loop that follows
                 weighted_columns = []
                 for j, tj in training_outputs:
                     output_column = self.hidden2output_weights[:,j]
                     weighted_columns.append((self.sigmoid(output_column, hidden) - tj)*output_column)
                 EH = np.sum(weighted_columns, axis = 0)

                 # output layer update
                 for j, tj in training_outputs:
                     error = self.sigmoid(self.hidden2output_weights[:,j], hidden) - tj
                     self.hidden2output_weights[:,j] -= stepsize * error * hidden.T

                 # input layer update
                 self.input2hidden_weights[target_word, :] -= stepsize * EH.T

     print("Training ended at ",str(datetime.datetime.now()))
Exemplo n.º 8
0
def building_data(path):
    """Read attribute/review pairs from a file and build the vocabulary.

    The first line (header) and every line without ``'['`` are skipped.
    Each remaining line is split on ``'",'``: the first part is the
    attribute string, the second the review. In the review, the values
    found in the ``name[...]``, ``area[...]`` and ``near[...]`` attribute
    fields are replaced by placeholders. The review is then wrapped in
    ``startofreview`` / ``endofreview`` and lower-cased.

    Args:
        path: path of the UTF-8 text file to read.

    Returns:
        Tuple ``(mapper, list_of_attributes, reviews_tokens)``:
        ``mapper`` maps every review token to a 1-based index ordered by
        decreasing frequency, followed by ``'<unk>'`` and ``'<pad>'``;
        ``list_of_attributes`` holds the attribute strings;
        ``reviews_tokens`` holds the token list of every review.

    Raises:
        IndexError: if a kept line contains no ``'",'`` separator.
    """
    placeholders = (
        ('name', 'nameplaceholder'),
        ('area', 'areaplaceholder'),
        ('near', 'nearplaceholder'),
    )
    list_of_reviews = []
    list_of_attributes = []
    with open(path, 'r', encoding='utf-8') as file:
        for line_number, line in prog_bar(enumerate(file)):
            if line_number == 0:
                continue  # skipping the header line
            if '[' not in line:
                continue  # some lines are fuzzy
            fields = line.split('",')
            attributes = fields[0].replace('"', '')  # cosmetic
            review = fields[1].replace('"', '').replace('\n', '')  # cosmetic

            for field_name, placeholder in placeholders:
                # Test for "field[" rather than the bare word: the bare word
                # can occur inside another value, which made the split below
                # raise IndexError.
                marker = field_name + '['
                if marker not in attributes:
                    continue
                value = attributes.split(marker)[1].split(']')[0]
                # An empty value would make str.replace insert the
                # placeholder between every character of the review.
                if value:
                    review = review.replace(value, placeholder)
            review = 'startofreview ' + review + ' endofreview'

            list_of_reviews.append(review.lower())
            list_of_attributes.append(attributes)

    # Count tokens incrementally instead of materialising one flat list of
    # every token in the corpus.
    token_counts = Counter()
    reviews_tokens = []
    for sentence in prog_bar(list_of_reviews):
        tokens = tokenize(sentence)
        token_counts.update(tokens)
        reviews_tokens.append(tokens)

    # Most frequent first; the stable sort keeps first-seen order for ties.
    vocabulary = sorted(token_counts, key=token_counts.get, reverse=True)
    mapper = {
        token: index + 1
        for index, token in prog_bar(enumerate(vocabulary))
    }
    mapper['<unk>'] = len(mapper) + 1
    mapper['<pad>'] = len(mapper) + 1

    return mapper, list_of_attributes, reviews_tokens
Exemplo n.º 9
0
def parsing_list_of_attributes_test(list_of_attributes):
    """Run ``parsing_attributes_test`` on every entry and collect both outputs.

    Args:
        list_of_attributes: iterable whose items are each passed to
            ``parsing_attributes_test``, which must return a pair.

    Returns:
        Tuple ``(attributes_array, dictionaries)``: an ``np.array`` built
        from the first elements of the pairs, and the plain list of the
        second elements.
    """
    parsed_rows, lookups = [], []
    for raw_attributes in prog_bar(list_of_attributes):
        parsed, lookup = parsing_attributes_test(raw_attributes)
        parsed_rows += [parsed]
        lookups += [lookup]
    return np.array(parsed_rows), lookups
Exemplo n.º 10
0
def mapping_reviews(list_of_reviews, mapper, review_len=76):
    """Translate tokenized reviews into a 2-D array of zero-based indices.

    Args:
        list_of_reviews: sequence of token lists.
        mapper: token -> index mapping; unknown tokens fall back to the
            entry stored under ``'<unk>'``. Every index is shifted by -1.
        review_len: number of columns the result is reshaped to.

    Returns:
        Array of shape ``(len(list_of_reviews), review_len)``.
    """
    sample_count = len(list_of_reviews)

    index_rows = []
    for review in prog_bar(list_of_reviews):
        index_rows.append(
            [mapper.get(token, mapper.get('<unk>')) - 1 for token in review])

    stacked = np.array(index_rows, ndmin=2)
    return stacked.reshape(sample_count, review_len)
Exemplo n.º 11
0
def padding_review(list_of_reviews, padding_length):
    """Pad or truncate every review to exactly ``padding_length`` tokens.

    Args:
        list_of_reviews: iterable of reviews, each a list of tokens.
        padding_length: target number of tokens per review.

    Returns:
        ``np.array`` built from the padded/truncated token lists.
    """
    padded_rows = []
    for review in prog_bar(list_of_reviews):
        shortfall = padding_length - len(review)
        # reviews that are already long enough get no filler and are cut
        filler = ['<pad>'] * shortfall if shortfall > 0 else []
        padded_rows.append((review + filler)[:padding_length])
    return np.array(padded_rows)
Exemplo n.º 12
0
def building_data_test(path, mapper, padding_length_att=100, limit=None):
    """Read, tokenize, pad and index the attribute strings of a file.

    The header line and lines without ``'['`` are skipped. Reading stops at
    the first kept line whose number exceeds ``limit`` (when given). Each
    attribute string is tokenized, padded or cut to ``padding_length_att``
    tokens, wrapped in ``'<start>'`` / ``'<end>'`` and translated through
    ``mapper``.

    Args:
        path: path of the UTF-8 text file to read.
        mapper: token -> index mapping; unknown tokens use ``'<unk>'``.
        padding_length_att: number of tokens kept between the two markers.
        limit: optional highest line number to read.

    Returns:
        ``np.array`` of the translated attribute rows.
    """
    tokenized_rows = []
    with open(path, 'r', encoding='utf-8') as file:
        for line_number, line in prog_bar(enumerate(file)):
            text = str(line)
            # header line and fuzzy lines are ignored
            if line_number == 0 or '[' not in text:
                continue
            if limit is not None and line_number > limit:
                break
            raw_attributes = text.split('",')[0].replace('"', '')  # cosmetic
            tokenized_rows.append(tokenize(raw_attributes))

    print('padding attributes ...')
    padded_attributes = []
    for tokens in prog_bar(tokenized_rows):
        filled = tokens + ['<pad>'] * (padding_length_att - len(tokens))
        framed = ['<start>'] + filled[:padding_length_att] + ['<end>']
        padded_attributes.append(framed)

    print('translating attributes ...')
    translated_attributes = [
        [mapper.get(token, mapper.get('<unk>')) for token in att]
        for att in prog_bar(padded_attributes)
    ]

    return np.array(translated_attributes)
Exemplo n.º 13
0
def _calculate_num_points_in_gt(data_path,
                                infos,
                                relative_path,
                                remove_outside=True,
                                num_features=6):
    """Store, in every info's annotations, the point count of each box.

    For each info the float32 point file is loaded and, optionally,
    filtered with ``box_np_ops.remove_outside_points``. The number of
    points falling in each ground-truth box is then written to
    ``info['annos']['num_points_in_gt']`` as an int32 array. Entries beyond
    the non-``'DontCare'`` objects are set to -1.

    Args:
        data_path: directory ``velodyne_path`` is relative to (only used
            when ``relative_path`` is true).
        infos: iterable of info dicts; their ``'annos'`` dict is modified
            in place.
        relative_path: whether ``info['velodyne_path']`` is relative to
            ``data_path``.
        remove_outside: whether to filter the points with
            ``box_np_ops.remove_outside_points`` first.
        num_features: number of float32 values per point in the file.
    """
    for info in prog_bar(infos):
        if relative_path:
            v_path = str(pathlib.Path(data_path) / info["velodyne_path"])
        else:
            v_path = info["velodyne_path"]
        points_v = np.fromfile(v_path, dtype=np.float32, count=-1)
        annos = info['annos']

        try:
            points_v = points_v.reshape([-1, num_features])
        except ValueError:
            # reshape raises ValueError when the file size is not a multiple
            # of num_features; catching only that lets Ctrl-C and real bugs
            # propagate instead of being reported as a bad file.
            annos["num_points_in_gt"] = 0
            print('warning:', v_path)
            continue

        rect = info['calib/R0_rect']
        Trv2c = info['calib/Tr_velo_to_cam']
        P2 = info['calib/P2']
        if remove_outside:
            points_v = box_np_ops.remove_outside_points(
                points_v, rect, Trv2c, P2, info["img_shape"])

        # NOTE(review): slicing with [:num_obj] presumes the 'DontCare'
        # annotations come last -- confirm against the info producer.
        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
        dims = annos['dimensions'][:num_obj]
        loc = annos['location'][:num_obj]
        rots = annos['rotation_y'][:num_obj]
        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                         axis=1)
        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
            gt_boxes_camera, rect, Trv2c)
        indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
        num_points_in_gt = indices.sum(0)
        # the remaining (ignored) annotations are marked with -1
        num_ignored = len(annos['dimensions']) - num_obj
        num_points_in_gt = np.concatenate(
            [num_points_in_gt, -np.ones([num_ignored])])
        annos["num_points_in_gt"] = num_points_in_gt.astype(np.int32)
Exemplo n.º 14
0
      'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves',
      'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are',
      'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does',
      'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until',
      'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into',
      'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down',
      'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here',
      'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more',
      'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so',
      'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now', 'a']
 
 
 # computing wordcount and size of the vocabulary        
 print("Starting word count")
 self.word_count = {}        
 for sentence in prog_bar(sentences):
     for word in sentence:
         if word in self.word_count.keys():
             self.word_count[word] += 1
         else:
             self.word_count[word] = 1
 
 self.word_count = pd.Series(self.word_count)
 print("Word count done")
 
 print("Starting word cleaning")
 # getting rid of too rare words
 self.word_count = self.word_count[self.word_count >= minCount]
 # keeping only valid words
 valid_words = [word for word in self.word_count.index if word not in stopwords]
 self.word_count = self.word_count.loc[valid_words].sort_values(ascending = False)
Exemplo n.º 15
0
def create_groundtruth_database(data_path,
                                info_path=None,
                                used_classes=None,
                                database_save_path=None,
                                db_info_save_path=None,
                                relative_path=True,
                                lidar_only=False,
                                bev_only=False,
                                coors_range=None):
    """Cut every ground-truth object out of its point cloud and index it.

    For each info entry, the points inside each annotated box are shifted
    so that the box centre is the origin and written to
    ``<image_idx>_<name>_<gt_idx>.bin`` in the database directory. A
    description of every object whose class is in ``used_classes`` is
    collected per class and the resulting dict is pickled.

    Args:
        data_path: dataset root directory.
        info_path: pickled info list; defaults to
            ``<data_path>/level5_infos_train.pkl``.
        used_classes: class names to index; defaults to
            ``kitti.get_classes()`` without ``'DontCare'``.
        database_save_path: directory for the ``.bin`` files; defaults to
            ``<data_path>/gt_database``. Created if missing.
        db_info_save_path: output pickle; defaults to
            ``<data_path>/level5_dbinfos_train.pkl``.
        relative_path: when true, ``velodyne_path`` is resolved against
            ``data_path`` and the stored ``path`` is
            ``<database dir name>/<file name>`` instead of the full path.
        lidar_only: when true, points are not filtered with
            ``box_np_ops.remove_outside_points``.
        bev_only: when true, columns 2 and 5 of every lidar box are
            overwritten from ``coors_range`` (which must then be given).
        coors_range: indexable range; entries 2 and 5 are used with
            ``bev_only``.
    """
    root_path = pathlib.Path(data_path)
    if info_path is None:
        info_path = root_path / 'level5_infos_train.pkl'
    if database_save_path is None:
        database_save_path = root_path / 'gt_database'
    else:
        database_save_path = pathlib.Path(database_save_path)
    if db_info_save_path is None:
        db_info_save_path = root_path / "level5_dbinfos_train.pkl"
    database_save_path.mkdir(parents=True, exist_ok=True)
    with open(info_path, 'rb') as f:
        # NOTE: pickle.load can execute arbitrary code; only feed it info
        # files from a trusted source.
        level5_infos = pickle.load(f)
    if used_classes is None:
        used_classes = list(kitti.get_classes())
        used_classes.pop(used_classes.index('DontCare'))
    all_db_infos = {name: [] for name in used_classes}
    group_counter = 0
    for info in prog_bar(level5_infos):
        velodyne_path = info['velodyne_path']
        if relative_path:
            velodyne_path = str(root_path / velodyne_path)
        num_features = 6
        if 'pointcloud_num_features' in info:
            num_features = info['pointcloud_num_features']
        points = np.fromfile(velodyne_path, dtype=np.float32,
                             count=-1).reshape([-1, num_features])

        image_idx = info["image_idx"]
        rect = info['calib/R0_rect']
        P2 = info['calib/P2']
        Trv2c = info['calib/Tr_velo_to_cam']
        if not lidar_only:
            points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2,
                                                      info["img_shape"])

        annos = info["annos"]
        names = annos["name"]
        bboxes = annos["bbox"]
        difficulty = annos["difficulty"]
        gt_idxes = annos["index"]
        # only annotations with a non-negative index are treated as objects
        num_obj = np.sum(gt_idxes >= 0)
        rbbox_cam = kitti.anno_to_rbboxes(annos)[:num_obj]
        rbbox_lidar = box_np_ops.box_camera_to_lidar(rbbox_cam, rect, Trv2c)
        if bev_only:  # set z and h to limits
            assert coors_range is not None
            rbbox_lidar[:, 2] = coors_range[2]
            rbbox_lidar[:, 5] = coors_range[5] - coors_range[2]

        # maps the per-frame group id to a database-wide group id
        group_dict = {}
        if "group_ids" in annos:
            group_ids = annos["group_ids"]
        else:
            # without explicit groups every box is its own group
            group_ids = np.arange(bboxes.shape[0], dtype=np.int64)
        point_indices = box_np_ops.points_in_rbbox(points, rbbox_lidar)
        for i in range(num_obj):
            filename = f"{image_idx}_{names[i]}_{gt_idxes[i]}.bin"
            filepath = database_save_path / filename
            # boolean-mask indexing copies, so `points` itself is untouched
            gt_points = points[point_indices[:, i]]

            # express the points relative to the box centre
            gt_points[:, :3] -= rbbox_lidar[i, :3]
            # raw float32 payload: the handle must be binary
            with open(filepath, 'wb') as f:
                gt_points.tofile(f)
            if names[i] in used_classes:
                if relative_path:
                    db_path = str(database_save_path.stem + "/" + filename)
                else:
                    db_path = str(filepath)
                db_info = {
                    "name": names[i],
                    "path": db_path,
                    "image_idx": image_idx,
                    "gt_idx": gt_idxes[i],
                    "box3d_lidar": rbbox_lidar[i],
                    "num_points_in_gt": gt_points.shape[0],
                    "difficulty": difficulty[i],
                }

                local_group_id = group_ids[i]
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info["group_id"] = group_dict[local_group_id]
                if "score" in annos:
                    db_info["score"] = annos["score"][i]
                all_db_infos[names[i]].append(db_info)
    for k, v in all_db_infos.items():
        print(f"load {len(v)} {k} database infos")

    with open(db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)
Exemplo n.º 16
0
def train(config_path,
          model_dir,
          result_path=None,
          ckpt_path=None,
          create_folder=False,
          display_step=50,
          summary_step=5,
          pickle_result=True):
    """train a VoxelNet model specified by a config file.
    """
    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)

    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    eval_checkpoint_dir = model_dir / 'eval_checkpoints'
    eval_checkpoint_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config

    class_names = list(input_cfg.class_names)
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    ######################
    # BUILD TARGET ASSIGNER
    ######################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    ######################
    # BUILD NET
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    # net_train = torch.nn.DataParallel(net).cuda()
    print("num_trainable parameters:", len(list(net.parameters())))
    # for n, p in net.named_parameters():
    #     print(n, p.shape)
    ######################
    # BUILD OPTIMIZER
    ######################
    # we need global_step to create lr_scheduler, so restore net first.
    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)

    gstep = net.get_global_step() - 1
    optimizer_cfg = train_cfg.optimizer
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    optimizer = optimizer_builder.build(optimizer_cfg, net.parameters())
    if train_cfg.enable_mixed_precision:
        loss_scale = train_cfg.loss_scale_factor
        mixed_optimizer = torchplus.train.MixedPrecisionWrapper(
            optimizer, loss_scale)
    else:
        mixed_optimizer = optimizer
    # must restore optimizer AFTER using MixedPrecisionWrapper

    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir,
                                                       [mixed_optimizer])
    else:
        torchplus.train.restore(ckpt_path, mixed_optimizer)

    lr_scheduler = lr_scheduler_builder.build(optimizer_cfg, optimizer, gstep)
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32
    ######################
    # PREPARE INPUT
    ######################

    dataset = input_reader_builder.build(input_cfg,
                                         model_cfg,
                                         training=True,
                                         voxel_generator=voxel_generator,
                                         target_assigner=target_assigner)
    eval_dataset = input_reader_builder.build(eval_input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)

    def _worker_init_fn(worker_id):
        time_seed = np.array(time.time(), dtype=np.int32)
        np.random.seed(time_seed + worker_id)
        print(f"WORKER {worker_id} seed:", np.random.get_state()[1][0])

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=input_cfg.batch_size,
                                             shuffle=True,
                                             num_workers=input_cfg.num_workers,
                                             pin_memory=False,
                                             collate_fn=merge_second_batch,
                                             worker_init_fn=_worker_init_fn)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=eval_input_cfg.batch_size,
        shuffle=False,
        num_workers=eval_input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)
    data_iter = iter(dataloader)

    ######################
    # TRAINING
    ######################
    log_path = model_dir / 'log.txt'
    logf = open(log_path, 'a')
    logf.write(proto_str)
    logf.write("\n")
    summary_dir = model_dir / 'summary'
    summary_dir.mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(str(summary_dir))

    total_step_elapsed = 0
    remain_steps = train_cfg.steps - net.get_global_step()
    t = time.time()
    ckpt_start_time = t

    total_loop = train_cfg.steps // train_cfg.steps_per_eval + 1
    # total_loop = remain_steps // train_cfg.steps_per_eval + 1
    clear_metrics_every_epoch = train_cfg.clear_metrics_every_epoch

    if train_cfg.steps % train_cfg.steps_per_eval == 0:
        total_loop -= 1
    mixed_optimizer.zero_grad()
    try:
        for _ in range(total_loop):
            if total_step_elapsed + train_cfg.steps_per_eval > train_cfg.steps:
                steps = train_cfg.steps % train_cfg.steps_per_eval
            else:
                steps = train_cfg.steps_per_eval
            for step in range(steps):
                lr_scheduler.step()
                try:
                    example = next(data_iter)
                except StopIteration:
                    print("end epoch")
                    if clear_metrics_every_epoch:
                        net.clear_metrics()
                    data_iter = iter(dataloader)
                    example = next(data_iter)
                example_torch = example_convert_to_torch(example, float_dtype)

                batch_size = example["anchors"].shape[0]

                ret_dict = net(example_torch)

                # box_preds = ret_dict["box_preds"]
                cls_preds = ret_dict["cls_preds"]
                loss = ret_dict["loss"].mean()
                cls_loss_reduced = ret_dict["cls_loss_reduced"].mean()
                loc_loss_reduced = ret_dict["loc_loss_reduced"].mean()
                cls_pos_loss = ret_dict["cls_pos_loss"]
                cls_neg_loss = ret_dict["cls_neg_loss"]
                loc_loss = ret_dict["loc_loss"]
                cls_loss = ret_dict["cls_loss"]
                dir_loss_reduced = ret_dict["dir_loss_reduced"]
                cared = ret_dict["cared"]
                labels = example_torch["labels"]
                if train_cfg.enable_mixed_precision:
                    loss *= loss_scale
                loss.backward()
                torch.nn.utils.clip_grad_norm_(net.parameters(), 10.0)
                mixed_optimizer.step()
                mixed_optimizer.zero_grad()
                net.update_global_step()
                net_metrics = net.update_metrics(cls_loss_reduced,
                                                 loc_loss_reduced, cls_preds,
                                                 labels, cared)

                step_time = (time.time() - t)
                t = time.time()
                metrics = {}
                num_pos = int((labels > 0)[0].float().sum().cpu().numpy())
                num_neg = int((labels == 0)[0].float().sum().cpu().numpy())
                if 'anchors_mask' not in example_torch:
                    num_anchors = example_torch['anchors'].shape[1]
                else:
                    num_anchors = int(example_torch['anchors_mask'][0].sum())
                global_step = net.get_global_step()
                if global_step % display_step == 0:
                    loc_loss_elem = [
                        float(loc_loss[:, :, i].sum().detach().cpu().numpy() /
                              batch_size) for i in range(loc_loss.shape[-1])
                    ]
                    metrics["step"] = global_step
                    metrics["steptime"] = step_time
                    metrics.update(net_metrics)
                    metrics["loss"] = {}
                    metrics["loss"]["loc_elem"] = loc_loss_elem
                    metrics["loss"]["cls_pos_rt"] = float(
                        cls_pos_loss.detach().cpu().numpy())
                    metrics["loss"]["cls_neg_rt"] = float(
                        cls_neg_loss.detach().cpu().numpy())
                    # if unlabeled_training:
                    #     metrics["loss"]["diff_rt"] = float(
                    #         diff_loc_loss_reduced.detach().cpu().numpy())
                    if model_cfg.use_direction_classifier:
                        metrics["loss"]["dir_rt"] = float(
                            dir_loss_reduced.detach().cpu().numpy())
                    metrics["num_vox"] = int(example_torch["voxels"].shape[0])
                    metrics["num_pos"] = int(num_pos)
                    metrics["num_neg"] = int(num_neg)
                    metrics["num_anchors"] = int(num_anchors)
                    metrics["lr"] = float(
                        mixed_optimizer.param_groups[0]['lr'])
                    metrics["image_idx"] = example['image_idx'][0]
                    flatted_metrics = flat_nested_json_dict(metrics)
                    flatted_summarys = flat_nested_json_dict(metrics, "/")
                    for k, v in flatted_summarys.items():
                        if isinstance(v, (list, tuple)):
                            v = {str(i): e for i, e in enumerate(v)}
                            writer.add_scalars(k, v, global_step)
                        else:
                            writer.add_scalar(k, v, global_step)
                    metrics_str_list = []
                    for k, v in flatted_metrics.items():
                        if isinstance(v, float):
                            metrics_str_list.append(f"{k}={v:.3}")
                        elif isinstance(v, (list, tuple)):
                            if v and isinstance(v[0], float):
                                v_str = ', '.join([f"{e:.3}" for e in v])
                                metrics_str_list.append(f"{k}=[{v_str}]")
                            else:
                                metrics_str_list.append(f"{k}={v}")
                        else:
                            metrics_str_list.append(f"{k}={v}")
                    log_str = ', '.join(metrics_str_list)
                    print(log_str, file=logf)
                    print(log_str)
                ckpt_elasped_time = time.time() - ckpt_start_time
                if ckpt_elasped_time > train_cfg.save_checkpoints_secs:
                    torchplus.train.save_models(model_dir, [net, optimizer],
                                                net.get_global_step())
                    ckpt_start_time = time.time()
            total_step_elapsed += steps
            torchplus.train.save_models(model_dir, [net, optimizer],
                                        net.get_global_step())

            # Ensure that all evaluation points are saved forever
            torchplus.train.save_models(eval_checkpoint_dir, [net, optimizer],
                                        net.get_global_step(),
                                        max_to_keep=100)

            net.eval()
            result_path_step = result_path / f"step_{net.get_global_step()}"
            result_path_step.mkdir(parents=True, exist_ok=True)
            print("#################################")
            print("#################################", file=logf)
            print("# EVAL")
            print("# EVAL", file=logf)
            print("#################################")
            print("#################################", file=logf)
            print("Generate output labels...")
            print("Generate output labels...", file=logf)
            t = time.time()
            dt_annos = []
            # prog_bar = ProgressBar()
            # prog_bar.start(len(eval_dataset) // eval_input_cfg.batch_size + 1)
            for example in prog_bar(iter(eval_dataloader)):
                example = example_convert_to_torch(example, float_dtype)
                if pickle_result:
                    dt_annos += predict_kitti_to_anno(net, example,
                                                      class_names,
                                                      center_limit_range,
                                                      model_cfg.lidar_input)
                else:
                    _predict_kitti_to_file(net, example, result_path_step,
                                           class_names, center_limit_range,
                                           model_cfg.lidar_input)

                #prog_bar.print_bar()

            sec_per_ex = len(eval_dataset) / (time.time() - t)
            print(f"avg forward time per example: {net.avg_forward_time:.3f}")
            print(
                f"avg postprocess time per example: {net.avg_postprocess_time:.3f}"
            )

            net.clear_time_metrics()
            print(f'generate label finished({sec_per_ex:.2f}/s). start eval:')
            print(f'generate label finished({sec_per_ex:.2f}/s). start eval:',
                  file=logf)
            gt_annos = [
                info["annos"] for info in eval_dataset.dataset.kitti_infos
            ]
            if not pickle_result:
                dt_annos = kitti.get_label_annos(result_path_step)
            result, mAPbbox, mAPbev, mAP3d, mAPaos = get_official_eval_result(
                gt_annos, dt_annos, class_names, return_data=True)
            print(result, file=logf)
            print(result)
            writer.add_text('eval_result', result, global_step)

            for i, class_name in enumerate(class_names):
                writer.add_scalar('bev_ap:{}'.format(class_name),
                                  mAPbev[i, 1, 0], global_step)
                writer.add_scalar('3d_ap:{}'.format(class_name),
                                  mAP3d[i, 1, 0], global_step)
                writer.add_scalar('aos_ap:{}'.format(class_name),
                                  mAPaos[i, 1, 0], global_step)
            writer.add_scalar('bev_map', np.mean(mAPbev[:, 1, 0]), global_step)
            writer.add_scalar('3d_map', np.mean(mAP3d[:, 1, 0]), global_step)
            writer.add_scalar('aos_map', np.mean(mAPaos[:, 1, 0]), global_step)

            result = get_coco_eval_result(gt_annos, dt_annos, class_names)
            print(result, file=logf)
            print(result)
            if pickle_result:
                with open(result_path_step / "result.pkl", 'wb') as f:
                    pickle.dump(dt_annos, f)
            writer.add_text('eval_result', result, global_step)
            net.train()
    except Exception as e:
        torchplus.train.save_models(model_dir, [net, optimizer],
                                    net.get_global_step())
        logf.close()
        raise e
    # save model before exit
    torchplus.train.save_models(model_dir, [net, optimizer],
                                net.get_global_step())
    logf.close()
Exemplo n.º 17
0
                                                 reverse_mapper)
                review_pred = create_review(tokens[0])
                review_act = generate_list_of_tokens(real_review,
                                                     reverse_mapper)
                review_act = create_review(review_act[0])
                print('actual : ' + review_act)
                print('predicted : ' + review_pred)

        predictions += [
            model.predict(attributes[(i + 1) * 100:(i + 2) * 100, :, :])
        ]
        rouge_scores_1 = []
        rouge_scores_2 = []
        rouge_scores_3 = []
        bleu_score = []
        for index_batch, predictions_batch in prog_bar(enumerate(predictions)):
            for index, predicted_review in enumerate(predictions_batch):
                predicted_review_tokens = np.argmax(predicted_review, axis=-1)
                actual_review = reviews[index_batch * 100 + index]
                rouge_scores_1.append(
                    compute_rouge_score(actual_review,
                                        predicted_review_tokens,
                                        n=1))
                rouge_scores_2.append(
                    compute_rouge_score(actual_review,
                                        predicted_review_tokens,
                                        n=2))
                rouge_scores_3.append(
                    compute_rouge_score(actual_review,
                                        predicted_review_tokens,
                                        n=3))
Exemplo n.º 18
0
def evaluate(config_path,
             model_dir,
             result_path=None,
             predict_test=False,
             ckpt_path=None,
             ref_detfile=None,
             pickle_result=True,
             angle_deg=0.0):
    """Run the network over the evaluation input reader and store detections.

    The pipeline config at ``config_path`` is parsed, the network is built,
    moved to CUDA and restored from a checkpoint, and every batch of the
    evaluation dataloader is pushed through it.

    Args:
        config_path: path of the text-format ``TrainEvalPipelineConfig``.
        model_dir: model directory; used to find the latest checkpoint when
            ``ckpt_path`` is None and as parent of the default result folder.
        result_path: output directory. When None, ``model_dir/predict_test``
            or ``model_dir/eval_results`` is used depending on
            ``predict_test``.
        predict_test: selects the default result folder name; when False the
            ground-truth annotations of the dataset are also loaded.
        ckpt_path: explicit checkpoint to restore instead of the latest one
            found in ``model_dir``.
        ref_detfile: accepted but not used by this function.
        pickle_result: when True, detections are collected in memory and
            pickled to ``result_<angle>.pkl``; when False they are written
            through ``_predict_kitti_to_file`` into the step directory.
        angle_deg: forwarded to the input reader builder and used (formatted
            with ``%03d``) in the pickle file name.
    """
    model_dir = pathlib.Path(model_dir)
    default_subdir = 'predict_test' if predict_test else 'eval_results'
    if result_path is not None:
        result_path = pathlib.Path(result_path)
    else:
        result_path = model_dir / default_subdir

    # Parse the text-format pipeline configuration.
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as config_file:
        text_format.Merge(config_file.read(), config)

    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    class_names = list(input_cfg.class_names)
    center_limit_range = model_cfg.post_center_limit_range
    mixed_precision = train_cfg.enable_mixed_precision

    # Voxel generator, box coder and target assigner needed by the network.
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner = target_assigner_builder.build(
        model_cfg.target_assigner, bv_range, box_coder)

    # Build the network on the GPU, optionally in half precision.
    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    if mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)

    # Restore weights: an explicit checkpoint wins over the latest one.
    if ckpt_path is not None:
        torchplus.train.restore(ckpt_path, net)
    else:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])

    eval_dataset = input_reader_builder.build(
        input_cfg,
        model_cfg,
        training=False,
        voxel_generator=voxel_generator,
        target_assigner=target_assigner,
        angle_deg=angle_deg)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=input_cfg.batch_size,
        shuffle=False,
        num_workers=input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)

    float_dtype = torch.float16 if mixed_precision else torch.float32

    net.eval()
    result_path_step = result_path / f"step_{net.get_global_step()}"
    result_path_step.mkdir(parents=True, exist_ok=True)

    start_time = time.time()
    dt_annos = []
    global_set = None
    print("Generate output labels...")

    for batch in prog_bar(iter(eval_dataloader)):
        batch = example_convert_to_torch(batch, float_dtype)
        if not pickle_result:
            # Detections go straight to label files in the step directory.
            _predict_kitti_to_file(net, batch, result_path_step, class_names,
                                   center_limit_range, model_cfg.lidar_input)
            continue
        dt_annos.extend(
            predict_kitti_to_anno(net, batch, class_names,
                                  center_limit_range, model_cfg.lidar_input,
                                  global_set))

    examples_per_sec = len(eval_dataset) / (time.time() - start_time)
    print(f'generate label finished({examples_per_sec:.2f}/s). start eval:')

    print(f"avg forward time per example: {net.avg_forward_time:.3f}")
    print(f"avg postprocess time per example: {net.avg_postprocess_time:.3f}")

    if not predict_test:
        # Ground truth is loaded here, but no metric is computed from it in
        # this function.
        gt_annos = [
            info["annos"] for info in eval_dataset.dataset.kitti_infos
        ]
        if not pickle_result:
            # Read back the label files written during the loop above.
            dt_annos = kitti.get_label_annos(result_path_step)

    if pickle_result:
        pickle_name = "result_%03d.pkl" % angle_deg
        with open(result_path_step / pickle_name, 'wb') as pickle_file:
            pickle.dump(dt_annos, pickle_file)
Exemplo n.º 19
0
def building_data_train(path, limit=None):
    """Build the padded, index-encoded training arrays from a review file.

    The first line of the file is treated as a header and skipped. Every
    other line that contains ``[`` is split on ``",``: the first field is the
    attribute text, the second the review text. Both are stripped of double
    quotes, tokenized with ``tokenize``, padded with ``<pad>`` to the longest
    sequence of their kind and wrapped in ``<start>`` / ``<end>``.

    Args:
        path: path of the UTF-8 encoded input file.
        limit: when not None, reading stops at the first usable line whose
            line number is greater than ``limit``.

    Returns:
        A tuple ``(mapper, attributes, source_reviews, target_reviews)``:
        ``mapper`` maps every token to its integer index, ``attributes`` is
        the array of encoded attribute sequences, ``source_reviews`` is each
        encoded review followed by the ``<pad>`` index and ``target_reviews``
        is each encoded review preceded by the ``<pad>`` index.

    Raises:
        ValueError: if the file yields no usable line.
    """
    list_of_reviews = []
    list_of_attributes = []

    with open(path, 'r', encoding='utf-8') as file:
        for line_number, line in prog_bar(enumerate(file)):
            if line_number == 0:
                continue  # skipping the header line
            if '[' not in line:
                continue  # some lines are fuzzy
            if limit is not None and line_number > limit:
                break
            fields = line.split('",')
            if len(fields) < 2:
                # No attribute/review separator: treat it like the other
                # fuzzy lines instead of failing with an IndexError.
                continue
            attributes = fields[0].replace('"', '')
            review = fields[1].replace('"', '').replace('\n', '')

            list_of_reviews.append(tokenize(review))
            list_of_attributes.append(tokenize(attributes))

    if not list_of_reviews:
        raise ValueError(f"no usable review lines found in {path!r}")

    pad_len_rev = max(len(review) for review in list_of_reviews)
    pad_len_att = max(len(attributes) for attributes in list_of_attributes)

    # Padding builds new lists so the tokenized inputs are left untouched.
    print('padding attributes ...')
    padded_attributes = [
        ['<start>'] + list(tokens) + ['<pad>'] * (pad_len_att - len(tokens)) +
        ['<end>'] for tokens in prog_bar(list_of_attributes)
    ]

    print('padding reviews ...')
    padded_reviews = [
        ['<start>'] + list(tokens) + ['<pad>'] * (pad_len_rev - len(tokens)) +
        ['<end>'] for tokens in prog_bar(list_of_reviews)
    ]

    # Building the vocabulary. Indices are assigned in first-seen order so the
    # mapping is identical from one run to the next (enumerating a set of
    # strings is not). '<pad>' is always registered because it is needed below
    # even when no sequence had to be padded.
    mapper = {'<pad>': 0}
    for sequence in padded_reviews + padded_attributes:
        for token in sequence:
            mapper.setdefault(token, len(mapper))
    pad_index = mapper['<pad>']

    print('translating attributes ...')
    translated_attributes = [[mapper[token] for token in att]
                             for att in prog_bar(padded_attributes)]

    print('translating reviews ...')
    translated_reviews = [[mapper[token] for token in review]
                          for review in prog_bar(padded_reviews)]

    # The two review arrays are the same sequences offset by one position.
    target_reviews = [[pad_index] + translated_rev
                      for translated_rev in translated_reviews]
    source_reviews = [
        translated_rev + [pad_index] for translated_rev in translated_reviews
    ]

    return mapper, np.array(translated_attributes), np.array(
        source_reviews), np.array(target_reviews)
Exemplo n.º 20
0
              'r') as file2:
        dec = model_from_json(str(json.load(file2)))

    enc.load_weights(os.path.join(PATH_TO_MODEL, 'enc_weights.h5'))
    dec.load_weights(os.path.join(PATH_TO_MODEL, 'dec_weights.h5'))
    print("####### ENCODER #######")
    enc.summary()
    print("####### DECODER #######")
    dec.summary()

    print('predicting the reviews ...')
    predictions = [
        make_prediction(att,
                        encoder=enc,
                        decoder=dec,
                        mapper=mapper,
                        review_length=100,
                        vocabulary_size=len(mapper))
        for att in prog_bar(attributes_test)
    ]
    print(predictions)
    print('cleaning the reviews ...')
    reviews = generate_list_of_tokens(predictions, reverse_mapper)
    reviews = [create_review(rev) for rev in reviews]

    print(reviews)

    with open(PATH_TO_OUTPUT, 'w') as output_file:
        for rev in reviews:
            output_file.write(rev + '\n')