示例#1
0
 def repr_code(self,model):
     """Encode the whole code-search dataset with ``model`` and save the vectors.

     The dataset is built from the ``workdir``, ``use_names``/``name_len``,
     ``use_apis``/``api_len`` and ``use_tokens``/``tokens_len`` entries of
     ``self.conf`` and is read in order (no shuffling, no dropped batch).

     Args:
         model: object exposing ``code_encoding(names, apis, toks)``; its
             result is moved to the CPU and converted to a NumPy array.

     Returns:
         The output of ``normalize`` applied to all batch encodings
         concatenated along axis 0. The same array is handed to
         ``save_vecs`` together with ``self.path + self.conf['use_codevecs']``.

     Raises:
         ValueError: if the data loader yields no batches, because
             ``np.concatenate`` rejects an empty sequence.
     """
     use_set = CodeSearchDataset(self.conf['workdir'],
                                   self.conf['use_names'],self.conf['name_len'],
                                   self.conf['use_apis'],self.conf['api_len'],
                                   self.conf['use_tokens'],self.conf['tokens_len'])

     data_loader = torch.utils.data.DataLoader(dataset=use_set, batch_size=1000,
                                        shuffle=False, drop_last=False, num_workers=1)
     # Gather the per-batch arrays and join them once at the end: growing a
     # single array with np.concatenate inside the loop re-copies everything
     # accumulated so far on every batch.
     batch_reprs = []
     for names,apis,toks in data_loader:
         names, apis, toks = gVar(names), gVar(apis), gVar(toks)
         batch_reprs.append(model.code_encoding(names,apis,toks).data.cpu().numpy())
     vecs = normalize(np.concatenate(batch_reprs, 0))
     save_vecs(vecs,self.path+self.conf['use_codevecs'])
     return vecs
示例#2
0
    def repr_code(self, model, norm=True):
        """Encode the whole code-search dataset with ``model`` and save the vectors.

        The dataset is built from the ``workdir``, ``use_names``/``name_len``,
        ``use_apis``/``api_len`` and ``use_tokens``/``tokens_len`` entries of
        ``self.model_params`` (with ``load_in_memory=True``) and is read in
        order (no shuffling, no dropped batch).

        Args:
            model: object exposing ``eval()`` and
                ``code_encoding(names, apis, toks)``; it is switched to
                evaluation mode before encoding.
            norm: when true, the concatenated vectors are passed through
                ``normalize`` before being saved and returned.

        Returns:
            All batch encodings concatenated along axis 0 (normalized when
            ``norm`` is true). The same array is handed to ``save_vecs``
            together with ``self.path + self.model_params['use_codevecs']``.

        Raises:
            ValueError: if the data loader yields no batches, because
                ``np.concatenate`` rejects an empty sequence.
        """
        logging.info("Start Code Representation")
        use_set = CodeSearchDataset(self.model_params['workdir'],
                                    self.model_params['use_names'],
                                    self.model_params['name_len'],
                                    self.model_params['use_apis'],
                                    self.model_params['api_len'],
                                    self.model_params['use_tokens'],
                                    self.model_params['tokens_len'],
                                    load_in_memory=True)

        data_loader = torch.utils.data.DataLoader(dataset=use_set,
                                                  batch_size=1000,
                                                  shuffle=False,
                                                  drop_last=False,
                                                  num_workers=2,
                                                  pin_memory=True)

        # drop_last=False keeps the final partial batch, so the real number of
        # iterations is len(data_loader), not len(use_set) // batch_size.
        total_batches = len(data_loader)

        # eval() does not depend on the batch; switch modes once up front
        # instead of on every iteration.
        model.eval()

        vecs = []
        logging.debug("Calculating code vectors")
        for itr, (names, apis, toks) in enumerate(data_loader, start=1):
            names, apis, toks = gVar(names), gVar(apis), gVar(toks)
            reprs = model.code_encoding(names, apis, toks).data.cpu().numpy()
            vecs.append(reprs)
            if itr % 100 == 0:
                logger.info('itr:{}/{}'.format(itr, total_batches))

        logging.debug("Concatenating all vectors")
        vecs = np.concatenate(vecs, 0)

        if norm:
            logger.debug("Normalizing...")
            vecs = normalize(vecs)

        logging.debug("Writing to disk -  vectors")
        save_vecs(vecs, self.path + self.model_params['use_codevecs'])
        return vecs