Example #1
 def __save_param(self, _dir, _loss):
     # Unwrap nn.DataParallel so the weights can be reloaded on a single device.
     if self.multi_dev:
         torch.save(self.net.module.state_dict(), PJOIN(_dir, 'weight.pth'))
     else:
         torch.save(self.net.state_dict(), PJOIN(_dir, 'weight.pth'))
     # Training state is stored separately from the weights.
     torch.save(
         {
             'opt': self.opt.state_dict(),
             'sch': self.sch.state_dict(),
             'epoch': self.epoch,
             'cur_loss': _loss,
         }, PJOIN(_dir, 'others.pth'))
Example #2
 def load_param(self, cfg):
     direct = cfg.system['load_path']
     if direct is None: return
     if cfg.mode == 'test':
         print('Test at position: ' + direct)
     elif not cfg.optimizer['no_opt']:
         # Restore optimizer/scheduler state and shift the epoch window so
         # training resumes where the checkpoint left off.
         other = torch.load(PJOIN(direct, 'others.pth'),
                            map_location=lambda storage, loc: storage)
         self.opt.load_state_dict(other['opt'])
         self.sch.load_state_dict(other['sch'])
         self.best = other.get('cur_loss', None)
         self.epoch_start += other.get('epoch', 0)
         self.epoch_end += other.get('epoch', 0)
     # The map_location lambda returns each storage unchanged, which keeps
     # tensors on CPU, so checkpoints saved on GPU load on any machine.
     weight = torch.load(PJOIN(direct, 'weight.pth'),
                         map_location=lambda storage, loc: storage)
     self.net.load_state_dict(weight)
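
Examples #1 and #2 together implement a two-file checkpoint round-trip. A minimal standalone sketch of the same pattern, using a stand-in model and optimizer (none of these names come from the original class):

import torch
import torch.nn as nn

net = nn.Linear(4, 2)                     # stand-in for self.net
opt = torch.optim.Adam(net.parameters())  # stand-in for self.opt

# Save: weights and training state go to separate files, as in Example #1.
torch.save(net.state_dict(), 'weight.pth')
torch.save({'opt': opt.state_dict(), 'epoch': 3, 'cur_loss': 0.5},
           'others.pth')

# Load: the map_location lambda keeps tensors on CPU, so a GPU-trained
# checkpoint restores on a CPU-only machine.
net.load_state_dict(torch.load('weight.pth',
                               map_location=lambda storage, loc: storage))
other = torch.load('others.pth', map_location=lambda storage, loc: storage)
opt.load_state_dict(other['opt'])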
Example #3
 def save(self, loss_now):
     best_save = PJOIN(self.result_dir, 'ckp', 'best')
     if self.epoch == self.epoch_start:
         os.makedirs(best_save, exist_ok=True)  # may already exist when resuming
         if self.best is None: self.best = loss_now
         self.__save_param(best_save, self.best)
         return
     if loss_now < self.best:
         self.best = loss_now  # update the running best before checkpointing it
         self.__save_param(best_save, loss_now)
     if self.epoch != self.epoch_end - 1:
         if self.save_epoch == 0: return  # Just save the best
         if self.epoch % self.save_epoch != 0:
             return  # Save every save_epoch
     now_save = PJOIN(self.result_dir, 'ckp', str(self.epoch))
     os.makedirs(now_save, exist_ok=True)
     self.__save_param(now_save, loss_now)
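
For reference, the directory layout save() produces under result_dir (the epoch number is illustrative):

# result_dir/ckp/best/weight.pth   <- best-so-far weights
# result_dir/ckp/best/others.pth   <- optimizer/scheduler/epoch state
# result_dir/ckp/10/weight.pth     <- periodic snapshot at epoch 10
# result_dir/ckp/10/others.pth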
Example #4
 def log_record(self, dic, board_name):
     log = 'Epoch:{:0>4} '.format(self.epoch)
     for key, val in dic.items():
         log += '{}:{:.5f} '.format(key, val)
     if len(dic) > 4:
         # Many metrics: print one per line instead of one long row.
         print(board_name)
         print(log.replace(' ', '\n'))
     else:
         print(board_name, log)
     if self.writer is not None:
         with open(PJOIN(self.result_dir, board_name + '_log.txt'),
                   'a+') as f:
             f.write(log + '\n')
         self.writer.add_scalars(board_name, dic, self.epoch)
     else:
         # No TensorBoard writer means final-test mode (see Example #11).
         with open(PJOIN(self.result_dir, 'FinalTest.txt'), 'a+') as f:
             f.write(log.replace(' ', '\n'))
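
A worked example of the log format, assuming epoch 7 and two metrics (values illustrative):

# dic = {'loss': 0.123, 'acc': 0.9} yields
# 'Epoch:0007 loss:0.12300 acc:0.90000 '
# and, since len(dic) <= 4, it prints on one line after board_name.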
Example #5
def MergeAll(folder_name):
    global COMMENT_ID
    for file in os.listdir(folder_name):
        if os.path.isdir(PJOIN(folder_name, file)):
            MergeAll(PJOIN(folder_name, file))
            continue

        print("Merging... ", PJOIN(folder_name, file))
        tree = ET.parse(PJOIN(folder_name, file))
        root = tree.getroot()
        for comment in root:  # Element.getchildren() was removed in Python 3.9
            file_path = comment.attrib["file_path"]
            if file_path[:2] == '..':
                file_path = file_path[3:]  # strip a leading '../'
            comment.set('file_path', file_path)
            comment.set('id', str(COMMENT_ID))
            COMMENT_ID += 1
            ROOT.append(comment)
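
Both MergeAll variants (this one and Example #6) rely on module-level state the snippets leave implicit. A plausible setup, offered as an assumption rather than the original code:

import os
import xml.etree.ElementTree as ET
from os.path import join as PJOIN

COMMENT_ID = 0                 # running id assigned to every merged comment
ROOT = ET.Element('comments')  # accumulator that MergeAll appends into
DEBUG = False                  # only consulted by the Example #6 variant

# Hypothetical driver: walk the folder tree, then write the merged document.
# MergeAll('DATA/COMMENTS')
# ET.ElementTree(ROOT).write('merged_comments.xml')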
Example #6
def MergeAll(folder_name):
    global COMMENT_ID, ROOT
    for file in os.listdir(folder_name):
        abspath = PJOIN(folder_name, file)
        if os.path.isdir(abspath):
            MergeAll(abspath)
            continue
        
        if not abspath.endswith("_comments.xml"):
            continue

        if DEBUG:
            print("Merging... ",PJOIN(folder_name, file))

        tree = ET.parse(abspath)
        root = tree.getroot()

        for comment in root:  # Element.getchildren() was removed in Python 3.9
            comment.set('comment_id', str(COMMENT_ID))
            COMMENT_ID += 1
            ROOT.append(comment)
Example #7
def get_all_training_data():
    all_files = []
    if DATA_FILES[0] == 'all':
        for file in os.listdir(DATA_DIR):
            if file[:2] == 'X_':
                all_files.append(file[2:])
    else:
        all_files = DATA_FILES

    all_x = []
    all_y = []
    for file in all_files:
        train_x = pd.read_csv(PJOIN(DATA_DIR, "X_" + file), header=None)
        all_x.append(np.array(train_x))
        train_y = pd.read_csv(PJOIN(DATA_DIR, "Y_" + file), header=None)
        all_y.append(np.array(train_y))  # match all_x, which stores arrays

    all_x = np.concatenate(all_x)
    all_y = np.concatenate(all_y)
    #print(all_x.shape,all_y.shape)
    all_y = all_y.reshape(all_y.shape[0])
    return all_x, all_y
Example #8
def runForFolder(folder_name):
    for file in os.listdir(PJOIN(BASE_DIR, PROJECT_NAME, folder_name)):
        if os.path.isdir(PJOIN(BASE_DIR, PROJECT_NAME, folder_name, file)):
            runForFolder(PJOIN(folder_name, file))
            continue
        split_filename = os.path.splitext(file)
        if split_filename[1] not in [".cpp", ".c"]:
            continue
        xml_file = split_filename[0] + "_clang.xml"
        if not os.path.exists(PJOIN(XML_BASE_DIR, folder_name, xml_file)):
            print("XML File Does not Exist: ",
                  PJOIN(XML_BASE_DIR, folder_name, xml_file))
            continue
        os.system("python2 GenerateCommentsXMLForAFile.py " +
                  PJOIN(BASE_DIR, PROJECT_NAME, folder_name, file) + " " +
                  VOCAB_FILE + " " + PROBLEM_DOMAIN_FILE + " " +
                  PJOIN(XML_BASE_DIR, folder_name, xml_file) + " " +
                  PJOIN(PROJECT_NAME, folder_name, file) + " " +
                  PJOIN(OUTPUT_FOLDER, folder_name, split_filename[0] +
                        "_comments.xml"))
Example #9
def get_all_training_data():
    all_files = []
    if DATA_FILES[0] == 'all':
        for file in os.listdir(DATA_DIR):
            if file[:2] == 'X_':
                all_files.append(file[2:])
    else:
        all_files = DATA_FILES
    
    all_x = []
    all_y = []
    for file in all_files:
        train_x = pd.read_csv(PJOIN(DATA_DIR,"X_"+file),header=None)
        all_x.append(train_x)
    
    all_x = pd.concat(all_x)  
    return all_x
Example #10
def runForFolder(folder_name, my_due=None, first=False):
    # A shared mutable default would leak jobs across top-level calls, so the
    # work queue is created fresh when the caller passes none.
    if my_due is None:
        my_due = list()
    for file in os.listdir(folder_name):
        abspath = PJOIN(folder_name, file)
        if os.path.isdir(abspath):
            runForFolder(abspath, my_due)
            continue
        (filename, ext) = os.path.splitext(abspath)
        if ext not in ['.c', '.C', '.cc', '.cpp', '.cxx', '.c++']:
            continue
        outprefix = getOutputLoc(abspath)
        if not os.path.exists(outprefix + '_clang.xml'):
            print('Skipping: No compile instructions: ' + abspath)
            continue
        if REUSE and os.path.exists(outprefix + '_comments.xml'):
            print('Skipping: Already Exists: ', outprefix + '_comments.xml')
            continue
        print('Generating comments:', abspath)
        my_due.append((abspath, outprefix))

    if first:
        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
            pool.map(makeComGenCall, my_due)
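
makeComGenCall is not shown in this example. Assuming it shells out once per queued job like the sequential variants do, a stand-in might look like this; the exact argument list is a guess, not the original:

import subprocess

def makeComGenCall(job):
    # job is one (abspath, outprefix) tuple queued by runForFolder.
    abspath, outprefix = job
    subprocess.call(['python2', 'GenerateCommentsXMLForAFile.py', abspath,
                     outprefix + '_clang.xml', outprefix + '_comments.xml'])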
Example #11
    def __init__(self, cfg):
        super(Docker, self).__init__()
        # Model setup
        print('Compiling the model ...')
        network_file = 'model.{}'.format(cfg.system['net'][0])
        dataset_file = 'dataset.{}'.format(cfg.dataset['file_name'])
        network_module = importlib.import_module(network_file)
        dataset_module = importlib.import_module(dataset_file)

        use_cuda = len(cfg.system['gpu']) >= 1 and torch.cuda.is_available()
        self.dev = (torch.device('cuda', cfg.system['gpu'][0]) if use_cuda
                    else torch.device('cpu'))
        self.multi_dev = len(cfg.system['gpu']) > 1
        self.epoch = 'test'

        ## Build the network
        self.net = getattr(network_module,
                           cfg.system['net'][1])(**cfg.system['net_param']).to(
                               self.dev)
        self.criterion = network_module.loss(**cfg.system['loss_param'])
        ## Set up the optimizer
        if cfg.mode == 'train':
            self.best = None
            self.epoch_start = 1
            self.eval_on_train = cfg.optimizer['eval_on_train']
            self.epoch_end = cfg.optimizer['max_epoch'] + 1
            self.save_epoch = cfg.optimizer['save_epoch']
            self.max_batch = cfg.optimizer['max_batch']
            if cfg.optimizer['type'] == 'adam':
                self.opt = torch.optim.Adam(self.net.parameters(),
                                            lr=cfg.optimizer['learning_rate'],
                                            **cfg.optimizer['adam'])
            elif cfg.optimizer['type'] == 'sgd':
                self.opt = torch.optim.SGD(self.net.parameters(),
                                           lr=cfg.optimizer['learning_rate'],
                                           **cfg.optimizer['sgd'])
            self.sch = torch.optim.lr_scheduler.MultiStepLR(
                self.opt,
                cfg.optimizer['milestones'],
                gamma=cfg.optimizer['decay_rate'],
                last_epoch=-1)
        self.load_param(cfg)
        ## GPU allocation
        if self.multi_dev and torch.cuda.device_count() > 1:
            self.net = nn.DataParallel(self.net, cfg.system['gpu'])

        # Load the dataset
        print('Loading the dataset ...')
        if cfg.mode == 'train':
            self.trainloader = dataset_module.dataloader(
                cfg.dataset[cfg.mode], cfg.mode)
            if self.max_batch is None: self.max_batch = len(self.trainloader)
            self.testloader = dataset_module.dataloader(
                cfg.dataset['test'],
                'test') if cfg.optimizer['test_on_train'] else None
        else:
            self.testloader = dataset_module.dataloader(
                cfg.dataset[cfg.mode], cfg.mode)

        # Evaluation setup
        self.result_dir = cfg.system['result_dir']
        self.evaluate = network_module.evaluate(**cfg.system['evaluate_param'])
        self.evaluate.result_dir = PJOIN(self.result_dir, 'save')
        # Tensorboard
        self.writer = SummaryWriter(PJOIN(
            self.result_dir, 'tensorboard')) if cfg.mode == 'train' else None
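
Docker.__init__ reads a fairly wide configuration surface. A minimal cfg shape that satisfies every access in the snippet (field names are taken from the code; the values are illustrative):

class Cfg:
    mode = 'train'
    system = {'net': ('resnet', 'ResNet'), 'net_param': {}, 'loss_param': {},
              'gpu': [0], 'load_path': None, 'result_dir': 'results',
              'evaluate_param': {}}
    optimizer = {'type': 'adam', 'learning_rate': 1e-3, 'adam': {}, 'sgd': {},
                 'max_epoch': 100, 'save_epoch': 10, 'max_batch': None,
                 'eval_on_train': False, 'test_on_train': False,
                 'no_opt': False, 'milestones': [50, 80], 'decay_rate': 0.1}
    dataset = {'file_name': 'mnist', 'train': {}, 'test': {}}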
Example #12
def runForFolder(folder_name):
    for file in os.listdir(PJOIN(BASE_DIR, PROJECT_NAME, folder_name)):
        if os.path.isdir(PJOIN(BASE_DIR, PROJECT_NAME, folder_name, file)):
            if not os.path.exists(PJOIN(OUTPUT_FOLDER, folder_name, file)):
                os.mkdir(PJOIN(OUTPUT_FOLDER, folder_name, file))
            runForFolder(PJOIN(folder_name, file))
            continue
        split_filename = os.path.splitext(file)
        if split_filename[1] not in [".cpp",".c"]:
            continue
        xml_file = split_filename[0] + "_clang.xml"
        if not os.path.exists(PJOIN(XML_BASE_DIR, folder_name, xml_file)):
            try:
                # No clang XML yet: generate it on the fly, then move it into place.
                s = "python parsers/clang_parser.py " + PJOIN(BASE_DIR, PROJECT_NAME, folder_name, file)
                for d in dep[PJOIN(BASE_DIR, PROJECT_NAME, folder_name, file)]:
                    s += " " + d
                os.system(s)
                os.system("mv " + PJOIN(BASE_DIR, folder_name, xml_file) + " " +
                          PJOIN(XML_BASE_DIR, folder_name, xml_file))
            except KeyError:
                continue  # no dependency entry recorded for this file
        if os.path.exists(PJOIN(OUTPUT_FOLDER, folder_name, split_filename[0]+"_comments.xml")):
            print("Skipping: Already Exists: ", PJOIN(OUTPUT_FOLDER, folder_name, split_filename[0]+"_comments.xml"))
            continue
	os.chdir("comments/")
        os.system("python2 GenerateCommentsXMLForAFile.py " + PJOIN(BASE_DIR, PROJECT_NAME, folder_name, file) + " " + VOCAB_FILE + " " +
        PROBLEM_DOMAIN_FILE + " " + PJOIN(XML_BASE_DIR, folder_name, xml_file) + " " + PJOIN(PROJECT_NAME, folder_name, file) + " " +
        PJOIN(OUTPUT_FOLDER, folder_name, split_filename[0]+"_comments.xml"))
	os.chdir("../")
Example #13
X = []
Y = []
for file in os.listdir(FEATURES_DIR):
    if not file.endswith("train.csv"):
        continue
    if PROJECT_NAME not in file:
        continue
    # Strip the '<PROJECT_NAME>_' prefix and the 10-char '_train.csv' suffix,
    # then turn the flattened name back into a path.
    fName = file[file.find(PROJECT_NAME) + 1 + len(PROJECT_NAME):-10]
    fName = fName.replace("_", "/")
    if fName not in found:
        print("LEFT: ", fName)
        continue
    #print(fName)
    anno_data = annotations_map[CODENAME_TO_COMMENTSFILENAME[fName]]
    features_file = pd.read_csv(PJOIN(FEATURES_DIR, file),
                                header=None,
                                encoding="ISO-8859-1")
    features_np = np.array(features_file)
    features_map = {}
    for feat in features_np:
        if len(feat[2:]) > 12:
            print("ERROR: Length of features greater than 12")
            print(feat)
        features_map[feat[1]] = feat[2:14]
    for comments_data in anno_data:
        if comments_data[0] not in features_map:
            print("Comment NOT FOUND:", comments_data[0])
            continue
        features = features_map[comments_data[0]]
        labels_intuitive = comments_data[-2]
Example #14
def getOutputLoc(path):
    relpath = path[path.index(PROJECT_NAME):]
    relpath = '/'.join(relpath.split('/')[1:])
    outpath = os.path.join(OUTPUTS_DIR, relpath)
    return PJOIN(outpath, os.path.splitext(outpath.split('/')[-1])[0])
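
A worked trace of getOutputLoc, assuming PROJECT_NAME = 'proj' and OUTPUTS_DIR = 'OUT' (both hypothetical):

# path    = '/src/proj/sub/a.cpp'
# relpath = 'proj/sub/a.cpp' -> 'sub/a.cpp' after dropping the project dir
# outpath = 'OUT/sub/a.cpp'
# returns   'OUT/sub/a.cpp/a', so callers append suffixes to get e.g.
#           'OUT/sub/a.cpp/a_clang.xml' and 'OUT/sub/a.cpp/a_comments.xml'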
Example #15
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from os.path import join as PJOIN
import sys

if len(sys.argv) != 3:
    print("Give 2 Arguments - 1) Annonations file name, 2) Project Name")
    exit(-1)

FILE_NAME = sys.argv[1]
PROJECT_NAME = sys.argv[2]
FILE_PATH = PJOIN("DATA", "ANNOTATED", FILE_NAME)
OUTPUT_FILE_PATH = PJOIN("DATA", "GENERATED", FILE_NAME)
MAP = {'U': 'U', 'PU': 'P', 'NU': 'N'}
THRESHOLD = 10


# c is a vector of size 31 [comment text, C1, C2, ......., C30]
def get_label(c):
    #IF C18 OR C19 OR C20  OR  C21  OR C22 OR C28 OR C29 THEN  U
    if c[18] or c[19] or c[20] or c[21] or c[22] or c[28] or c[29]:
        return 'U'
    #IF C9 AND C3 THEN U
    if c[9] and c[3]:
        return 'U'
    #IF C11 AND C3 THEN U
    if c[11] and c[3]:
        return 'U'
    #IF (C25 OR C23 OR C26 OR C27 ) AND C3 THEN U
    if (c[25] or c[23] or c[26] or c[27]) and c[3]: