def main():
    """Build the issue dataset for repository `r` and plot its Weibull graph."""
    # Set loadFromFile=True when a previously saved dataset file exists.
    dataset = DatasetGenerator(token=GITHUB_TOKEN, repository=r, loadFromFile=True)
    graph = GraphGenerator(dataset)
    graph.weibull()
def async_processing(post_data):
    """RQ worker job: build an issue dataset for a GitHub repo and render its graph.

    Reads the repository URL, token and label filters from `post_data`,
    generates the dataset, and either records an error (too few issues after
    filtering) or renders and saves the Weibull graph image. Progress is
    reported through the current job's meta.

    Args:
        post_data: dict with keys 'github_url', 'github_token', 'must_have'
            and 'blocklist_labels'.
    """
    job = get_current_job()

    url = clean_url(post_data['github_url'])

    filters_rules = {
        'labels': {
            'must_have': post_data['must_have'],
            'blocklist_labels': post_data['blocklist_labels'],
        }
    }

    dg = DatasetGenerator(
        token=post_data['github_token'],
        repository=Repository(url, filters_rules),
        loadFromFile=False,
    )

    # Too few issues after filtering: persist the error once, flag the job
    # and stop. (The message was previously duplicated in two literals.)
    if len(dg.filtered_issues) < 200:
        error_message = (
            "This repository has less than 200 issues after applying the bug filters"
        )
        save_error(dg.repository, error_message)
        job.meta['progress'] = 'ERROR'
        job.meta['error'] = error_message
        job.save_meta()
        return

    gg = GraphGenerator(dg)
    gg.weibull()

    save_image(dg.repository)
    job.meta['progress'] = 100
    job.save_meta()
Exemplo n.º 3
0
def generate_dataset(dataset_type, dataset_path, new_dataset_path, image_size):
    """Build a resized image dataset of the requested type under new_dataset_path."""
    if dataset_type == "class":
        writer = DatasetGenerator(new_dataset_path)
        train_dataset, test_dataset = class_dataset_wrapper(dataset_path)
        images_dir = os.path.join(dataset_path, "Final_Training", "Images")
        create_randomly(train_dataset, images_dir, writer, mode="train", size=image_size)
        #create_randomly(test_dataset,os.path.join(dataset_path, "Final_Test", "Images"), writer, mode="test", size=image_size)
    elif dataset_type == "german":
        train_dataset, valid_dataset, test_dataset = german_dataset_wrapper(dataset_path)
        writer = DatasetGenerator(new_dataset_path)
        # Training split: negatives only (positive creation currently disabled).
        #create_positives(train_dataset[0],dataset_path, writer, mode="train", size=image_size, full=1, crop=1, w_background=0)
        create_negatives(train_dataset[1], dataset_path, writer, mode="train", size=image_size, goal_amount=60000)
        # Test split is built from the validation negatives.
        #create_positives(test_dataset[0],dataset_path, writer, mode="test", size=image_size, full=1, crop=1, w_background=0)
        create_negatives(valid_dataset[1], dataset_path, writer, mode="test", size=image_size, goal_amount=10000)
Exemplo n.º 4
0
 def setup_dataset(self):
     """Create the dataset generator plus validation/test data for training."""
     self.dataset_generator = DatasetGenerator(self.environment_size)
     # NOTE(review): the next two lines assign attributes to themselves —
     # no-ops if the attributes already exist (and AttributeError if they
     # don't). Presumably refactor leftovers; confirm and remove.
     self.sequence_length = self.sequence_length
     self.offset_timing = self.offset_timing
     # Validate every n_epoch/40 epochs against a fixed pool of 20 samples.
     self.validation_timing = self.n_epoch / 40
     self.validation_dataset_length = 20
     self.validation_dataset = [
         self.generate_data() for i in range(self.validation_dataset_length)
     ]
     self.test_data = self.generate_data()
def generate_seq_sklearn(iterations):
    """Collect (hidden-state, coordinate) pairs for sklearn training.

    Runs `iterations` random 100-step evaluation sequences and flattens the
    hidden states and their target coordinates into two parallel lists.

    Args:
        iterations: number of sequences to generate and evaluate.

    Returns:
        tuple: (input_data, label) lists of equal length.
    """
    label = []
    input_data = []
    for _ in range(iterations):
        test_data = DatasetGenerator(maze_size).generate_seq_random(100)
        # Only the hidden states are needed; the error metrics are discarded.
        _, test_hh, _ = evaluate(test_data, test=True)
        label.extend(test_data['coordinates'])
        input_data.extend(test_hh)

    return input_data, label
Exemplo n.º 6
0
    def train(self):
        """Train the slot tagger from scratch, replacing any saved model.

        Wipes the existing model directory, builds a shuffled, indefinitely
        repeating, batched dataset from the configured utterances, and runs
        training for the configured number of steps.
        """
        model_path = self.__get_model_path()
        # Start from a clean directory so stale checkpoints never leak in.
        if os.path.exists(model_path):
            shutil.rmtree(model_path)
        os.makedirs(model_path)

        dataset = DatasetGenerator.generate(self.__max_text_length,
                                            self.__max_named_entity_size,
                                            self.__utterances)
        # repeat(None) repeats forever; training length is bounded by 'steps'.
        dataset = dataset.shuffle(1000).repeat(None).batch(
            self.__hyper_params['batch_size'])

        return self.__slot_tagger.train(dataset, self.__hyper_params['steps'])
Exemplo n.º 7
0
def generate_seq_sklearn(iterations, test):
    """Collect (hidden-state, coordinate) pairs for sklearn processing.

    Args:
        iterations: number of 100-step sequences to generate.
        test: when truthy, keep each sequence as its own nested entry;
            otherwise flatten all sequences into single flat lists.

    Returns:
        tuple: (input_data, label).
    """
    label = []
    input_data = []
    for _ in range(iterations):
        test_data = DatasetGenerator(maze_size).generate_seq(100)
        # Only the hidden states are used; the MSE is discarded.
        _, test_hh = evaluate(test_data, True)
        if test:  # idiomatic truthiness check (was `test == True`)
            label.append(test_data['coordinates'])
            input_data.append(test_hh)
        else:
            label.extend(test_data['coordinates'])
            input_data.extend(test_hh)

    return input_data, label
Exemplo n.º 8
0
def train(run_name, dataset_path, aligns_path):
    """Train a LipNet model for one epoch on the given dataset and alignments."""
    lipnet = LipNet().compile_model()
    datagen = DatasetGenerator(dataset_path, aligns_path)
    callbacks = create_callbacks(run_name)

    started = time.time()
    lipnet.model.fit_generator(
        generator=datagen.train_generator,
        validation_data=datagen.val_generator,
        epochs=1,
        verbose=1,
        shuffle=True,
        max_queue_size=5,
        workers=2,
        callbacks=callbacks,
        use_multiprocessing=True,
    )

    # Report wall-clock duration in H:MM:SS form.
    duration = datetime.timedelta(seconds=time.time() - started)
    print('\nTraining completed in: {}'.format(duration))
Exemplo n.º 9
0
    def predict(self, text: str):
        """Tag slots in `text`; return a list of {'text', 'slot'} dicts.

        Raises:
            EnvironmentError: if no trained model directory exists yet.
        """
        model_path = self.__get_model_path()
        if not os.path.exists(model_path):
            raise EnvironmentError('Should be trained.')

        # Build a single-utterance dataset matching the training input format.
        utterance = Utterance.parse(text, self.__vocabs, self.__named_entity)
        dataset = DatasetGenerator.generate(self.__max_text_length,
                                            self.__max_named_entity_size,
                                            {utterance})
        dataset = dataset.batch(1)

        predictions = self.__slot_tagger.predict(dataset)
        # Trim padding: only the first len(utterance) label ids are meaningful.
        prediction = predictions[0][:len(utterance)]
        labels = list(
            map(lambda num: self.__vocabs['label'].restore(num), prediction))

        # Group IOB labels: 'b-<slot>' opens a slot; 'i-<slot>' extends the
        # previous slot only when its name matches (stray i- tags are dropped).
        slots = []
        for token, label in zip(utterance.tokens, labels):
            if label.startswith('b-'):
                slots.append({
                    'text': [token],
                    'slot': label.replace('b-', '', 1)
                })
            elif label.startswith('i-'):
                slot = label.replace('i-', '', 1)
                if len(slots) > 0 and slots[-1]['slot'] == slot:
                    slots[-1]['text'].append(token)

        # Flatten token lists to plain strings: single tokens keep their own
        # text; multi-token slots are sliced out of the original utterance so
        # inter-token whitespace is preserved.
        for slot in slots:
            if len(slot['text']) == 1:
                slot['text'] = slot['text'][0]['text']
            else:
                start = slot['text'][0]['span'].lower
                end = slot['text'][-1]['span'].upper
                slot['text'] = utterance.plain_text[start:end]

        return slots
Exemplo n.º 10
0
    def __init__(self,
    instence_id=0,
    config_dir='./cfg',
    ):  
        """Initialize workspace paths and the train/val detection dataloaders."""
        # NOTE(review): `root` is read from an enclosing/global scope, not a
        # parameter — confirm it is defined before instantiation.
        self.root=root
        print('root in :\n',os.path.join(self.root,'..'))
        sys.path.append(os.path.join(sys.path[0],'../'))
        print('workspace in:\n')
        for i in sys.path:
            print(i)
        
        # Initialize the dataset mixin explicitly, then the rest of the MRO.
        DatasetGenerator.__init__(self)
        # super(NetworkGenerator,self).__init__()
        super(Instence,self).__init__()
        print('\n\n-----Instence Class Init-----\n\n')

        #####################################################
        #Dataloader
        
        # Training split and its loader (BatchSize / worker_num are expected
        # to be set by one of the mixin initializers above).
        self.TrainSet=DatasetGenerator()
        self.TrainSet.DefaultDataset(Mode='train')
        self.Trainloader=DataLoader(
            self.TrainSet,
            self.BatchSize,
            shuffle=True,
            num_workers=self.worker_num,
            collate_fn=self.TrainSet.detection_collate_fn
        )
        # Validation split and its loader.
        self.ValSet=DatasetGenerator()
        self.ValSet.DefaultDataset(Mode='val')
        self.Valloader=DataLoader(
            self.ValSet,
            self.BatchSize,
            shuffle=True,
            num_workers=self.worker_num,
            collate_fn=self.ValSet.detection_collate_fn
        )
Exemplo n.º 11
0
from matplotlib.mlab import PCA

import numpy as np
import matplotlib.pyplot as plt

#constants
# Geometry for the synthetic datasets: a disc of radius r inside a ring of
# outer radius R, both centered on `center`, plus two line slopes.
center = Point(10, 10)
r = 5
R = 10
slope_asc = 4
slope_desc = -4
# Column indices into the generated Nx2 point arrays.
x = 0
y = 1
NUM_EXMPL = 1000

generator = DatasetGenerator()

# Four synthetic 2-D datasets used to compare PCA behaviour.
ring_dataset = generator.ring_dataset(center, r, R)
circle_dataset = generator.circle_dataset(center, r)
asc_dataset = generator.linear_dataset(center, slope_asc)
desc_dataset = generator.linear_dataset(center, slope_desc)

# Scatter-plot the raw (non-PCA) datasets side by side.
fig = plt.figure(1)
fig.suptitle('Ordinairy PCA')
sub1 = fig.add_subplot(221)
sub1.set_title('Circlular dataset no PCA')
sub1.plot(circle_dataset[:, x], circle_dataset[:, y], '+r')
sub1.plot(ring_dataset[:, x], ring_dataset[:, y], '+b')

sub2 = fig.add_subplot(222)
sub2.set_title('Linear dataset no PCA')
Exemplo n.º 12
0
)

# Restore previously trained denoising-autoencoder parameters into the model.
param = np.load('dae.param.npy.1')
model.copy_parameters_from(param)

def encode(x):
    """Encode `x` through the four stacked encoder layers (sigmoid activations)."""
    for i in range(4):  # idiomatic form of range(0, 4)
        x = F.sigmoid(enc_layer[i](x))
    return x

def decode(h):
    """Decode `h` through the four stacked decoder layers (sigmoid activations)."""
    for i in range(4):  # idiomatic form of range(0, 4)
        h = F.sigmoid(dec_layer[i](h))
    return h

dg = DatasetGenerator((9, 9))
data = np.asarray(dg.generate_dataset_sae(10), dtype='f')
N = len(data)

for n in range(0, N):
    x = chainer.Variable(np.asarray([data[n]], dtype='f'))
    h = encode(x)
    y = decode(h)
    err = F.mean_squared_error(y, x)
    print(err.data)
    plt.subplot(4, 1, 1)
    plt.imshow(np.flipud(x.data.reshape((90, 12)).T), cmap=plt.cm.gray, interpolation='none', vmin=0, vmax=1)
    plt.subplot(4, 1, 2)
    plt.imshow(np.flipud(y.data.reshape((90, 12)).T), cmap=plt.cm.gray, interpolation='none', vmin=0, vmax=1)
    plt.subplot(4, 1, 3)
    plt.imshow(np.absolute(np.flipud((x.data - y.data).reshape((90, 12)).T)), cmap=plt.cm.gray, interpolation='none', vmin=0, vmax=1)
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(args.gpu.split(','))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info(f'Using device {device}')
    net.to(device=device)
    net.load_state_dict(torch.load(args.model, map_location=device))

    logging.info("Model loaded !")
    print("Model loaded !")
    alphanum_key = lambda key: [(int(re.split('_', key)[0][-1]), int(re.split('_', key)[1].split('.')[0]))]
    img_files = sorted(os.listdir(org_img_path), key=alphanum_key)
    true_masks = sorted(os.listdir(gt_mask_path), key=alphanum_key)
    i = 0
    pwcNetwork = PWCNet().cuda().eval()

    datasetGenerator = DatasetGenerator(src_dir=org_img_path)
    if not args.no_viz:
        plt.ion()
        fig, ax = plt.subplots(2, 2, figsize=(8, 4))
        plt.show()

    tot = 0
    total_time = 0
    while i < len(img_files):
        start_time = time.time()
        true_mask = Image.open(os.path.join(gt_mask_path, true_masks[i])).convert('L')
        print("\nPredicting image {} ...".format(img_files[i]))
        if 'png' in img_files[i] or 'jpg' in img_files[i] or 'bmp' in img_files[i]:
            org_img = Image.open(os.path.join(org_img_path, img_files[i]))
            if i == 0:
                # for the first frame, since there is no previous frame, we estimate the optical flow using it self
Exemplo n.º 14
0
# Hidden-unit counts to sweep over during the experiments.
list_n_units = [20, 30, 40, 50, 60]  # list_n_units = [60]

# GPU
parser = argparse.ArgumentParser()
parser.add_argument('--gpu',
                    '-g',
                    default=-1,
                    type=int,
                    help='GPU ID (negative value indicates CPU)')
args = parser.parse_args()
# Array module: CuPy when a GPU id is given, NumPy otherwise.
mod = cuda.cupy if args.gpu >= 0 else np

# validation dataset
# Pre-generate `valid_iter` fixed 100-step sequences for validation.
valid_data_stack = []
for i in range(valid_iter):
    valid_data = DatasetGenerator(maze_size).generate_seq(100)
    valid_data_stack.append(valid_data)

# test dataset
test_data = DatasetGenerator(maze_size).generate_seq(100)


# one-step forward propagation
def forward_one_step(x, t, state, train=True):
    # if args.gpu >= 0:
    #     data = cuda.to_gpu(data)
    #     targets = cuda.to_gpu(targets)
    x = chainer.Variable(x, volatile=not train)
    t = chainer.Variable(t, volatile=not train)
    h_in = model.x_to_h(x) + model.h_to_h(state['h'])
    c, h = F.lstm(state['c'], h_in)
Exemplo n.º 15
0
class Instence(NetworkGenerator, DatasetGenerator, Dataset):
    """Training/validation harness wiring a network to detection dataloaders.

    Combines the network generator, the dataset generator and the torch
    Dataset protocol; builds the train/val loaders on construction.
    """

    def __init__(self,
                 instence_id=0,
                 config_dir='./cfg',
                 ):
        """Set up workspace paths and the train/val dataloaders."""
        # NOTE(review): `root` is resolved from module/global scope, not a
        # parameter — confirm it is defined before instantiation.
        self.root = root
        print('root in :\n', os.path.join(self.root, '..'))
        sys.path.append(os.path.join(sys.path[0], '../'))
        print('workspace in:\n')
        for i in sys.path:
            print(i)

        # Initialize the dataset mixin explicitly, then the rest of the MRO.
        DatasetGenerator.__init__(self)
        # super(NetworkGenerator,self).__init__()
        super(Instence, self).__init__()
        print('\n\n-----Instence Class Init-----\n\n')

        #####################################################
        # Dataloader

        # Training split and loader (BatchSize / worker_num are expected to
        # be set by one of the mixin initializers above).
        self.TrainSet = DatasetGenerator()
        self.TrainSet.DefaultDataset(Mode='train')
        self.Trainloader = DataLoader(
            self.TrainSet,
            self.BatchSize,
            shuffle=True,
            num_workers=self.worker_num,
            collate_fn=self.TrainSet.detection_collate_fn
        )
        # Validation split and loader.
        self.ValSet = DatasetGenerator()
        self.ValSet.DefaultDataset(Mode='val')
        self.Valloader = DataLoader(
            self.ValSet,
            self.BatchSize,
            shuffle=True,
            num_workers=self.worker_num,
            collate_fn=self.ValSet.detection_collate_fn
        )
        #######################################################

    def ToDecive(self, images, targets):
        """Move a batch of images and target dicts onto the configured device."""
        images = list(img.to(self.device) for img in images)
        targets = [{k: v.to(self.device) for k, v in t.items()} for t in targets]
        return images, targets

    def targetmap(self):
        """Placeholder for target-mapping logic (not implemented)."""
        pass

    def InstenceInfo(self):
        """Print environment, dataset and network information for this instance."""
        print('\n\n-----Start with Instence ID', self.InstanceID, '-----\n\n')
        self.Enviroment_Info()
        self.DatasetInfo()
        self.NetWorkInfo()

    def train(self):
        """Run the training loop over all configured epochs."""
        print('\n\n----- Start Training -----\n\n')
        for epoch in range(self.epochs):
            print('---Epoch : ', epoch)
            for index, (images, targets) in enumerate(self.Trainloader):
                images, targets = self.ToDecive(images, targets)
                self.optimizer.zero_grad()
                # BUGFIX: the original called `train.model(...)` and
                # `train.Optimzer.step()`, dereferencing the bound method
                # object instead of the instance attributes.
                loss_dict = self.model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                loss = losses.cpu().detach().numpy()
                print('-----Step', index, '--LOSS--', loss)
                losses.backward()
                self.optimizer.step()

    def val(self, valloader):
        """Placeholder for the validation loop."""
        print('\n\n----- Val Processing -----\n\n')

    def inference(self):
        """Placeholder for the inference loop."""
        print('\n\n----- Inference Processing -----\n\n')

    def Evaluation(self):
        """Placeholder for the evaluation entry point."""
        print('\n\n----- Evaluation Processing -----\n\n')

    @torch.no_grad()
    def evaluate(self, data_loader):
        """Run COCO-style evaluation of self.model over `data_loader`."""
        model = self.model
        device = self.device
        n_threads = torch.get_num_threads()
        # FIXME remove this and make paste_masks_in_image run on the GPU
        torch.set_num_threads(1)
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter="  ")
        header = 'Test:'

        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        # BUGFIX: the loop variable was `image` while the body read the
        # undefined name `images` (NameError on the first batch); the batch
        # is now named consistently.
        for images, targets in metric_logger.log_every(data_loader, 100, header):
            images = list(img.to(device) for img in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(images)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
        # NOTE(review): no accumulate()/summarize() call after the loop and no
        # return value — results are only logged; confirm this is intended.
# Hidden-unit counts available for the experiment sweep.
list_n_units = [20, 30, 40, 50, 60]  # list_n_units = [60]

# GPU
parser = argparse.ArgumentParser()
parser.add_argument('--gpu',
                    '-g',
                    default=-1,
                    type=int,
                    help='GPU ID (negative value indicates CPU)')
args = parser.parse_args()
# Use CuPy as the array module when a GPU id was supplied, NumPy otherwise.
mod = cuda.cupy if args.gpu >= 0 else np

# validation dataset
# Build `valid_iter` fixed 100-step validation sequences up front.
valid_data_stack = []
for i in range(valid_iter):
    valid_data = DatasetGenerator(maze_size).generate_seq(100)
    valid_data_stack.append(valid_data)

# test dataset
test_data = DatasetGenerator(maze_size).generate_seq(100)


# one-step forward propagation
def forward_one_step(x, t, state, train=True):
    # if args.gpu >= 0:
    #     data = cuda.to_gpu(data)
    #     targets = cuda.to_gpu(targets)
    x = chainer.Variable(x, volatile=not train)
    t = chainer.Variable(t, volatile=not train)
    h_in = model.x_to_h(x) + model.h_to_h(state['h'])
    c, h = F.lstm(state['c'], h_in)
Exemplo n.º 17
0
# Training sequence length(s) and the within-sequence offset interval.
train_data_length = [100]
offset_timing = 1

# Number of validation sequences to pre-generate.
valid_iter = 20

# GPU
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', '-g', default=-1, type=int,
                    help='GPU ID (negative value indicates CPU)')                    
args = parser.parse_args()
# Array module: CuPy when a GPU id is given, NumPy otherwise.
mod = cuda.cupy if args.gpu >= 0 else np

# validation dataset
valid_data_stack = []
for i in range(valid_iter):
    valid_data = DatasetGenerator(maze_size).generate_seq(100, offset_timing)
    valid_data_stack.append(valid_data)

# test dataset
test_data = DatasetGenerator(maze_size).generate_seq(100, offset_timing)

# model
# Single-layer LSTM: 64-dim input -> n_units hidden -> one output per maze
# cell. The *4 widths pack the LSTM input/forget/cell/output gates.
model = chainer.FunctionSet(
    x_to_h = F.Linear(64, n_units * 4),
    h_to_h = F.Linear(n_units, n_units * 4),
    h_to_y = F.Linear(n_units, maze_size[0] * maze_size[1]))
if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
    model.to_gpu()
    
Exemplo n.º 18
0
    def __init__(
        self,
        instence_id=0,
        configfile='./cfg',
    ):
        """Initialize workspace paths, datasets, samplers and dataloaders.

        Args:
            instence_id: numeric id of this instance (not used in this body).
            configfile: config path forwarded to DatasetGenerator.
        """

        # ---------------------------------------------------------------------------- #
        #                                workspace info                                #
        # ---------------------------------------------------------------------------- #

        # NOTE(review): `root` is read from module/global scope — confirm it
        # is defined before instantiation.
        self.root = root
        self.configfile = configfile
        print('root in :\n', os.path.join(self.root, '..'))
        sys.path.append(os.path.join(sys.path[0], '../'))
        print('workspace in:\n')
        for i in sys.path:
            print(i)

        # Initialize the dataset mixin with the same config file.
        DatasetGenerator.__init__(self, configfile=configfile)

        # super(Instence,self).__init__()
        print('\n\n-----Instence Class Init-----\n\n')

        # ---------------------------------------------------------------------------- #
        #                                  dataloader                                  #
        # ---------------------------------------------------------------------------- #

        # ------------------------------ dataset object ------------------------------ #

        # COCO-style transform pipeline: polygon-to-mask conversion, tensor
        # conversion, then random horizontal flips for augmentation.
        transforms = []
        transforms.append(ConvertCocoPolysToMask())
        transforms.append(T.ToTensor())
        transforms.append(T.RandomHorizontalFlip(0.5))
        self.transform_compose = T.Compose(transforms)

        # ---------------------------------------------------------------------------- #
        #                                   temp part                                  #
        # ---------------------------------------------------------------------------- #

        if self.DefaultDataset:
            self.datasets = DatasetGenerator(transforms=self.transform_compose,
                                             configfile=configfile)
            self.datasets.DefaultDatasetFunction()

            # Drop training images without annotations (standard COCO cleanup).
            self.trainset = _coco_remove_images_without_annotations(
                self.datasets.trainset)
            self.valset = self.datasets.valset
            print('-----train&val set already done')

        # ----------------------------- DataLoader object ---------------------------- #

        if self.DistributedDataParallel:
            self.train_sampler = torch.utils.data.distributed.DistributedSampler(
                self.trainset)
            self.test_sampler = torch.utils.data.distributed.DistributedSampler(
                self.valset)
            print("-----DistributedDataParallel Sampler build done")
            self.model = torch.nn.parallel.DistributedDataParallel(
                self.model, device_ids=self.gpu_id)
            self.model_without_ddp = self.model.module

        if not self.DistributedDataParallel:

            self.train_sampler = torch.utils.data.RandomSampler(self.trainset)
            self.test_sampler = torch.utils.data.SequentialSampler(self.valset)
            print("-----DataSampler build done")

        # ---------------------------------- Sampler --------------------------------- #

        # Batch images of similar aspect ratio together when grouping is
        # enabled; otherwise use a plain batch sampler.
        if self.aspect_ratio_factor >= 0:
            self.group_ids = create_aspect_ratio_groups(
                self.trainset, k=self.aspect_ratio_factor)
            self.train_batch_sampler = GroupedBatchSampler(
                self.train_sampler, self.group_ids, self.BatchSize)
        else:
            self.train_batch_sampler = torch.utils.data.BatchSampler(
                self.train_sampler, self.BatchSize, drop_last=True)

        # ---------------------------------- loader ---------------------------------- #

        self.trainloader = torch.utils.data.DataLoader(
            self.trainset,
            batch_sampler=self.train_batch_sampler,
            num_workers=self.worker_num,
            collate_fn=self.collate_fn)

        self.valloader = torch.utils.data.DataLoader(
            self.valset,
            batch_size=self.BatchSize,
            sampler=self.test_sampler,
            num_workers=self.worker_num,
            collate_fn=self.collate_fn)
Exemplo n.º 19
0
# Array module: CuPy when a GPU id is given, NumPy otherwise.
mod = cuda.cupy if args.gpu >= 0 else np

# monkey patching type check
def sigmoid_cross_entropy_check_type_forward(self, in_types):
    # Replacement type check that expects float32 targets (same dtype and
    # shape as the inputs) instead of chainer's stock requirement.
    type_check.expect(in_types.size() == 2)

    x_type, t_type = in_types
    type_check.expect(
        x_type.dtype == mod.float32,
        t_type.dtype == mod.float32,
        x_type.shape == t_type.shape
    )
F.SigmoidCrossEntropy.check_type_forward = sigmoid_cross_entropy_check_type_forward

# generate dataset
dg = DatasetGenerator(maze_size)

# validation dataset
valid_data = dg.generate_seq(100)

# test dataset
test_data = dg.generate_seq(100)

# model
# Single-layer LSTM: 16-dim input -> n_units hidden -> 12-dim output.
# The *4 widths pack the LSTM input/forget/cell/output gates.
model = chainer.FunctionSet(
        x_to_h = F.Linear(16, n_units * 4),
        h_to_h = F.Linear(n_units, n_units * 4),
        h_to_y = F.Linear(n_units, 12))
if args.gpu >= 0:
    print('using GPU #%s' % args.gpu)
    cuda.check_cuda_available()
Exemplo n.º 20
0
# SVM and clustering parameters
ev_iterations = 100  # iterations for generating SVM and clustering dataset

# GPU
parser = argparse.ArgumentParser()
parser.add_argument('--gpu',
                    '-g',
                    default=-1,
                    type=int,
                    help='GPU ID (negative value indicates CPU)')
args = parser.parse_args()
# Array module: CuPy when a GPU id is given, NumPy otherwise.
mod = cuda.cupy if args.gpu >= 0 else np

# LSTM validation dataset: random
valid_data = DatasetGenerator(maze_size).generate_seq_random(100)

# LSTM model
# 64-dim input -> n_units hidden -> 60-dim output; *4 packs the LSTM gates.
model = chainer.FunctionSet(x_to_h=F.Linear(64, n_units * 4),
                            h_to_h=F.Linear(n_units, n_units * 4),
                            h_to_y=F.Linear(n_units, 60))
if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
    model.to_gpu()

# LSTM optimizer
optimizer = optimizers.SGD(lr=1.)
optimizer.setup(model.collect_parameters())

Exemplo n.º 21
0
def create_dataset_from_mtcnn_output(image_proc,
                                     b_boxes,
                                     width,
                                     height,
                                     sign_position,
                                     size,
                                     dataset_path,
                                     mode,
                                     neg_delete):
    """Create dataset samples from MTCNN bounding-box output.

    Args:
        image_proc: Image_processor instance the crops are taken from.
        b_boxes: net output dict; 'pictures' holds per-box source crop
            coordinates and 'offsets' holds Nx4 per-box offset corrections.
        width: per-box widths (parallel to b_boxes entries).
        height: per-box heights (parallel to b_boxes entries).
        sign_position: dict keyed by box index with the matched sign info
            ('offset', 'iou', 'class', 'super-class').
        size: output image side length in pixels.
        dataset_path: destination directory handled by DatasetGenerator.
        mode: dataset split forwarded to save_img.
        neg_delete: fraction in <0,1> of negative boxes to randomly discard
            (0 keeps all negatives, 1 drops all of them).

    Raises:
        ValueError: if image_proc is not an Image_processor, or neg_delete
            lies outside <0,1>.
    """
    if not isinstance(image_proc, Image_processor):
        raise ValueError(
            "image_proc argument has to be instance of Image_processor class")
    # neg_delete is used as a probability, so it must lie in <0,1>.
    if neg_delete < 0 or neg_delete > 1:
        raise ValueError("Parameter neg_delete, can have values from interval <0,1>")

    data_proc = DatasetGenerator(dataset_path)
    for index in range(len(b_boxes['pictures'])):
        box_width = int(width[index])
        box_height = int(height[index])

        # Boxes with a non-positive side cannot be cropped — skip them.
        if box_width <= 0 or box_height <= 0:
            continue

        img_container = np.zeros((int(height[index]), int(width[index]), 3))

        offsets = b_boxes['offsets'][index]
        picture_coor = b_boxes['pictures'][index]
        x1 = offsets[0]
        y1 = offsets[1]
        x2 = img_container.shape[1] + offsets[2]
        y2 = img_container.shape[0] + offsets[3]

        # Crop from the input image, unnormalize and resize to the target size.
        img_container[int(y1):int(y2), int(x1):int(x2)] = image_proc.crop_picture(*picture_coor[0:4])
        image = unnormalize_image(img_container)
        image = cv2.resize(image, dsize=(size, size))

        # OpenCV saves BGR, so reorder channels before writing to disk.
        image = change_channel_order(image, current=Image_processor.channel_order, new="BGR")

        # Boxes that matched no sign are negatives; randomly drop a
        # `neg_delete` fraction of them to balance the dataset.
        if index not in sign_position:
            ran_val = random.uniform(0.0, 1.0)
            if ran_val < neg_delete:
                continue

            data_proc.save_img(image=image,
                               sample_type="negatives",
                               coordinates=[0, 0, 0, 0],
                               box_width=0,
                               box_height=0,
                               mode=mode)
            continue

        sign = sign_position[index]

        # Rescale the sign coordinates into the resized image's pixel grid.
        norm_coor = normalize_coordinates(norm_max=size-1,
                                        width=img_container.shape[1],
                                        height=img_container.shape[0],
                                        coor=sign['offset'])

        new_box_width = norm_coor[2] - norm_coor[0] + 1
        new_box_height = norm_coor[3] - norm_coor[1] + 1

        # Classify the sample by IoU with the matched sign.
        if sign['iou'] > 0.65:
            type_name = "positives"
        elif sign['iou'] >= 0.40:
            type_name = "parts"
        elif sign['iou'] < 0.30:
            type_name = "negatives"
        else:
            # IoU in (0.30, 0.40) is ambiguous — save nothing.
            continue

        # BUGFIX: box_width/box_height were swapped in the original call
        # (box_width received new_box_height and box_height received
        # new_box_width), contradicting how the values are computed above.
        data_proc.save_img(image=image,
                           sample_type=type_name,
                           base_class=sign['class'],
                           super_class=sign['super-class'],
                           coordinates=norm_coor,
                           box_width=new_box_width,
                           box_height=new_box_height,
                           mode=mode)
Exemplo n.º 22
0
    silhouette_scores = []
    is_forgy = init is 'forgy'
    for num_iterations in range(1, max_iter+1):
        scores = []
        for run in range(num_runs):
            if is_forgy :
                init = data[np.random.choice(data.shape[0], k, replace=False)]
            kmeans = KMeans(n_clusters=k, init=init, max_iter=num_iterations, n_init=1).fit(data_set)
            scores.append(silhouette_score(data_set, kmeans.labels_))
        silhouette_scores.append(scores)
    print('...finished.')
    return silhouette_scores

# Clustering experiment configuration.
radius = 5
scaling_factor = 10
generator = DatasetGenerator()
k= 9
num_runs = 30
max_iterations = 20
# y-axis limits used when plotting silhouette scores.
y_min, y_max = 0.4, 0.7

centers = [Point((i - i % 3), (i % 3)) for i in range(3, 12)]# 3x3 grid
centers = scale_center_points(centers, scaling_factor=scaling_factor)

# One circular cluster of 200 points around each grid center.
data_set = []

for center in centers:
    data_set.append(generator.circle_dataset(center, radius, num=200))

data_set = np.concatenate(data_set)
Exemplo n.º 23
0
    command, "supervised", "-input", train_file, "-output", model_location,
    "-epoch",
    str(epoch), "-wordNgrams",
    str(wordNgrams), "-lr",
    str(lr), "-dim",
    str(dim), "-ws",
    str(ws), "-minn",
    str(minn), "-maxn",
    str(maxn), "-minCount",
    str(minCount)
]

precisions = []

datasetgen = DatasetGenerator(dataset_path=fname,
                              dest_folder=folder,
                              kfolds=10)

i = 0
for train_file, test_file in datasetgen:
    i += 1

    if os.path.isfile(test_file) and os.path.isfile(train_file):
        print "Shuffling training set"
        subprocess.call(["shuf", train_file, "-o", train_file])
        print "Training set", train_file
        subprocess.call(train_cmd_generator(train_file))
        print "Testing set", test_file
        cmd = subprocess.Popen(
            [command, "test", model_location + ".bin", test_file, "1"],
            stdout=subprocess.PIPE)
Exemplo n.º 24
0
# Maze dimensions used for all generated sequences.
maze_size = (9, 9)

train_data_length = [20, 100]

# GPU
parser = argparse.ArgumentParser()
parser.add_argument('--gpu',
                    '-g',
                    default=-1,
                    type=int,
                    help='GPU ID (negative value indicates CPU)')
args = parser.parse_args()
# Array module: CuPy when a GPU id is given, NumPy otherwise.
mod = cuda.cupy if args.gpu >= 0 else np

# generate dataset
dg = DatasetGenerator(maze_size)


# test dataset
def generate_test_dataset():
    # Fresh 100-step test sequence from the shared generator.
    return dg.generate_seq(100)


# model
# Load the pickled pre-trained model for the configured hidden-unit count.
test_data = generate_test_dataset()
f = open('pretrained_model_' + str(n_units) + '.pkl', 'rb')
model = pickle.load(f)
f.close()
if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
Exemplo n.º 25
0
class Instence(DatasetGenerator):
    """Detection training/evaluation harness built on DatasetGenerator.

    Wires together dataset construction, (distributed) samplers, data
    loaders and the train/val loops for a torchvision detection model.
    All configuration attributes referenced here (DefaultDataset,
    DistributedDataParallel, BatchSize, epochs, model, device, ...) are
    populated by the DatasetGenerator base-class __init__ from
    `configfile`.
    """

    def __init__(
        self,
        instence_id=0,
        configfile='./cfg',
    ):

        # ---------------------------------------------------------------------------- #
        #                                workspace info                                #
        # ---------------------------------------------------------------------------- #

        # NOTE(review): `root` is not defined in this method or visible in
        # this module — presumably a module-level global set elsewhere;
        # confirm before relying on it.
        self.root = root
        self.configfile = configfile
        print('root in :\n', os.path.join(self.root, '..'))
        # Make the package parent importable when running as a script.
        sys.path.append(os.path.join(sys.path[0], '../'))
        print('workspace in:\n')
        for entry in sys.path:
            print(entry)

        # Base-class init parses the config file and sets the attributes
        # used throughout this class.
        DatasetGenerator.__init__(self, configfile=configfile)

        # super(Instence,self).__init__()
        print('\n\n-----Instence Class Init-----\n\n')

        # ---------------------------------------------------------------------------- #
        #                                  dataloader                                  #
        # ---------------------------------------------------------------------------- #

        # ------------------------------ dataset object ------------------------------ #

        transforms = []
        transforms.append(ConvertCocoPolysToMask())
        transforms.append(T.ToTensor())
        transforms.append(T.RandomHorizontalFlip(0.5))
        self.transform_compose = T.Compose(transforms)

        # ---------------------------------------------------------------------------- #
        #                                   temp part                                  #
        # ---------------------------------------------------------------------------- #

        if self.DefaultDataset:
            self.datasets = DatasetGenerator(transforms=self.transform_compose,
                                             configfile=configfile)
            self.datasets.DefaultDatasetFunction()

            # Drop COCO images that carry no usable annotations.
            self.trainset = _coco_remove_images_without_annotations(
                self.datasets.trainset)
            self.valset = self.datasets.valset
            print('-----train&val set already done')

        # ----------------------------- DataLoader object ---------------------------- #

        if self.DistributedDataParallel:
            self.train_sampler = torch.utils.data.distributed.DistributedSampler(
                self.trainset)
            self.test_sampler = torch.utils.data.distributed.DistributedSampler(
                self.valset)
            print("-----DistributedDataParallel Sampler build done")
            self.model = torch.nn.parallel.DistributedDataParallel(
                self.model, device_ids=self.gpu_id)
            self.model_without_ddp = self.model.module

        if not self.DistributedDataParallel:
            self.train_sampler = torch.utils.data.RandomSampler(self.trainset)
            self.test_sampler = torch.utils.data.SequentialSampler(self.valset)
            print("-----DataSampler build done")

        # ---------------------------------- Sampler --------------------------------- #

        # Batch images of similar aspect ratio together to reduce padding
        # waste; otherwise fall back to a plain batch sampler.
        if self.aspect_ratio_factor >= 0:
            self.group_ids = create_aspect_ratio_groups(
                self.trainset, k=self.aspect_ratio_factor)
            self.train_batch_sampler = GroupedBatchSampler(
                self.train_sampler, self.group_ids, self.BatchSize)
        else:
            self.train_batch_sampler = torch.utils.data.BatchSampler(
                self.train_sampler, self.BatchSize, drop_last=True)

        # ---------------------------------- loader ---------------------------------- #

        self.trainloader = torch.utils.data.DataLoader(
            self.trainset,
            batch_sampler=self.train_batch_sampler,
            num_workers=self.worker_num,
            collate_fn=self.collate_fn)

        self.valloader = torch.utils.data.DataLoader(
            self.valset,
            batch_size=self.BatchSize,
            sampler=self.test_sampler,
            num_workers=self.worker_num,
            collate_fn=self.collate_fn)

        # ---------------------------------------------------------------------------- #
        #                               Instance Function                              #
        # ---------------------------------------------------------------------------- #

    def InstenceInfo(self):
        """Print environment, dataset and network summaries for this run."""
        print('\n\n-----Start with Instence ID', self.InstanceID, '-----\n\n')
        self.Enviroment_Info()
        self.DatasetInfo()
        self.NetWorkInfo()

    def init_train(self):
        """
            PROCESS OF TRAIN:
            1.INIT:
            if resume:
                load pretrain model
            init optimizer
            init lrscheduler
            init tensorboard
        """
        if self.resume:
            assert os.path.exists(self.checkpoint), "Invalid resume model path"
            # BUGFIX: load into a local variable instead of overwriting
            # self.checkpoint — default_train() later uses self.checkpoint
            # as the *directory* to save new checkpoints into.
            checkpoint = torch.load(self.checkpoint)
            self.model_without_ddp.load_state_dict(checkpoint['model'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        # ---------------------------------------------------------------------------- #
        #                                  tensorboard                                 #
        # ---------------------------------------------------------------------------- #
        if self.visualization:
            self.writer = SummaryWriter(log_dir=self.logdir,
                                        comment='experiment' +
                                        str(self.InstanceID))
            self.start = False

    def default_train(self):
        """Run the full training loop, checkpointing whenever the epoch
        loss improves on the best seen so far."""
        print('\n\n----- Start Training -----\n\n')
        start_time = time.time()
        self.init_train()
        baseloss = 0
        for epoch in range(0, self.epochs):
            # ---------------------------------------------------------------------------- #
            #                                 epoch process                                #
            # ---------------------------------------------------------------------------- #
            sumloss = self.train_one_epoch(epoch)
            self.lr_scheduler.step()
            self.evaluate()
            if epoch == 0:
                baseloss = sumloss
            if sumloss < baseloss:
                # BUGFIX: track the new best loss; the original never
                # updated baseloss, so every epoch below the epoch-0 loss
                # wrote yet another checkpoint file.
                baseloss = sumloss
                print("\n\n\n-----Model Update & Save")
                state = {
                    "model": self.model.state_dict(),
                    "optimizer": self.optimizer.state_dict(),
                    'epoch': epoch
                }
                torch.save(
                    state, os.path.join(self.checkpoint,
                                        str(sumloss) + '.pth'))
            # ---------------------------------------------------------------------------- #
            #                                 epoch process                                #
            # ---------------------------------------------------------------------------- #

    def default_val(self):
        """Placeholder for a standalone validation pass."""
        print('\n\n----- Val Processing -----\n\n')

    def inference(self):
        """Placeholder for an inference pass."""
        print('\n\n----- Inference Processing -----\n\n')

    def Evaluation(self):
        """Placeholder for a standalone evaluation pass."""
        print('\n\n----- Evaluation Processing -----\n\n')

    def train_one_epoch(self, epoch, print_freq=10):
        """Train on every batch of self.trainloader once.

        Returns the (local, un-reduced) summed loss tensor of the last
        batch so callers can compare epochs.
        """
        self.model.cuda()
        self.model.train()
        metric_logger = utils.MetricLogger(delimiter="  ")
        metric_logger.add_meter(
            'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
        header = 'Epoch: [{}]'.format(epoch)

        # lr_scheduler = None
        if epoch == 0:
            # Warmup parameters kept for the (currently disabled) warmup
            # scheduler below.
            warmup_factor = 1. / 1000
            warmup_iters = min(1000, len(self.trainloader) - 1)

            # lr_scheduler = utils.warmup_lr_scheduler(self.optimizer, warmup_iters, warmup_factor)

        losses = 0
        for images, targets in metric_logger.log_every(self.trainloader,
                                                       print_freq, header):

            images, targets = self.todevice(images, targets)
            # loss_dict maps loss names ('loss_classifier', 'loss_box_reg',
            # 'loss_objectness', 'loss_rpn_box_reg', ...) to scalar tensors.
            loss_dict = self.model(images, targets)

            losses = sum(loss for loss in loss_dict.values())

            # Reduced-across-processes copies are for logging only;
            # backprop uses the local `losses`.
            loss_dict_reduced = utils.reduce_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())

            loss_value = losses_reduced.item()

            if not math.isfinite(loss_value):
                print("Loss is {}, stopping training".format(loss_value))
                print(loss_dict_reduced)
                sys.exit(1)

            self.optimizer.zero_grad()
            losses.backward()
            self.optimizer.step()

            # NOTE(review): the scheduler is stepped every iteration here
            # AND once per epoch in default_train() — confirm which is
            # intended.
            # if lr_scheduler is not None:
            self.lr_scheduler.step()

            metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
            metric_logger.update(lr=self.optimizer.param_groups[0]["lr"])
        # BUGFIX: return after the loop, not inside it — the original
        # returned on the first batch, so each "epoch" trained on exactly
        # one batch.  (Stray debug prints of images/targets also removed.)
        return losses

    def todevice(self, images, targets):
        """Move a batch of images and target dicts onto self.device."""
        images = list(image.to(self.device) for image in images)
        targets = [{k: v.to(self.device)
                    for k, v in t.items()} for t in targets]
        return images, targets

    # ---------------------------------------------------------------------------- #
    #                                Writer function                               #
    # ---------------------------------------------------------------------------- #

    def _get_iou_types(self):
        """Return the IoU types COCO evaluation should report: 'bbox'
        always, plus 'segm'/'keypoints' when the model supports them."""
        model_without_ddp = self.model
        if isinstance(self.model, torch.nn.parallel.DistributedDataParallel):
            model_without_ddp = self.model.module
        iou_types = ["bbox"]

        # ------------------------------- for detection ------------------------------ #

        if isinstance(model_without_ddp,
                      torchvision.models.detection.MaskRCNN):
            iou_types.append("segm")

        # ----------------------------- for segmentation ----------------------------- #

        if isinstance(model_without_ddp,
                      torchvision.models.detection.KeypointRCNN):
            iou_types.append("keypoints")

        # ------------------------------- for keypoint ------------------------------- #

        return iou_types

    @torch.no_grad()
    def evaluate(self, rate=0.1):
        """Run COCO evaluation on the first `rate` fraction of validation
        batches and return the populated CocoEvaluator."""
        n_threads = torch.get_num_threads()
        # FIXME remove this and make paste_masks_in_image run on the GPU
        torch.set_num_threads(1)
        cpu_device = torch.device("cpu")
        self.model.eval()
        metric_logger = utils.MetricLogger(delimiter="  ")
        header = 'Test:'

        coco = get_coco_api_from_dataset(self.valloader.dataset)
        # BUGFIX: _get_iou_types is an instance method of this class; the
        # original called it as a free function, which raises NameError.
        iou_types = self._get_iou_types()
        coco_evaluator = CocoEvaluator(coco, iou_types)

        # BUGFIX: a DataLoader is not subscriptable, so the original
        # `self.valloader[:...]` raised TypeError.  Limit the number of
        # batches by breaking out of the loop instead.
        max_batches = int(len(self.valloader) * rate)
        for batch_idx, (image, targets) in enumerate(
                metric_logger.log_every(self.valloader, 100, header)):
            if batch_idx >= max_batches:
                break
            image = list(img.to(self.device) for img in image)
            targets = [{k: v.to(self.device)
                        for k, v in t.items()} for t in targets]

            torch.cuda.synchronize()
            model_time = time.time()
            outputs = self.model(image)

            outputs = [{k: v.to(cpu_device)
                        for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            res = {
                target["image_id"].item(): output
                for target, output in zip(targets, outputs)
            }
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time,
                                 evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        torch.set_num_threads(n_threads)
        return coco_evaluator
Exemplo n.º 26
0
# Input audio is stored as 16-bit integer PCM.
INT_DATA_BIT_DEPTH = 16

# Training hyperparameters; EPOCHS presumably consumed by a fit() call
# outside this view — TODO confirm.
BATCH = 32
EPOCHS = 150

#%%
# preparing data
# Convert the raw training audio under TRAIN_DATA_DIRECTORY into "GdGram"
# images (exact transform is defined by GdGramGenerator, not shown here),
# using 50 worker threads.
image_generator = GdGramGenerator(TRAIN_DATA_DIRECTORY, SAMPLE_RATE,
                                  GDGRAM_SHAPE, GDGRAM_DURATION,
                                  INT_DATA_BIT_DEPTH)
image_generator.process_input_folder(number_of_threads=50)

#%%
# loading DataFrame with paths/labels for training and validation data and paths for testing data
dataset_generator = DatasetGenerator(label_set=LABELS,
                                     train_input_path=TRAIN_DATA_DIRECTORY,
                                     test_input_path=TEST_DATA_DIRECTORY,
                                     bit_depth=INT_DATA_BIT_DEPTH)
data_frame = dataset_generator.load_data()
# Fixed random seeds keep the train/test and train/validation splits
# reproducible across runs.
dataset_generator.apply_train_test_split(test_size=0.3, random_state=911)
dataset_generator.apply_train_validation_split(validation_size=0.2,
                                               random_state=74)

#%%
# compiling model
# ResNet-18 classifier over the GdGram images, one output per label.
model = resnet_model.build_resnet18(input_shape=NN_INPUT_SHAPE,
                                    num_classes=len(LABELS))

model.compile(optimizer='Adam',
              loss='categorical_crossentropy',
              metrics=['acc'])
Exemplo n.º 27
0
 def generate_dataset():
     ds = DatasetGenerator.generate_dataset(1000000)
     DatasetIO.write_dataset_to_csv_file(ds, Hyperparameters.DATASET_PATH)