Example #1
import os
import sys

import requests

import datasets


def main():
    os.makedirs('working', exist_ok=True)
    for name, url in datasets.DATASETS.items():
        print('Downloading {} dataset... '.format(name), end='', flush=True)
        req = requests.get(url)
        if req.status_code != 200:
            print('Error getting {} dataset'.format(name),
                  file=sys.stderr,
                  flush=True)
            continue
        else:
            print('Done', flush=True)
        outpath = datasets.path(name)
        with open(outpath, 'w') as outfile:
            outfile.write(req.text)
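Examples #1 and #2 rely on a local datasets module that is not shown on this page. A minimal sketch of the interface they appear to assume is given below; the dataset names, URLs and storage layout are placeholders, not the project's actual values.

# datasets.py -- hypothetical sketch of the interface used by Examples #1 and #2
import os

US_DAILY_NAME = 'us_daily'
STATES_DAILY_NAME = 'states_daily'

# Name -> download URL mapping iterated by Example #1 (placeholder URLs)
DATASETS = {
    US_DAILY_NAME: 'https://example.com/us_daily.json',
    STATES_DAILY_NAME: 'https://example.com/states_daily.json',
}


def path(name):
    # Local file where a downloaded dataset is written and later read back
    return os.path.join('working', '{}.json'.format(name))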
Example #2
import json
import os

import datasets

# fatal(), DATA_ORDER, process_us_daily() and process_states_daily() are
# defined elsewhere in the same module and are not shown in this excerpt.


def main() -> None:
    os.makedirs('output', exist_ok=True)
    fns = [(datasets.US_DAILY_NAME, process_us_daily),
           (datasets.STATES_DAILY_NAME, process_states_daily)]
    for name, process_fn in fns:
        with open(datasets.path(name)) as json_file:
            data = json.load(json_file)
            if type(data) != list:
                fatal('Error with {}: expected list of data, got {}'.format(
                    name, type(data)))
            if len(data) == 0:
                fatal('Error with {}: got no data'.format(name))
            for datum in data:
                if type(datum) != dict:
                    fatal('Error with {}: expected list of dicts, but got '
                          'type {} in list'.format(name, type(datum)))
            processed_data = process_fn(data)
            output_path = os.path.join('output', '{}.js'.format(name))
            if name == datasets.US_DAILY_NAME:
                us_data = {'US': processed_data}
                json_str = json.dumps(us_data, indent=2)
            else:
                json_str = json.dumps(processed_data, indent=2)
            with open(output_path, 'w') as output_file:
                print('export const {} = {};'.format(name, json_str),
                      file=output_file)

    # Dump the data indices as well
    index_strs = [
        '  "{}": {},'.format(field, i) for i, field in enumerate(DATA_ORDER)
    ]
    index_str = '\n'.join(index_strs)
    data_js = ('export const DATA_INDICES = {{\n'
               '{}\n'
               '}};').format(index_str)
    index_path = os.path.join('output', 'data_indices.js')
    with open(index_path, 'w') as output_file:
        output_file.write(data_js)
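The fatal helper called in Example #2 is not part of the excerpt. Judging by how it is used, it presumably reports the error and aborts instead of returning; a minimal sketch under that assumption:

import sys


def fatal(msg):
    # Hypothetical helper: print the error and exit so the checks above
    # never fall through to processing bad data
    print(msg, file=sys.stderr)
    sys.exit(1)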
Example #3
    def generate_hp(self, path):
        # Excerpt of a class method; it assumes module-level imports such as
        # numpy as np, datetime, randint/uniform, log, os.path.join as join,
        # the local datasets module, and the dd container used throughout.

        # Seed random number generator
        np.random.seed(datetime.now().microsecond)

        # Hyperparameters
        hp = dd()

        # Set debug mode
        hp.debug = False

        # Experiment parameters
        hp.meta = dd()
        hp.meta.path = path
        hp.meta.result_folder = './'
        hp.meta.export_folder = 'export'

        # Dataset parameters
        hp.dataset = dd()
        hp.dataset.path = join(datasets.path(), 'catsanddogs')

        # Feature extraction parameters
        hp.iterator = dd()
        hp.iterator.patch_sz = (90, 90, 3)
        hp.iterator.reshape_sz = (100, 100, 3)

        # Feature learning layers
        hp.model = dd()
        hp.model.name = 'convnet'
        hp.model.layers = dd()

        # Preprocess layer
        i = 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_preprocess'
        hp.model.layers[i].nb_channels = 3
        hp.model.layers[i].nb_pretrain_iterations = 1

        # Convolutional layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_vanilla'
        hp.model.layers[i].activation = "relu"
        hp.model.layers[i].nb_filters = 16
        hp.model.layers[i].filter_sz = (3, 3)

        # Max pooling layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_maxpool'
        hp.model.layers[i].downsample_sz = 2

        # Batch normalization layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_batchnorm'

        # Convolutional layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_vanilla'
        hp.model.layers[i].activation = "relu"
        hp.model.layers[i].nb_filters = 32
        hp.model.layers[i].filter_sz = (3, 3)

        # Batch normalization layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_batchnorm'

        # Convolutional layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_vanilla'
        hp.model.layers[i].activation = "relu"
        hp.model.layers[i].nb_filters = 32
        hp.model.layers[i].filter_sz = (3, 3)

        # Max pooling layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_maxpool'
        hp.model.layers[i].downsample_sz = 2

        # Batch normalization layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_batchnorm'

        # Convolutional layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_vanilla'
        hp.model.layers[i].activation = "relu"
        hp.model.layers[i].nb_filters = 64
        hp.model.layers[i].filter_sz = (5, 5)

        # Max pooling layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_maxpool'
        hp.model.layers[i].downsample_sz = 2

        # Batch normalization layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_batchnorm'

        # Convolutional layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_vanilla'
        hp.model.layers[i].activation = "relu"
        hp.model.layers[i].nb_filters = 64
        hp.model.layers[i].filter_sz = (5, 5)

        # Batch normalization layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_batchnorm'

        # Fully connected layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'hidden'
        hp.model.layers[i].activation = "relu"
        # int(10**np.random.uniform(log(128)/log(10), log(512)/log(10)))
        hp.model.layers[i].nb_hid = 256

        # Logistic layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'logistic'
        hp.model.layers[i].nb_out = 2

        # Trainer
        hp.trainer = dd()
        hp.trainer.max_epoch = 1000
        hp.trainer.lookback = randint(10, 30)
        hp.trainer.minibatch_sz = 100
        hp.trainer.init_lr = None
        hp.trainer.incr_lr = None
        hp.trainer.lr = 10**np.random.uniform(
            log(0.01) / log(10),
            log(0.1) / log(10))
        hp.trainer.decay_rate = uniform(0.985, 1.0)
        hp.trainer.momentum = 10**uniform(
            log(0.8) / log(10),
            log(0.99) / log(10))
        hp.trainer.momentum_reset_prob = 0

        print("Save hyperparameters to file")
        hp.dump(join(hp.meta.path, 'hp.pkl'), save_pretty_textfile=True)

        print(hp)

        return hp
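The dd container used in Examples #3 and #4 behaves like a dictionary whose entries can also be read and written as attributes (hp.meta.path, hp.model.layers[i].type and so on). A minimal attribute-dict with that behavior is sketched below; the project's real class also provides dump() for saving, which is omitted here.

class dd(dict):
    # Minimal attribute-access dict sketched from how dd is used above;
    # not the project's actual implementation.
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value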
Example #4
                # Extract key and value; the boolean index picks split[1]
                # when the line has two fields and split[0] otherwise
                key = split[0]
                val = split[len(split) == 2]

                # Register new class descriptions in the label mapping
                # (class description -> class number)
                if val not in labelmap:
                    currentlabel += 1
                    labelmap[val] = currentlabel
                
                # Add key to classmap
                if key not in classmap:
                    classmap[key] = dd()
                    
                # Assign description and label to key
                classmap[key].label = labelmap[val]
                classmap[key].desc  = val

        return classmap


if __name__ == '__main__':
    import datasets
    from os.path import join
    from cellavision_ import cellavision
    from clemex_ import clemex

    clmx = clemex(join(datasets.path(), 'leuko', 'clemex'))
    clvs = cellavision(join(datasets.path(), 'leuko', 'cellavision', 'images'))

    classmap_path = join(datasets.path(), 'leuko', 'classmap.txt')
    marshall(classmap_path, s1=clmx, s2=clvs)
Example #5
            # File index parsed from the name, assuming names like 'cat.1234.jpg'
            idx = int(fn[4:-4])

            # Add class to class list
            if cls not in self.classes:
                self.classes.append(cls)

            example = element(self.path,
                              fn,
                              label=self.classes.index(cls),
                              desc=cls,
                              index=idx)

            self.examples.append(example)

            # Index-based split: indices below 10000 go to train,
            # below 11250 to valid, and the rest to test
            if idx < 20000 // 2:
                self.splits.train.append(example)
            elif idx < 22500 // 2:
                self.splits.valid.append(example)
            else:
                self.splits.test.append(example)

        print("Cats and dogs dataset is loaded with:")
        for split_id in self.splits:
            print("    {:7d} examples in {} set".format(
                len(self.splits[split_id]), split_id))


if __name__ == '__main__':
    import datasets
    s = source(join(datasets.path(), 'catsanddogs'))