def preprocess_squad():

    download_prefix = os.path.join("download", "squad")
    data_prefix = os.path.join("data", "squad")

    print("Downloading datasets into {}".format(download_prefix))
    print("Preprocessing datasets into {}".format(data_prefix))

    if not os.path.exists(download_prefix):
        os.makedirs(download_prefix)
    if not os.path.exists(data_prefix):
        os.makedirs(data_prefix)

    train_filename = maybe_download(squad_base_url,
                                    config.SQUAD_TRAIN_FILENAME,
                                    download_prefix, 30288272)
    train_data = SquadData.load_raw(train_filename)
    train_data.shuffle()
    train_data.save(config.SQUAD_TRAIN_PREFIX)

    dev_filename = maybe_download(squad_base_url, config.SQUAD_DEV_FILENAME,
                                  download_prefix, 4854279)
    dev_data = SquadData.load_raw(dev_filename)
    dev_data.shuffle()
    dev_data.save(config.SQUAD_DEV_PREFIX)
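None of the listings in this collection define maybe_download itself, and its signature varies from project to project. For orientation, here is a minimal sketch of a helper matching the four-argument call above (base URL, file name, target directory, expected size in bytes); it is a hypothetical stand-in, not the project's own implementation, and it assumes the download URL is formed by simple concatenation:

import os
import urllib.request

def maybe_download(base_url, filename, prefix, expected_bytes=None):
    # Hypothetical sketch: download base_url + filename into prefix unless a
    # local copy already exists, optionally verify its size, and return the
    # local path.
    filepath = os.path.join(prefix, filename)
    if not os.path.exists(filepath):
        print('Downloading {}...'.format(filename))
        urllib.request.urlretrieve(base_url + filename, filepath)
    if expected_bytes is not None and os.stat(filepath).st_size != expected_bytes:
        raise IOError('Unexpected size for {}'.format(filepath))
    return filepath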
Example #2
def download(self):
    data_dir = self.path
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
    for filename in self.data_files.values():
        path = self.path + '/' + filename
        if not os.path.exists(path):
            url = urljoin(self.base_url, filename)
            util.maybe_download(url, path)
Example #3
def prepare_ptb(files={'train': './ptb/ptb.train.txt',
                       'test': './ptb/ptb.test.txt',
                       'valid': './ptb/ptb.valid.txt'}):
    train_url = 'https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt'
    valid_url = 'https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.valid.txt'
    test_url = 'https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.test.txt'
    train_txt = maybe_download(files['train'], train_url)
    test_txt = maybe_download(files['test'], test_url)
    valid_txt = maybe_download(files['valid'], valid_url)
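Note that this variant of maybe_download takes the local path first and the URL second, the reverse of the SQuAD example above, and judging by the variable names it may return the file's contents rather than its path. A hypothetical two-argument sketch that returns the path:

import os
import urllib.request

def maybe_download(local_path, url):
    # Hypothetical sketch: fetch url into local_path only if it is missing.
    if not os.path.exists(local_path):
        os.makedirs(os.path.dirname(local_path) or '.', exist_ok=True)
        urllib.request.urlretrieve(url, local_path)
    return local_path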
Example #5
def get_code(buttons):
    max_x = len(buttons[0])
    max_y = len(buttons)
    x = None
    y = None

    for yi in range(max_y):
        for xi in range(max_x):
            if buttons[yi][xi] == '5':
                x = xi
                y = yi
                break
        if x is not None:
            break

    code = ''
    with maybe_download(2) as file:
        for s in file:
            for ch in s.strip():
                if ch == 'U' and y - 1 >= 0 and buttons[y - 1][x] != ' ':
                    y -= 1
                elif ch == 'D' and y + 1 < max_y and buttons[y + 1][x] != ' ':
                    y += 1
                elif ch == 'L' and x - 1 >= 0 and buttons[y][x - 1] != ' ':
                    x -= 1
                elif ch == 'R' and x + 1 < max_x and buttons[y][x + 1] != ' ':
                    x += 1
            code += buttons[y][x]
        return code
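This example and most of those that follow use maybe_download(day) as a context manager that yields an open handle on that day's puzzle input; the function above, for instance, solves the Advent of Code 2016 day 2 keypad puzzle. A minimal sketch of such a context manager, assuming the standard adventofcode.com input URL and a session cookie supplied through an environment variable (both are assumptions, not taken from the listings):

import os
import urllib.request
from contextlib import contextmanager

@contextmanager
def maybe_download(day, year=2016, cache_dir='input'):
    # Hypothetical sketch: cache the puzzle input locally, downloading it on
    # first use, then yield an open file object.
    os.makedirs(cache_dir, exist_ok=True)
    path = os.path.join(cache_dir, 'day{:02d}.txt'.format(day))
    if not os.path.exists(path):
        url = 'https://adventofcode.com/{}/day/{}/input'.format(year, day)
        request = urllib.request.Request(
            url, headers={'Cookie': 'session=' + os.environ['AOC_SESSION']})
        with urllib.request.urlopen(request) as response, open(path, 'wb') as out:
            out.write(response.read())
    with open(path) as f:
        yield f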
Example #6
def part1():
    triangles = 0
    with maybe_download(3) as file:
        for s in file.readlines():
            a, b, c = [int(x) for x in s.split()]
            if is_triangle(a, b, c):
                triangles += 1
        print('part1:', triangles)
Example #7
def part2():
    with maybe_download(7) as file:
        count = 0
        for s in file:
            s = s.strip()
            if is_ssl(s):
                count += 1
        print('part2', count)
Example #8
def part2():
    with maybe_download(4) as file:
        for s in map(str.rstrip, file):
            name, id, checksum = read_line(s)
            calculated_checksum = get_checksum(name)
            if calculated_checksum == checksum:
                real_name = decrypt(name, id)
                if real_name.find('north') != -1:
                    print(real_name, id)
Example #9
def part1():
    total = 0
    with maybe_download(4) as file:
        for s in map(str.rstrip, file):
            name, id, checksum = read_line(s)
            calculated_checksum = get_checksum(name)
            if calculated_checksum == checksum:
                total += id
    print('part1', total)
Example #10
def part1():
    display = [[0 for x in range(50)] for y in range(6)]
    with maybe_download(8) as file:
        for s in file:
            s = s.strip()
            apply_op(display, s)

    on = sum([sum(row) for row in display])
    print('part1', on)
Example #11
def part1():
    with maybe_download(5) as file:
        s = file.readline().strip()
        i = 0
        password = ''
        while len(password) < 8:
            hash, i = next_hash(s, i)
            password += hash[5]
            print('partial pass', password)
        print('part1', password)
Example #12
def part1():
    x = 0
    y = 0
    dir = 'N'
    with maybe_download(1) as file:
        data = [s.strip() for s in file.read().split(',')]

        for op in data:
            x, y, dir = fast_forward(x, y, dir, op)

        dist = abs(x) + abs(y)
        print('part1:', dist)
Example #13
def part1And2():
    bots = {}
    outputs = {}
    with maybe_download(10) as file:
        for s in file:
            s = s.strip()
            apply_instruction(bots, outputs, s)

        n = 1
        for out_id in [0, 1, 2]:
            for x in outputs[out_id].values:
                n *= x
        print('part2', n)
Example #14
def part2():
    with maybe_download(5) as file:
        s = file.readline().strip()
        i = 0
        password = [' ' for i in range(8)]
        found = 0
        while found < 8:
            hash, i = next_hash(s, i)
            pos = hash[5]
            if pos.isdigit():
                pos = int(pos)
                if pos < 8 and password[pos] == ' ':
                    password[pos] = hash[6]
                    found += 1
                    print('partial pass', ''.join(password))
        print('part2', ''.join(password))
Example #15
def get_glove():
    prefix = config.GLOVE_DIR

    print("Storing datasets in {}".format(prefix))

    if not os.path.exists(prefix):
        os.makedirs(prefix)

    glove_zip = maybe_download(config.GLOVE_BASE_URL, config.GLOVE_FILENAME, config.GLOVE_DIR,
            862182613)

    if os.path.exists(os.path.join(prefix, 'glove.6B.{}d.txt'.format(config.GLOVE_DIM))):
        return

    print('Unzipping GloVe data')
    glove_zip_ref = zipfile.ZipFile(os.path.join(config.GLOVE_DIR, config.GLOVE_FILENAME), 'r')

    glove_zip_ref.extractall(config.GLOVE_DIR)
    glove_zip_ref.close()
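A common follow-up step, not shown in the listing, is to parse the unzipped vectors into a dictionary. A sketch, assuming the glove.6B.<dim>d.txt naming used above, where each line holds a token followed by its space-separated float components:

import os
import numpy as np

def load_glove(glove_dir, dim=100):
    # Parse glove.6B.<dim>d.txt into a {word: vector} dictionary.
    vectors = {}
    with open(os.path.join(glove_dir, 'glove.6B.{}d.txt'.format(dim)),
              encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            vectors[parts[0]] = np.asarray(parts[1:], dtype=np.float32)
    return vectors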
Example #16
def part2():
    visited = set()
    x = 0
    y = 0
    dir = 'N'
    visited.add((x, y))
    with maybe_download(1) as file:
        data = [s.strip() for s in file.read().split(',')]

        for op in data:
            dir = next_dir(dir, op)
            n = int(op[1:])
            for i in range(n):
                x, y = next_pos(x, y, dir, 1)
                if (x, y) in visited:
                    dist = abs(x) + abs(y)
                    print('part2:', dist)
                    return
                else:
                    visited.add((x, y))
Example #17
def part2():
    triangles = 0
    s1 = []
    s2 = []
    s3 = []
    with maybe_download(3) as file:
        for s in file.readlines():
            a, b, c = [int(x) for x in s.split()]
            s1.append(a)
            s2.append(b)
            s3.append(c)
        for i in range(0, len(s1), 3):
            a, b, c = s1[i:i + 3]
            if is_triangle(a, b, c):
                triangles += 1
            a, b, c = s2[i:i + 3]
            if is_triangle(a, b, c):
                triangles += 1
            a, b, c = s3[i:i + 3]
            if is_triangle(a, b, c):
                triangles += 1
        print('part2:', triangles)
Example #18
def main():
    DEFAULT_SOURCE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/'
    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
    DATA_DIR_PATH = './data/MNIST'
    VALIDATION_SIZE = 5000

    local_file = util.maybe_download(TRAIN_IMAGES, DATA_DIR_PATH,
                                     DEFAULT_SOURCE_URL + TRAIN_IMAGES)
    train_images = util.extract_images(local_file)

    local_file = util.maybe_download(TRAIN_LABELS, DATA_DIR_PATH,
                                     DEFAULT_SOURCE_URL + TRAIN_LABELS)
    train_labels = util.extract_labels(local_file)

    local_file = util.maybe_download(TEST_IMAGES, DATA_DIR_PATH,
                                     DEFAULT_SOURCE_URL + TEST_IMAGES)
    test_images = util.extract_images(local_file)

    local_file = util.maybe_download(TEST_LABELS, DATA_DIR_PATH,
                                     DEFAULT_SOURCE_URL + TEST_LABELS)
    test_labels = util.extract_labels(local_file)

    validation_images = train_images[:VALIDATION_SIZE]
    validation_labels = train_labels[:VALIDATION_SIZE]
    train_images = train_images[VALIDATION_SIZE:]
    train_labels = train_labels[VALIDATION_SIZE:]

    Dataset = collections.namedtuple('Dataset',
                                     ['images', 'labels', 'num_examples'])
    Datasets = collections.namedtuple('Datasets',
                                      ['train', 'validation', 'test'])

    # train images
    num_train_images = train_images.shape[0]
    train_images = train_images.reshape(
        train_images.shape[0],
        train_images.shape[1] * train_images.shape[2]).astype(np.float32)
    train_images = np.multiply(train_images, 1.0 /
                               255.0)  # Convert from [0, 255] -> [0.0, 1.0].
    train = Dataset(train_images, train_labels, num_train_images)

    # validation images
    num_validation_images = validation_images.shape[0]
    validation_images = validation_images.reshape(
        validation_images.shape[0], validation_images.shape[1] *
        validation_images.shape[2]).astype(np.float32)
    validation_images = np.multiply(
        validation_images, 1.0 / 255.0)  # Convert from [0, 255] -> [0.0, 1.0].
    validation = Dataset(validation_images, validation_labels,
                         num_validation_images)

    # test images
    num_test_images = test_images.shape[0]
    test_images = test_images.reshape(
        test_images.shape[0],
        test_images.shape[1] * test_images.shape[2]).astype(np.float32)
    test_images = np.multiply(test_images, 1.0 /
                              255.0)  # Convert from [0, 255] -> [0.0, 1.0].
    test = Dataset(test_images, test_labels, num_test_images)

    mnist_data = Datasets(train=train, validation=validation, test=test)

    network(mnist_data)
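For illustration, the Datasets structure built above could be consumed for mini-batch training roughly as follows; this is a sketch only, and network() in the original presumably does something equivalent internally. It relies only on the images, labels and num_examples fields defined in the example:

import numpy as np

def iterate_minibatches(dataset, batch_size=128, shuffle=True):
    # Yield (images, labels) mini-batches from one of the Dataset namedtuples,
    # e.g. for images, labels in iterate_minibatches(mnist_data.train): ...
    indices = np.arange(dataset.num_examples)
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, dataset.num_examples, batch_size):
        batch = indices[start:start + batch_size]
        yield dataset.images[batch], dataset.labels[batch]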
Example #19
def get_split(self, split_name, is_tritrain):
    return self.get_sentences(
        util.maybe_download(
            "data", "http://lsz-gpu-01.cs.washington.edu/resources/",
            split_name + ".stagged"), is_tritrain)
Example #20
def part2():
    with maybe_download(6) as file:
        counts = get_all_counts(file)
        letters = ''.join([c[-1][0] for c in counts])
        print('part2', letters)
Example #21
def part2():
    with maybe_download(9) as file:
        for s in file:
            s = s.strip()
            print('part2:', decompress_size(s, True))