Exemplo n.º 1
0
    def _read_file(self, filename: str, password: str = '') -> tuple:
        """ Reads the data from filename and returns a tuple of the
        CryptData (built from the password and the stored encrypted
        master key) and the accounts dictionary.

        The return annotation previously said ``dict`` but the method
        has always returned a two-tuple; the annotation now matches.

        """

        # Read the raw lzma data from the file, if it exists.
        # NOTE(review): pathlib.Path context-manager support was
        # deprecated in 3.11 and removed in Python 3.13, so call the
        # path object directly instead of using `with`.
        pass_file = pathlib_path(filename)
        lzma_data = pass_file.read_bytes() if pass_file.is_file() else b''

        # Get the json data out of the file data, or an empty json dict
        # if the file was empty.
        if lzma_data:
            json_data = lzma_decompress(lzma_data).decode()
        else:
            json_data = '{}'

        accounts_dict = json_loads(json_data)

        # Pop the master key out of the accounts dictionary so it won't be
        # operated on or listed.  Also if no master key is found, create
        # one.
        encrypted_key = bytes.fromhex(accounts_dict.pop(self.MASTER_KEY_DIGEST, ''))

        if not encrypted_key:
            if not password:
                # Get the password to encrypt the master key.
                password = self._ask_pass('password')
        elif not password:
            # Get the password to decrypt the key.  Only prompt when
            # the caller did not supply one (previously the supplied
            # password was silently ignored on this path).
            password = self._ask_pass('password', verify=False)

        return CryptData(password, encrypted_key), accounts_dict
Exemplo n.º 2
0
def read_file(filename: str, password: str = "") -> tuple:
    """ Read filename and return (accounts_dict, encrypted_key,
    master_key): the account dictionary, the encrypted master key, and
    the decrypted master key.

    """

    # Pull the raw compressed bytes out of the file when it exists.
    with pathlib_path(filename) as pass_file:
        raw_bytes = pass_file.read_bytes() if pass_file.is_file() else b""

    # Decompress into json text; fall back to an empty json object when
    # the file held nothing.
    json_text = lzma_decompress(raw_bytes).decode() if raw_bytes else "{}"

    accounts_dict = json_loads(json_text)

    # Remove the stored master key from the dictionary so it is neither
    # listed nor treated as an account, then decrypt it (or create a new
    # one when none was stored).
    stored_hex = accounts_dict.pop(MASTER_KEY_DIGEST, "")
    encrypted_key, master_key = get_master_key(bytes.fromhex(stored_hex), password)

    return accounts_dict, encrypted_key, master_key
Exemplo n.º 3
0
def write_file(filename: str, accounts_dict: dict, encrypted_key: bytes):
    """ Compresses and writes the accounts_dict to the file at filename.

    The encrypted master key is stored (hex encoded) back into the dict
    under MASTER_KEY_DIGEST before serializing to json and compressing
    with lzma.

    """

    # Put the master key into the accounts dict so it is persisted
    # alongside the accounts.
    accounts_dict[MASTER_KEY_DIGEST] = encrypted_key.hex()

    json_data = json_dumps(accounts_dict)

    lzma_data = lzma_compress(json_data.encode())

    # NOTE(review): pathlib.Path context-manager support was deprecated
    # in 3.11 and removed in Python 3.13; write through the path object
    # directly instead of the old `with pathlib_path(...)` form.
    pathlib_path(filename).write_bytes(lzma_data)
Exemplo n.º 4
0
def read_file(filename: str) -> dict:
    """ Reads the lzma-compressed json data from filename and returns
    the account dictionary.  A missing or empty file yields an empty
    dictionary.

    """

    # Read from the file if it exists.
    # NOTE(review): pathlib.Path context-manager support was deprecated
    # in 3.11 and removed in Python 3.13, so call the path object
    # directly instead of using `with`.
    pass_file = pathlib_path(filename)
    lzma_data = pass_file.read_bytes() if pass_file.is_file() else b''

    # Get the json data out of the file data, or an empty json dict if
    # the file was empty.
    if lzma_data:
        json_data = lzma_decompress(lzma_data).decode()
    else:
        json_data = '{}'

    # Load the json data into a dictionary.
    return json_loads(json_data)
Exemplo n.º 5
0
def read_file(filename: str, password: str = '') -> tuple:
    """ Reads the data from filename and returns the account dictionary, the
    encrypted master key, and the decrypted master key.

    If no master key is stored in the file, a new one is generated and
    encrypted with password (prompting for one when none was supplied).

    """

    # Read from the file if it exists.
    # NOTE(review): pathlib.Path context-manager support was deprecated
    # in 3.11 and removed in Python 3.13, so call the path object
    # directly instead of using `with`.
    pass_file = pathlib_path(filename)
    lzma_data = pass_file.read_bytes() if pass_file.is_file() else b''

    # Get the json data out of the file data, or an empty json dict if
    # the file was empty.
    if lzma_data:
        json_data = lzma_decompress(lzma_data).decode()
    else:
        json_data = '{}'

    accounts_dict = json_loads(json_data)

    # Pop the master key out of the accounts dictionary so it won't be
    # operated on or listed.  Also if no master key is found, create
    # one.
    encrypted_key = bytes.fromhex(accounts_dict.pop(MASTER_KEY_DIGEST, ''))

    if not encrypted_key:
        if not password:
            # Get the password to encrypt the master key.
            password = get_pass('password')

        # Generate the largest key possible.
        master_key = Random.new().read(KEY_LEN)

        # Encrypt the key.
        encrypted_key = encrypt_key(master_key, password)
    else:
        if not password:
            # Get the password to decrypt the key.  Only prompt when
            # the caller did not supply one (previously the supplied
            # password was silently ignored on this path).
            password = get_pass('password', verify=False)
        master_key = decrypt_key(encrypted_key, password)

    return accounts_dict, encrypted_key, master_key
Exemplo n.º 6
0
    def __init__(self, n_paired: int):
        """Load (or build and cache) the MS-COCO images, captions and
        vocabulary, then split the training image ids into paired,
        image-only and caption-only subsets.

        n_paired -- number of training image ids to place in the paired
        subset; the remaining ids are split roughly in half between the
        image-only and caption-only subsets.
        """

        # Hard-coded relative paths to the raw MS-COCO annotations and
        # image directories, plus the two pickle caches written by this
        # method on the first run.
        _train_annotations_path = '../data/mscoco/annotations/captions_train2014.json'
        _val_annotations_path = '../data/mscoco/annotations/captions_val2014.json'

        _train_images_dir = '../data/mscoco/train2014/'
        _val_images_dir = '../data/mscoco/val2014/'

        _caption_path = '../data/mscoco/captions.pickle'
        _image_path = '../data/mscoco/images.pickle'

        # Special vocabulary symbols: padding, out-of-vocabulary,
        # sequence start (<GO>) and sequence end (<EOF>).
        self._padding = '<PAD>'
        self._oov = '<OOV>'
        self._go = '<GO>'
        self._eof = '<EOF>'
        self._symbols = [self._oov, self._padding, self._eof, self._go]
        self._inverse_vocab = None

        # Processing order matters below: index 0 is the training set,
        # index 1 the validation set.
        paths = [(_train_annotations_path, _train_images_dir), (_val_annotations_path, _val_images_dir)]

        if os.path.isfile(_image_path):
            # Fast path: image arrays were processed and pickled on a
            # previous run.

            print("Loading images...", flush=True)

            with open(_image_path, 'rb') as ff:
                data = pickle.load(ff)

            self._images = data['images']
            self._val_images = data['val_images']

            print("Images loaded.", flush=True)

        else:
            # Slow path: read every image from disk, resize to 48x64,
            # flatten, and cache the result as a pickle.

            for j, (ann_p,im_p) in enumerate(paths):

                with open(ann_p) as ff:
                    ann = json.load(ff)

                print("Creating image dictionary..", flush=True)
                images = dict()  # key,value ---> image_id, image array
                for k in ann['images']:
                    file_path = im_p + k['file_name']
                    im_file = pathlib_path(file_path)
                    if im_file.exists():
                        # NOTE(review): scipy.ndimage.imread and
                        # scipy.misc.imresize were removed in modern
                        # SciPy releases; this code needs an old SciPy
                        # (with PIL installed) to run — confirm pinning.
                        image = ndimage.imread(file_path)
                        image = imresize(image, size=(48, 64), interp='cubic')

                        # A rank-2 result means the image was grayscale;
                        # replicate the single channel three times to
                        # fake an RGB image.
                        if image.shape == (48, 64):
                            image = np.expand_dims(image, axis=2)
                            image = np.concatenate((image, image, image), axis=2)

                        # Flatten each image to a single row vector.
                        image = np.reshape(image, newshape=[1, -1])

                        images[k['id']] = image

                if j == 0:      # training set
                    self._images = images
                else:           # validation set
                    self._val_images = images

            tosave = dict()
            tosave['images'] = self._images
            tosave['val_images'] = self._val_images

            print("Saving images...", flush=True)
            with open(_image_path, 'wb') as ff:
                pickle.dump(tosave, ff, pickle.HIGHEST_PROTOCOL)
            print("Saved.", flush=True)


        if os.path.isfile(_caption_path):  # load processed data
            # Fast path: captions, vocabulary and mappings were already
            # processed and pickled on a previous run.

            print("Loading data...", flush=True)

            with open(_caption_path, 'rb') as ff:
                data = pickle.load(ff)

            self._vocab = data['vocab']

            self._captions = data['captions']
            self._imcapt = data['imcapt']
            self._val_captions = data['val_captions']
            self._val_imcapt = data['val_imcapt']
            self._max_seq_len = data['max_seq_len']

            print("Data loaded.", flush=True)


        else: # process data and load
            print("Processing data..", flush=True)

            self._max_seq_len = 1

            for j, (ann_p,im_p) in enumerate(paths):

                with open(ann_p) as ff:
                    ann = json.load(ff)

                print("Creating caption dictionary..", flush=True)
                captions = dict()   # key,value ---> caption_id, word sequence
                for k in ann['annotations']:
                    capt = k['caption']

                    # caption preprocessing: normalize punctuation so a
                    # plain whitespace split tokenizes cleanly
                    capt = capt.strip()     # remove unnecessary whitespace
                    capt = capt.lower()     # make lower case
                    capt = capt.replace('.', ' ')  # remove periods
                    capt = capt.replace(',', ' ')  # remove commas
                    capt = capt.replace('?', ' ')  # remove question marks
                    capt = capt.replace('-', ' ')  # remove dashes
                    capt = capt.replace('"', ' " ')  # expand double quotes
                    capt = capt.replace('(', ' ( ')  # expand brackets
                    capt = capt.replace(')', ' ) ')  # expand brackets
                    capt = capt.replace('{', ' { ')  # expand brackets
                    capt = capt.replace('}', ' } ')  # expand brackets
                    capt = capt.split()  # split string
                    capt.append(self._eof)  # pad with EOF character

                    captions[k['id']] = capt


                # Track the longest caption (in tokens) across both the
                # training and validation passes.
                self._max_seq_len = max(max([len(_v) for _,_v in captions.items()]), self._max_seq_len)
                print("Max sequence length: ", self._max_seq_len, flush=True)


                if j == 0: # training set
                    print("Word frequencies", flush=True)
                    freqs = defaultdict(int)
                    for _, capt in captions.items():
                        for word in capt:
                            freqs[word] += 1

                    print("Adding <OOV> words", flush=True)
                    min_freq = 2                # minimum word frequency
                    # Rare words (frequency below min_freq) are replaced
                    # with <OOV> only 90% of the time, so a few rare
                    # words survive into the vocabulary.
                    for k,capt in captions.items():
                        for i,w in enumerate(capt):
                            if freqs[w] < min_freq:
                                if np.random.binomial(n=1, p=0.9) == 1:   # 90% chance of setting <OOV>
                                    capt[i] = self._oov


                print("Creating vocabulary..", flush=True)
                if j > 0: # validation set
                    # Reuse the training vocabulary; validation-only
                    # words fall through to <OOV> in the id-conversion
                    # step below.
                    vocab = self._vocab

                else:
                    vocab = dict()  # key,value ---> word, word_id
                    # NOTE(review): iterating a set makes word-id
                    # assignment nondeterministic across interpreter
                    # runs; consistent for this dataset only because the
                    # vocab is pickled together with the captions.
                    words = {w for _, _v in captions.items() for w in _v}
                    for i,w in enumerate(words):
                        vocab[w] = i

                    for s in self._symbols:  # add symbols to vocab dictionary if not already there
                        if s not in vocab:
                            idx = max([v for k,v in vocab.items()]) + 1
                            vocab[s] = idx


                print("Converting captions to ids (from vocab)..", flush=True)
                # Replace each token in place with its vocab id; unknown
                # tokens (possible on the validation pass) map to <OOV>.
                for _k,_v in captions.items():
                    for i in range(len(_v)):
                        if _v[i] in vocab:
                            _v[i] = vocab[_v[i]]
                        else:
                            _v[i] = vocab[self._oov]


                print("Creating image-caption mapping..", flush=True)
                im_capt = defaultdict(set)    # key,value ---> image_id, set of caption ids
                for k in ann['annotations']:
                    im_capt[k['image_id']].add(k['id'])


                if j == 0:      # training set
                    self._captions = captions
                    self._vocab = vocab
                    self._imcapt = im_capt

                else:           # validation set
                    self._val_captions = captions
                    # NOTE(review): redundant — vocab is already
                    # self._vocab on the validation pass.
                    self._vocab = vocab
                    self._val_imcapt = im_capt

            tosave = dict()
            tosave['vocab'] = self._vocab

            tosave['captions'] = self._captions
            tosave['imcapt'] = self._imcapt
            tosave['val_captions'] = self._val_captions
            tosave['val_imcapt'] = self._val_imcapt
            tosave['max_seq_len'] = self._max_seq_len

            print("Saving data...", flush=True)
            with open(_caption_path, 'wb') as ff:
                pickle.dump(tosave, ff, pickle.HIGHEST_PROTOCOL)
            print("Saved.", flush=True)

        # lists of image ids
        self.image_ids = list(self._images.keys())
        self.val_image_ids = list(self._val_images.keys())

        # construct pairings: randomly pick n_paired ids as "paired",
        # then split the remaining training ids roughly in half between
        # image-only and caption-only subsets.
        _n = len(self.image_ids)
        self.paired = set(np.random.choice(self.image_ids, size=n_paired, replace=False))
        _remain = set(self.image_ids) - self.paired
        _each_size = len(_remain) // 2
        self.image_only = set(np.random.choice(list(_remain), size=_each_size, replace=False))
        self.caption_only = _remain - self.image_only

        self.paired = list(self.paired)
        self.image_only = list(self.image_only)
        self.caption_only = list(self.caption_only)