Пример #1
0
 def __process_output(child):
     """Turn the first output line of ``child`` into an ``ImageMeta``.

     The line is split on single spaces into at most three pieces, read as
     ``<width> <height> <format>``; the format is lower-cased and the two
     sizes are converted to ``int``.

     Args:
         child: a started subprocess whose ``stdout`` is a readable pipe.

     Returns:
         ImageMeta built from the parsed width, height and format.

     Raises:
         RuntimeError: if the child exits with a non-zero status, prints
             no line at all, or prints a line that cannot be parsed.
     """
     try:
         # Only the first line is needed.  The ``None`` default keeps an
         # empty stream from leaking StopIteration to the caller.
         result = next(util.line_reader(child.stdout), None)
     finally:
         # Close the pipe even when reading fails.
         child.stdout.close()
     if child.wait() != 0 or result is None:
         raise RuntimeError('Could not identify image properties.')
     try:
         # Fewer than three pieces or non-numeric sizes both raise
         # ValueError here; report them as the same identification failure.
         width, height, image_format = result.split(' ', 2)
         width, height = int(width), int(height)
     except ValueError as err:
         raise RuntimeError('Could not identify image properties.') from err
     return ImageMeta(format = image_format.lower(),
             width = width, height = height)
Пример #2
0
 def __process_output(child):
     """Parse the one-line image description written by ``child``.

     The expected line has the form ``<width> <height> <format>``: it is
     split on single spaces into at most three fields, the first two are
     converted to ``int`` and the third is lower-cased.

     Args:
         child: a started subprocess whose ``stdout`` is a readable pipe.

     Returns:
         ImageMeta populated with ``format``, ``width`` and ``height``.

     Raises:
         RuntimeError: if the child produced no output, exited with a
             non-zero status, or produced a line that cannot be parsed.
     """
     try:
         # A default of None avoids an uncaught StopIteration when the
         # child wrote nothing to stdout.
         result = next(util.line_reader(child.stdout), None)  # one line
     finally:
         child.stdout.close()
     exit_status = child.wait()
     if exit_status != 0 or result is None:
         raise RuntimeError('Could not identify image properties.')
     try:
         # Unpacking fails with ValueError on too few fields, as does
         # int() on a non-numeric size.
         raw_width, raw_height, raw_format = result.split(' ', 2)
         width = int(raw_width)
         height = int(raw_height)
     except ValueError as err:
         raise RuntimeError('Could not identify image properties.') from err
     return ImageMeta(format=raw_format.lower(),
                      width=width,
                      height=height)
Пример #3
0
 def read(self):
     """
     Reads word2vec-format embeddings.

     The first line of ``self.filename`` holds two integers, the declared
     number of rows and the embedding dimension.  Every following line
     holds a word followed by its vector components.

     Blank lines and lines whose vector length differs from the declared
     dimension are reported with ``print`` and skipped.

     Sets:
         self.w_index: word -> row index into ``self.W``.
         self.inv_w_index: row index -> word.
         self.W: float matrix with one row per kept word; it has fewer
             rows than the header declares when lines were skipped.

     Raises:
         ValueError: if the header is not two integers or a vector
             component is not a number.
         IndexError: if the file has more valid rows than the header
             declares.
     """
     with open(self.filename) as in_f:
         # Parse with int(), not eval(): the header is file content and
         # must never be executed as code.
         m, n = map(int, in_f.readline().split())
     e_m = np.zeros((m, n))
     ws = []
     for l in line_reader(self.filename,
                          skip=1):  # skip dimensions
         tokens = l.strip().split()
         if not tokens:
             # A blank line would otherwise break the starred unpacking.
             print("Empty w or e.")
             continue
         w, *e = tokens
         if len(e) != n:
             print("Incorrect embedding dimension, skipping.")
             continue
         # Store at the next free row, not at the file line number, so
         # skipped lines cannot shift words away from their vectors.
         e_m[len(ws)] = [float(x) for x in e]
         ws.append(w)
     if len(ws) != m:
         # Drop the unused, still-zero rows left by skipped lines.
         e_m = e_m[:len(ws)]
     self.w_index = {w: c for c, w in enumerate(ws)}
     self.inv_w_index = {v: k for k, v in self.w_index.items()}
     self.W = e_m
Пример #4
0
    def from_file(filename, digest_map = None):
        """Collect FLAC metadata for ``filename`` by parsing ``metaflac --list``.

        ``metaflac`` is run as a subprocess and asked for the STREAMINFO and
        VORBIS_COMMENT blocks, plus PICTURE blocks when ``digest_map`` is
        given.  Its output is fed line by line through ``MetaListParser``.

        Args:
            filename: path of the FLAC file, passed to ``metaflac``.
            digest_map: optional mapping updated in place.  Each embedded
                picture is stored under its SHA-1 digest (``bytes``) unless
                that digest is already present.  When ``None``, picture
                blocks are not requested at all.

        Returns:
            FLACMeta built from the sample rate, total samples, channel
            count, comment dict and a dict mapping picture type to a list of
            ``Picture(digest, description)`` entries in first-seen order.
            Values that never appear in the output keep their initial
            ``None`` / ``0`` / empty defaults.  The exit status of
            ``metaflac`` is not checked.
        """
        sample_rate = None
        total_samples = None
        comments = {}
        pictures = {}
        channels = 0

        # State for the picture block currently being read.
        picture_bytes = 0
        picture_type = None
        picture_data = None
        picture_description = None

        parser = MetaListParser()

        desired = [BlockType.STREAMINFO, BlockType.VORBIS_COMMENT]
        if digest_map is not None:
            desired.append(BlockType.PICTURE)

        command = ['metaflac',
                '--list', '--no-utf8-convert',
                '--block-type=' + ','.join([value.name for value in desired]),
                filename]

        block_type = None

        # The context manager closes the stdout pipe and waits for the child
        # on exit, even if parsing raises.
        with subprocess.Popen(command, stdin = subprocess.DEVNULL,
                stdout = subprocess.PIPE,
                stderr = subprocess.DEVNULL) as child:
            for line in util.line_reader(child.stdout, terminate=True):
                # after last line, we'll get None because terminate is True
                field = parser.process_line(line)  # None will flush
                if field is None:
                    continue
                # processing
                if field.level == 0 and field.key == BLOCK_PREFIX:
                    # A new block starts; its type is given by the next
                    # 'type' field.
                    block_type = None  # num = field.int_value()
                elif block_type is None and field.key == 'type':
                    block_type = BlockType(field.int_value())
                    if block_type == BlockType.PICTURE:
                        # Reset the per-picture state for the new block.
                        picture_bytes = 0
                        picture_type = None
                        picture_data = None
                        picture_description = None
                elif block_type == BlockType.STREAMINFO:
                    if field.key == 'sample_rate':
                        sample_rate = field.int_value()
                    elif field.key == 'total samples':
                        total_samples = field.int_value()
                    elif field.key == 'channels':
                        channels = field.int_value()
                elif block_type == BlockType.VORBIS_COMMENT:
                    if (field.key.startswith('comment[') and
                            field.key.endswith(']')):
                        key, value = field.value.split('=', 1)
                        comments[key] = value
                elif block_type == BlockType.PICTURE:
                    if field.key == 'data length':
                        picture_bytes = field.int_value()
                    elif field.key == 'type':
                        # Inside a picture block 'type' is the picture type;
                        # the block's own type was consumed above.
                        picture_type = PictureType(field.int_value())
                    elif field.key == 'data':
                        picture_data = bytearray()
                    elif field.key == 'description':
                        picture_description = field.value
                    elif (picture_bytes != 0 and picture_data is not None
                            and field.key ==
                            '{:08X}'.format(len(picture_data))):
                        # Hex-dump row keyed by the current offset.  Each
                        # byte takes three characters and a row holds at
                        # most 16 bytes; the slice drops whatever follows.
                        picture_data += bytearray.fromhex(
                                field.value[:3 * min(16,
                                        picture_bytes - len(picture_data))])
                        if len(picture_data) == picture_bytes:
                            # Add the image to the database, storing the digest
                            digest = hashlib.sha1(picture_data).digest()
                            if digest not in digest_map:
                                digest_map[digest] = picture_data
                            picture_bytes = 0
                            picture_data = None # allow freeing
                            # append the digest to the list if not present
                            # using list instead of set for defined order.
                            picture = Picture(digest, picture_description)
                            picture_list = pictures.setdefault(
                                    picture_type, [])
                            if picture not in picture_list:
                                picture_list.append(picture)
        return FLACMeta(sample_rate, total_samples, channels, comments, pictures)