def __process_output(child):
    """Parse the single output line of a child that reports image properties.

    The line is split on spaces into ``<width> <height> <format>``.

    Args:
        child: A started process object exposing ``stdout`` (readable via
            ``util.line_reader``) and ``wait()``.

    Returns:
        An ``ImageMeta`` built from the lower-cased format and the integer
        width and height.

    Raises:
        RuntimeError: If the child exits with a non-zero status, produces no
            output line, or produces a line that cannot be parsed.
    """
    try:
        # Default to None: a child that prints nothing must surface as the
        # RuntimeError below, not as a stray StopIteration.
        result = next(util.line_reader(child.stdout), None)  # one line
    finally:
        # Always release the pipe and reap the child, even if reading failed.
        child.stdout.close()
        status = child.wait()

    if status != 0 or result is None:
        raise RuntimeError('Could not identify image properties.')

    parts = result.split(' ', 2)
    try:
        return ImageMeta(format=parts[2].lower(),
                         width=int(parts[0]),
                         height=int(parts[1]))
    except (IndexError, ValueError) as err:
        # Too few fields or non-numeric dimensions: report it the same way
        # as any other identification failure.
        raise RuntimeError('Could not identify image properties.') from err
def __process_output(child):
    """Parse the single output line of a child that reports image properties.

    The line is split on spaces into ``<width> <height> <format>``.

    Args:
        child: A started process object exposing ``stdout`` (readable via
            ``util.line_reader``) and ``wait()``.

    Returns:
        An ``ImageMeta`` built from the lower-cased format and the integer
        width and height.

    Raises:
        RuntimeError: If the child exits with a non-zero status, produces no
            output line, or produces a line that cannot be parsed.
    """
    try:
        # Default to None: a child that prints nothing must surface as the
        # RuntimeError below, not as a stray StopIteration.
        result = next(util.line_reader(child.stdout), None)  # one line
    finally:
        # Always release the pipe and reap the child, even if reading failed.
        child.stdout.close()
        status = child.wait()

    if status != 0 or result is None:
        raise RuntimeError('Could not identify image properties.')

    parts = result.split(' ', 2)
    try:
        return ImageMeta(format=parts[2].lower(),
                         width=int(parts[0]),
                         height=int(parts[1]))
    except (IndexError, ValueError) as err:
        # Too few fields or non-numeric dimensions: report it the same way
        # as any other identification failure.
        raise RuntimeError('Could not identify image properties.') from err
def read(self):
    """Read word2vec-format text embeddings from ``self.filename``.

    The first line holds two integers: the row count ``m`` and the
    embedding dimension ``n``. Every following line holds a word followed
    by its whitespace-separated vector components.

    Blank lines and lines whose vector length differs from ``n`` are
    reported with ``print`` and skipped.

    Sets:
        self.w_index: Mapping of word to its row index in ``self.W``.
        self.inv_w_index: Mapping of row index back to word.
        self.W: Array with one row per accepted word and ``n`` columns.
    """
    with open(self.filename) as in_f:
        # int() rather than eval(): the header comes from the file and must
        # never be executed as code.
        m, n = map(int, in_f.readline().split())

    e_m = np.zeros((m, n))
    ws = []
    for l in line_reader(self.filename, skip=1):  # skip dimensions
        fields = l.split()
        if not fields:
            # A blank line would otherwise crash the starred unpack below.
            print("Empty w or e.")
            continue
        w, *e = fields
        if len(e) != n:
            print("Incorrect embedding dimension, skipping.")
            continue
        # Index rows by the number of accepted words, not by line number,
        # so skipped lines leave no gap and w_index stays aligned with W.
        e_m[len(ws)] = [float(x) for x in e]
        ws.append(w)

    self.w_index = {w: c for c, w in enumerate(ws)}
    self.inv_w_index = {v: k for k, v in self.w_index.items()}
    # Drop the unused trailing rows left behind by skipped lines.
    self.W = e_m[:len(ws)]
def from_file(filename, digest_map=None):
    """Collect FLAC metadata by parsing the output of ``metaflac --list``.

    Args:
        filename: Path of the FLAC file passed to ``metaflac``.
        digest_map: Optional mapping of SHA-1 digest to picture data. When
            given, PICTURE blocks are requested as well and every picture
            whose digest is not yet present is added to this mapping (the
            mapping is mutated in place). When ``None``, pictures are not
            requested.

    Returns:
        A ``FLACMeta`` built from the sample rate, total sample count,
        channel count, comment dict and a dict mapping each picture type to
        a list of ``Picture`` objects in order of first appearance.

    Note:
        The exit status of ``metaflac`` is not checked; a failing run simply
        yields whatever was parsed (possibly only the initial defaults).
    """
    sample_rate = None
    total_samples = None
    channels = 0
    comments = {}
    pictures = {}
    parser = MetaListParser()

    desired = [BlockType.STREAMINFO, BlockType.VORBIS_COMMENT]
    if digest_map is not None:
        desired.append(BlockType.PICTURE)

    block_type = None
    # State of the PICTURE block currently being read; reset at each block.
    picture_bytes = 0
    picture_type = None
    picture_data = None
    picture_description = None

    # The context manager closes the stdout pipe and reaps the child even
    # when parsing raises, so no pipe or zombie process is left behind.
    with subprocess.Popen(
            ['metaflac', '--list', '--no-utf8-convert',
             '--block-type=' + ','.join(value.name for value in desired),
             filename],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL) as child:
        # After the last line we get None because terminate is True; it is
        # still handed to the parser so that it can flush its final field.
        for line in util.line_reader(child.stdout, terminate=True):
            field = parser.process_line(line)
            if field is None:
                continue

            if field.level == 0 and field.key == BLOCK_PREFIX:
                # A new block header: its type is not known yet.
                block_type = None
            elif block_type is None and field.key == 'type':
                block_type = BlockType(field.int_value())
                if block_type == BlockType.PICTURE:
                    picture_bytes = 0
                    picture_type = None
                    picture_data = None
                    picture_description = None
            elif block_type == BlockType.STREAMINFO:
                if field.key == 'sample_rate':
                    sample_rate = field.int_value()
                elif field.key == 'total samples':
                    total_samples = field.int_value()
                elif field.key == 'channels':
                    channels = field.int_value()
            elif block_type == BlockType.VORBIS_COMMENT:
                if field.key.startswith('comment[') and field.key.endswith(']'):
                    key, value = field.value.split('=', 1)
                    comments[key] = value
            elif block_type == BlockType.PICTURE:
                if field.key == 'data length':
                    picture_bytes = field.int_value()
                elif field.key == 'type':
                    picture_type = PictureType(field.int_value())
                elif field.key == 'data':
                    picture_data = bytearray()
                elif field.key == 'description':
                    picture_description = field.value
                elif (picture_bytes != 0 and picture_data is not None
                      and field.key == '{:08X}'.format(len(picture_data))):
                    # Hex dump row keyed by the current offset. Take at most
                    # 16 bytes, three characters each (presumably two hex
                    # digits plus a separator), ignoring the rest of the row.
                    remaining = picture_bytes - len(picture_data)
                    picture_data += bytearray.fromhex(
                        field.value[:3 * min(16, remaining)])
                    if len(picture_data) == picture_bytes:
                        # Add the image to the database, storing the digest.
                        digest = hashlib.sha1(picture_data).digest()
                        if digest not in digest_map:
                            digest_map[digest] = picture_data
                        picture_bytes = 0
                        picture_data = None  # allow freeing
                        # Append the picture to the list if not present;
                        # a list instead of a set keeps a defined order.
                        picture = Picture(digest, picture_description)
                        picture_list = pictures.setdefault(picture_type, [])
                        if picture not in picture_list:
                            picture_list.append(picture)

    return FLACMeta(sample_rate, total_samples, channels, comments, pictures)