def kmeans(feat_mat, c1=-1, c2=-1, min_size=50, kmeans_max_iter=20, spherical=True):
    if c1 == -1:
        # pick two distinct random rows as the initial centroids
        c1, c2 = np.random.randint(feat_mat.shape[0]), np.random.randint(1, feat_mat.shape[0])
        c1, c2 = feat_mat[c1], feat_mat[(c1 + c2) % feat_mat.shape[0]]
    old_indexer = np.ones(feat_mat.shape[0]) * -1
    for _ in range(kmeans_max_iter):
        scores = np.squeeze(np.asarray(feat_mat.multiply(c1 - c2).sum(1)))
        indexer = scores >= 0
        if indexer.sum() < min_size:
            # keep only the min_size highest-scoring rows in the positive cluster
            indexer = np.zeros(feat_mat.shape[0], dtype=bool)
            indexer[np.argpartition(-scores, min_size)[:min_size]] = True
        elif (~indexer).sum() < min_size:
            # keep only the min_size lowest-scoring rows in the negative cluster
            indexer = np.zeros(feat_mat.shape[0], dtype=bool)
            indexer[np.argpartition(scores, min_size)[min_size:]] = True
        if np.array_equal(indexer, old_indexer):
            break
        old_indexer = indexer
        # np.asarray: scipy's sparse .sum(0) returns np.matrix, which sk_normalize rejects
        c1 = np.asarray(feat_mat[indexer].sum(0))
        c2 = np.asarray(feat_mat[~indexer].sum(0))
        if spherical:
            c1 = sk_normalize(c1)
            c2 = sk_normalize(c2)
    return indexer
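# A minimal usage sketch of the 2-means split above (an assumption, not part of the
# original source): `np` is numpy, `smat` is scipy.sparse, and `sk_normalize` is
# sklearn.preprocessing.normalize, matching the aliases used in these snippets.
import numpy as np
import scipy.sparse as smat
from sklearn.preprocessing import normalize as sk_normalize

rng = np.random.RandomState(0)
feat_mat = smat.csr_matrix(sk_normalize(rng.rand(100, 16)))  # row-normalized features
indexer = kmeans(feat_mat, min_size=10)
print(indexer.sum(), (~indexer).sum())  # sizes of the two clusters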
def test_normalize(failure_logger, clf_dataset, axis, norm,  # noqa: F811
                   return_norm):
    X_np, X = clf_dataset

    if return_norm:
        t_X, t_norms = cu_normalize(X, axis=axis, norm=norm,
                                    return_norm=return_norm)
        sk_t_X, sk_t_norms = sk_normalize(X_np, axis=axis, norm=norm,
                                          return_norm=return_norm)
        assert_allclose(t_norms, sk_t_norms)
    else:
        t_X = cu_normalize(X, axis=axis, norm=norm, return_norm=return_norm)
        sk_t_X = sk_normalize(X_np, axis=axis, norm=norm,
                              return_norm=return_norm)

    assert type(t_X) == type(X)
    assert_allclose(t_X, sk_t_X)
def testNormalizeExecution(self):
    raw_dense = np.random.rand(10, 10)
    raw_sparse = sps.random(10, 10, density=0.4, format='csr')

    for chunk_size in [10, 6, (10, 6), (6, 10)]:
        for raw, x in [
            (raw_dense, mt.tensor(raw_dense, chunk_size=chunk_size)),
            (raw_sparse, mt.tensor(raw_sparse, chunk_size=chunk_size))
        ]:
            for norm in ['l1', 'l2', 'max']:
                for axis in (0, 1):
                    for use_sklearn in [True, False]:
                        n = normalize(x, norm=norm, axis=axis, return_norm=False)
                        n.op._use_sklearn = use_sklearn

                        result = self.executor.execute_tensor(n, concat=True)[0]
                        expected = sk_normalize(raw, norm=norm, axis=axis,
                                                return_norm=False)
                        if sps.issparse(expected):
                            expected = expected.A
                        np.testing.assert_almost_equal(np.asarray(result), expected)

    raw_dense = np.random.rand(10, 10)
    raw_sparse = sps.random(10, 10, density=0.4, format='csr')

    # test copy and return_norm
    for axis in (0, 1):
        for chunk_size in (10, 6, (6, 10)):
            for raw in (raw_dense, raw_sparse):
                x = mt.tensor(raw, chunk_size=chunk_size)
                n = normalize(x, axis=axis, copy=False, return_norm=True)
                results = self.executor.execute_tensors(n)

                raw_copy = raw.copy()
                try:
                    expects = sk_normalize(raw_copy, axis=axis, copy=False,
                                           return_norm=True)
                except NotImplementedError:
                    continue

                if sps.issparse(expects[0]):
                    expected = expects[0].A
                else:
                    expected = expects[0]
                np.testing.assert_almost_equal(np.asarray(results[0]), expected)
                np.testing.assert_almost_equal(results[1], expects[1])
def test_normalize_execution(setup):
    raw_dense = np.random.rand(10, 10)
    raw_sparse = sps.random(10, 10, density=0.4, format="csr")

    for chunk_size in [10, 6, (10, 6), (6, 10)]:
        for raw, x in [
            (raw_dense, mt.tensor(raw_dense, chunk_size=chunk_size)),
            (raw_sparse, mt.tensor(raw_sparse, chunk_size=chunk_size)),
        ]:
            for norm in ["l1", "l2", "max"]:
                for axis in (0, 1):
                    for use_sklearn in [True, False]:
                        n = normalize(x, norm=norm, axis=axis, return_norm=False)
                        n.op._use_sklearn = use_sklearn

                        result = n.execute().fetch()
                        expected = sk_normalize(raw, norm=norm, axis=axis,
                                                return_norm=False)
                        if sps.issparse(expected):
                            expected = expected.A
                        np.testing.assert_almost_equal(np.asarray(result), expected)

    raw_dense = np.random.rand(10, 10)
    raw_sparse = sps.random(10, 10, density=0.4, format="csr")

    # test copy and return_norm
    for axis in (0, 1):
        for chunk_size in (10, 6, (6, 10)):
            for raw in (raw_dense, raw_sparse):
                x = mt.tensor(raw, chunk_size=chunk_size)
                n = normalize(x, axis=axis, copy=False, return_norm=True)
                results = n.execute().fetch()

                raw_copy = raw.copy()
                try:
                    expects = sk_normalize(raw_copy, axis=axis, copy=False,
                                           return_norm=True)
                except NotImplementedError:
                    continue

                if sps.issparse(expects[0]):
                    expected = expects[0].A
                else:
                    expected = expects[0]
                np.testing.assert_almost_equal(np.asarray(results[0]), expected)
                np.testing.assert_almost_equal(results[1], expects[1])
def load_feature_matrix(args):
    if args.feature_format % 3 == 0:
        X1 = HierarchicalMLModel.load_feature_matrix(args.input_inst_feat1)
        X2 = HierarchicalMLModel.load_feature_matrix(args.input_inst_feat2)
        X = smat.hstack([sk_normalize(X1, axis=1),
                         sk_normalize(X2, axis=1)]).tocsr()
    elif args.feature_format % 3 == 1 and args.input_inst_feat1:
        X = HierarchicalMLModel.load_feature_matrix(args.input_inst_feat1)
    elif args.feature_format % 3 == 2 and args.input_inst_feat2:
        X = HierarchicalMLModel.load_feature_matrix(args.input_inst_feat2)
    else:
        raise NotImplementedError(
            f"args.feature_format = {args.feature_format} is not supported.")
    if args.feature_format // 3 == 0:
        X = sk_normalize(X, axis=1, copy=False)
    return X
def __init__(self, code_to_label):
    assert isinstance(code_to_label, smat.spmatrix)
    code_to_label = code_to_label.tocsr()
    self.code_to_label = sk_normalize(code_to_label, axis=1, copy=False, norm='l1')
def predict_new(self, X, only_topk=None, csr_codes=None, beam_size=2,
                max_depth=None, cond_prob=True, normalized=False, threads=-1):
    if max_depth is None:
        max_depth = self.depth
    if cond_prob is None or cond_prob == False:
        cond_prob = PostProcessor(Transform.identity, Combiner.noop)
    if cond_prob == True:
        cond_prob = PostProcessor(Transform.get_lpsvm(3), Combiner.mul)
    assert isinstance(cond_prob, PostProcessor), type(cond_prob)
    assert X.shape[1] == self.nr_features

    if self.bias > 0:
        X = smat_util.append_column(X, self.bias)
    pX = PyMatrix.init_from(X, dtype=self.model_chain[0].pW.dtype)
    max_depth = min(self.depth, max_depth)
    pred_csr = csr_codes
    for d in range(max_depth):
        cur_model = self.model_chain[d]
        local_only_topk = only_topk if d == (max_depth - 1) else beam_size
        pred_csr = cur_model.predict_new(pX, only_topk=local_only_topk,
                                         csr_codes=pred_csr,
                                         cond_prob=cond_prob,
                                         threads=threads)
    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
    return pred_csr
def predict(
    self,
    X,
    csr_codes=None,
    only_topk=None,
    cond_prob=True,
    normalize=False,
    **arg_kw,
):
    assert csr_codes is not None, "csr_codes must be provided for CountModel.predict()"
    assert csr_codes.shape[0] == X.shape[0]
    assert csr_codes.shape[1] == self.nr_codes
    if cond_prob:
        pred_csr = csr_codes.dot(self.code_to_label).tocsr()
    else:
        # temporarily replace the code values with ones so the dot product only counts
        tmp = csr_codes.data
        tmp2 = sp.ones_like(tmp)
        csr_codes.data = tmp2
        pred_csr = csr_codes.dot(self.code_to_label).tocsr()
        csr_codes.data = tmp
    pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)
    if normalize:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
    return pred_csr
def test_inplace_csr_row_normalize_l2(failure_logger, sparse_random_dataset):
    X_np, _, _, X_sparse = sparse_random_dataset
    if X_sparse.format != 'csr':
        pytest.skip('Skip non CSR matrices')

    inplace_csr_row_normalize_l2(X_sparse)
    X_np = sk_normalize(X_np, norm='l2', axis=1)
    assert_allclose(X_sparse, X_np)
def orthogonalize_gram_schmidt(kmat):
    """Orthogonalize filters using the Gram-Schmidt method.
    kmat should be num_params x num_filts."""
    num_par, num_filts = kmat.shape
    # First normalize all filters
    kmat_out = sk_normalize(kmat, axis=0)
    for nn in range(num_filts - 1):
        # orthogonalize all remaining filters against the chosen one
        for mm in range(nn + 1, num_filts):
            kmat_out[:, mm] = kmat_out[:, mm] - np.dot(
                kmat_out[:, nn], kmat_out[:, mm]) * kmat_out[:, nn]
        # renormalize
        kmat_out = sk_normalize(kmat_out, axis=0)
    return kmat_out
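# A minimal usage sketch (an assumption, not part of the original source): `np` is
# numpy and `sk_normalize` is sklearn.preprocessing.normalize, as in the snippet
# above. Orthogonalize a random filter bank and check the columns are orthonormal.
import numpy as np
from sklearn.preprocessing import normalize as sk_normalize

rng = np.random.default_rng(0)
kmat = rng.standard_normal((64, 8))      # num_params x num_filts
kmat_orth = orthogonalize_gram_schmidt(kmat)
gram = kmat_orth.T @ kmat_orth           # should be close to the identity matrix
print(np.allclose(gram, np.eye(8), atol=1e-6))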
def predict(
    self,
    X,
    only_topk=None,
    csr_codes=None,
    cond_prob=None,
    normalized=False,
    threads=-1,
):
    assert X.shape[1] == self.nr_features
    if csr_codes is None:
        dense = X.dot(self.W).toarray()
        if cond_prob:
            dense = cond_prob.transform(dense, inplace=True)
        coo = smat_util.dense_to_coo(dense)
        pred_csr = smat_util.sorted_csr_from_coo(coo.shape, coo.row, coo.col,
                                                 coo.data, only_topk=only_topk)
    else:  # csr_codes is given
        assert self.C is not None, "This model does not have C"
        assert X.shape[1] == self.nr_features
        assert csr_codes.shape[0] == X.shape[0]
        assert csr_codes.shape[1] == self.nr_codes
        if (csr_codes.data == 0).sum() != 0:
            # this is a trick to avoid zero entries explicit removal from the smat_dot_smat
            offset = sp.absolute(csr_codes.data).max() + 1
            csr_codes = smat.csr_matrix(
                (csr_codes.data + offset, csr_codes.indices, csr_codes.indptr),
                shape=csr_codes.shape,
            )
            csr_labels = (csr_codes.dot(self.C.T)).tocsr()
            csr_labels.data -= offset
        else:
            csr_labels = (csr_codes.dot(self.C.T)).tocsr()
        nnz_of_insts = csr_labels.indptr[1:] - csr_labels.indptr[:-1]
        inst_idx = sp.repeat(sp.arange(X.shape[0], dtype=sp.uint32),
                             nnz_of_insts)
        label_idx = csr_labels.indices.astype(sp.uint32)
        val = self.predict_values(X, inst_idx, label_idx, threads=threads)
        if cond_prob:
            val = cond_prob.transform(val, inplace=True)
            val = cond_prob.combiner(val, csr_labels.data)
        pred_csr = smat_util.sorted_csr_from_coo(csr_labels.shape, inst_idx,
                                                 label_idx, val,
                                                 only_topk=only_topk)
    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
    return pred_csr
def test_normalize_sparse(sparse_clf_dataset, norm):  # noqa: F811
    X_np, X = sparse_clf_dataset

    axis = 0 if X.format == 'csc' else 1

    t_X = cu_normalize(X, axis=axis, norm=norm)
    assert type(t_X) == type(X)

    sk_t_X = sk_normalize(X_np, axis=axis, norm=norm)
    assert_allclose(t_X, sk_t_X)
def normalize(data):
    """
    Normalize the data to the [-1, 1] range.

    Arguments:
        data (np.array): Data to normalize.

    Todo:
        Revise the outputs.
    """
    return sk_normalize(data, norm='max', axis=0, return_norm=True)
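# A minimal usage sketch of the wrapper above (an assumption, not part of the original
# source): `sk_normalize` is sklearn.preprocessing.normalize. With return_norm=True the
# wrapper returns both the column-scaled data and the per-column norms.
import numpy as np
from sklearn.preprocessing import normalize as sk_normalize

data = np.array([[1.0, -2.0], [3.0, 4.0]])
scaled, norms = normalize(data)
print(scaled)  # each column divided by its max-norm
print(norms)   # [3. 4.]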
def predict_new(
    self,
    X,
    only_topk=None,
    csr_codes=None,
    cond_prob=None,
    normalized=False,
    threads=-1,
):
    assert X.shape[1] == self.nr_features
    if csr_codes is None:
        dense = X.dot(self.W).toarray()
        if cond_prob:
            dense = cond_prob.transform(dense, inplace=True)
        coo = smat_util.dense_to_coo(dense)
        pred_csr = smat_util.sorted_csr_from_coo(coo.shape, coo.row, coo.col,
                                                 coo.data, only_topk=only_topk)
    else:  # csr_codes is given
        assert self.C is not None, "This model does not have C"
        assert X.shape[1] == self.nr_features
        assert csr_codes.shape[0] == X.shape[0]
        assert csr_codes.shape[1] == self.nr_codes
        if not csr_codes.has_sorted_indices:
            csr_codes = csr_codes.sorted_indices()
        if (csr_codes.data == 0).sum() != 0:
            # this is a trick to avoid zero entries explicit removal from the smat_dot_smat
            offset = sp.absolute(csr_codes.data).max() + 1
            csr_codes = smat.csr_matrix(
                (csr_codes.data + offset, csr_codes.indices, csr_codes.indptr),
                shape=csr_codes.shape,
            )
            pZ = PyMatrix.init_from(csr_codes, self.dtype)
            csr_labels, pred_csr = clib.multilabel_predict_with_codes(
                X, self.pW, self.pC, pZ, threads=threads)
            csr_labels.data -= offset
        else:
            pZ = PyMatrix.init_from(csr_codes.sorted_indices(), self.dtype)
            csr_labels, pred_csr = clib.multilabel_predict_with_codes(
                X, self.pW, self.pC, pZ, threads=threads)
        val = pred_csr.data
        if cond_prob:
            val = cond_prob.transform(val, inplace=True)
            val = cond_prob.combiner(val, csr_labels.data)
        pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)
    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
    return pred_csr
def normalize(X, norm='l2', copy=False):
    """Normalize sparse or dense matrix

    Arguments:
    ---------
    X: csr_matrix or csc_matrix
        sparse matrix
    norm: str, optional, default='l2'
        normalize with l1/l2
    copy: boolean, optional, default=False
        whether to copy data or not
    """
    return sk_normalize(X, norm=norm, copy=copy)
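# A minimal usage sketch of the wrapper above (an assumption, not part of the original
# source): `sk_normalize` is sklearn.preprocessing.normalize, so rows of a CSR matrix
# are scaled to unit norm along the default axis.
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.preprocessing import normalize as sk_normalize

X = csr_matrix(np.array([[3.0, 4.0], [1.0, 0.0]]))
X_l2 = normalize(X, norm='l2')   # rows scaled to unit l2 norm
print(X_l2.toarray())            # [[0.6, 0.8], [1.0, 0.0]]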
def test_inplace_csr_row_normalize_l2(sparse_clf_dataset):  # noqa: F811
    X_np, X = sparse_clf_dataset

    if not cp.sparse.issparse(X):
        pytest.skip("Skipping non-CuPy or non-sparse arrays")

    if X.format != 'csr':
        X = X.tocsr()

    inplace_csr_row_normalize_l2(X)

    X_np = X_np.toarray()
    X_np = sk_normalize(X_np, norm='l2', axis=1)
    assert_allclose(X, X_np)
def main(args):
    # set hyper-parameters
    input_feat_path = args.input_feat_path
    depth = args.depth
    kdim = args.kdim
    algo = args.algo
    seed = args.seed
    verbose = args.verbose
    max_iter = args.max_iter
    threads = args.threads
    output_code_dir = args.output_code_dir
    if verbose:
        print("depth {} kdim {} algo {}".format(depth, kdim, algo))

    # load label feature matrix (nr_labels * nr_features)
    if path.exists(input_feat_path):
        feat_mat = load_feature_matrix(input_feat_path)
    else:
        raise ValueError(
            "label embedding path does not exist {}".format(input_feat_path))
    if not path.exists(output_code_dir):
        os.makedirs(output_code_dir, exist_ok=True)

    # Indexing algorithm
    # C: nr_labels x nr_codes, stored in csr sparse matrix
    indexer = Indexer(feat_mat)
    if algo == indexer.SKMEANS:
        feat_mat = sk_normalize(feat_mat, axis=1, norm="l2", copy=False)
    code = indexer.gen(kdim=kdim, depth=depth, algo=algo, seed=seed,
                       max_iter=max_iter, threads=threads)
    if verbose:
        code.print()
    C = code.get_csc_matrix()
    if verbose:
        print("C", C.shape)

    # save code and args
    output_code_path = path.join(output_code_dir, "code.npz")
    smat.save_npz("{}".format(output_code_path), C, compressed=False)
    output_config_path = path.join(output_code_dir, "config.json")
    with open(output_config_path, "w") as fout:
        fout.write(json.dumps(vars(args), indent=True))
def test_normalize_sparse(failure_logger, sparse_clf_dataset,  # noqa: F811
                          norm):
    X_np, X = sparse_clf_dataset

    axis = 0 if X.format == 'csc' else 1

    t_X = cu_normalize(X, axis=axis, norm=norm)
    # assert type(t_X) == type(X)
    if cpx.scipy.sparse.issparse(X):
        assert cpx.scipy.sparse.issparse(t_X)
    if scipy.sparse.issparse(X):
        assert scipy.sparse.issparse(t_X)

    sk_t_X = sk_normalize(X_np, axis=axis, norm=norm)
    assert_allclose(t_X, sk_t_X)
def predict_new(self, X, only_topk=None, csr_codes=None, beam_size=2,
                max_depth=None, cond_prob=True, normalized=False, threads=-1):
    if max_depth is None:
        max_depth = self.depth
    if cond_prob is None or cond_prob == False:
        cond_prob = PostProcessor(Transform.identity, Combiner.noop)
    if cond_prob == True:
        cond_prob = PostProcessor(Transform.get_lpsvm(3), Combiner.mul)
    assert isinstance(cond_prob, PostProcessor), type(cond_prob)

    pX = PyMatrix.init_from(X, dtype=self.model_chain[0].pW.dtype)
    max_depth = min(self.depth, max_depth)
    transform = cond_prob.transform if cond_prob else Transform.identity
    pred_csr = csr_codes
    # timer = WallTimer()
    for d in range(max_depth):
        # print('predict at depth {}'.format(d))
        # sys.stdout.flush()
        # timer.tic()
        cur_model = self.model_chain[d]
        local_only_topk = only_topk if d == (max_depth - 1) else beam_size
        pred_csr = cur_model.predict_new(pX, only_topk=local_only_topk,
                                         csr_codes=pred_csr,
                                         transform=transform,
                                         cond_prob=cond_prob,
                                         threads=threads)
        # print('>>> {}ms'.format(timer.toc()))
        # sys.stdout.flush()
        # if cond_prob and normalized:
        #     # perform normalization to avoid numerical issue
        #     pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
        # print('d = {} codes:{} nnz:{}'.format(d, pred_csr.shape[1], pred_csr.nnz))
        # pred_csr.data[:] = sp.exp(pred_csr.data[:])
    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
    return pred_csr
def predict(self, X, only_topk=None, transform=None, csr_codes=None,
            cond_prob=None, normalized=False, threads=-1):
    assert X.shape[1] == self.nr_features
    if csr_codes is None:
        dense = X.dot(self.W).toarray()
        if transform:
            dense = transform(dense, inplace=True)
        coo = smat_util.dense_to_coo(dense)
        pred_csr = smat_util.sorted_csr_from_coo(coo.shape, coo.row, coo.col,
                                                 coo.data, only_topk=only_topk)
    else:  # csr_codes is given
        assert self.C is not None, "This model does not have C"
        assert X.shape[1] == self.nr_features
        assert csr_codes.shape[0] == X.shape[0]
        assert csr_codes.shape[1] == self.nr_codes
        csr_labels = (csr_codes.dot(self.C.T)).tocsr()
        nnz_of_insts = csr_labels.indptr[1:] - csr_labels.indptr[:-1]
        inst_idx = sp.repeat(sp.arange(X.shape[0], dtype=sp.uint32),
                             nnz_of_insts)
        label_idx = csr_labels.indices.astype(sp.uint32)
        val = self.predict_values(X, inst_idx, label_idx, threads=threads)
        if transform:
            val = transform(val, inplace=True)
        if cond_prob:
            val[:] = cond_prob.combiner(val, csr_labels.data)
        pred_csr = smat.csr_matrix((val, label_idx, csr_labels.indptr),
                                   shape=csr_labels.shape)
        pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)
        # pred_csr = self.predict_with_coo_labels(X, coo_labels.row, coo_labels.cols, only_topk)
    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
    return pred_csr
def generate_relevance_chain(self, R_dict, norm_type=None, induce=True):
    """Generate a chain of instance-to-cluster relevance matrices for cost
    sensitive learning from a partial relevance chain.

    Args:
        R_dict (dict): dictionary of partial relevance chains, with keys being
            the number of layers above leaf elements.
            R_dict[i].shape[0] == nr_inst, for all i.
            R_dict[0].shape[1] == self.chain[-1].shape[0],
            R_dict[i].shape[1] == self.chain[-i].shape[1], for i >= 1
            R_dict.keys() \\subset range(len(self.chain)+1)
        norm_type (str, optional): row-wise normalization of resulting relevance
            matrices. Default None to ignore.
            Options: 'l1', 'l2', 'max', 'no-norm', None
        induce (bool, optional): whether to induce missing relevance matrices by
            label aggregation. Default True

    Returns:
        relevance_chain: list of csc matrices for relevance
    """
    relevance_chain = [None] * (len(self) + 1)
    # if nothing is given, return a chain of None
    if R_dict is None or all(R_dict[x] is None for x in R_dict):
        return relevance_chain

    self.matrix_chain_dimension_check(R_dict)

    # construct relevance chain from incomplete chain
    relevance_chain[0] = R_dict.get(0, None)
    for i in range(1, len(self) + 1):
        if R_dict.get(i, None) is not None:
            relevance_chain[i] = R_dict[i]
        elif relevance_chain[i - 1] is not None and induce:
            relevance_chain[i] = clib.sparse_matmul(relevance_chain[i - 1],
                                                    self.chain[-i])
        else:
            relevance_chain[i] = None
    relevance_chain.reverse()

    if norm_type not in [None, "no-norm"]:
        relevance_chain = [
            sk_normalize(rr.tocsr(), norm=norm_type) if rr is not None else None
            for rr in relevance_chain
        ]
    return relevance_chain[1:]
def load_indexed_code(code_path, label_feat):
    C = None
    mapping = {
        "none": Indexer.SKMEANS,
        "skmeans": Indexer.SKMEANS,
        "kmeans": Indexer.KMEANS,
        "kdtree": Indexer.KDTREE,
        "random": Indexer.PURE_RANDOM,
        "ordinal": Indexer.BALANCED_ORDINAL,
        "uniform": Indexer.UNIFORM,
    }
    if code_path is None:
        code_path = "none"
    if code_path.lower() in mapping:
        if label_feat is not None:
            algo = mapping[code_path.lower()]
            if algo == Indexer.SKMEANS:
                label_feat = sk_normalize(label_feat, axis=1, norm="l2",
                                          copy=False)
            indexer = Indexer(label_feat)
            code = indexer.gen(
                kdim=2,
                depth=indexer.estimate_depth_with_cluster_size(100),
                algo=algo,
                seed=0,
                max_iter=20,
                threads=1,
            )
            C = code.get_csc_matrix()
    else:
        if code_path.endswith(".npz") and path.exists(code_path):
            C = smat.load_npz(code_path)
        elif path.isdir(code_path) and path.exists(path.join(code_path, "code.npz")):
            C = smat.load_npz(path.join(code_path, "code.npz"))
        else:
            assert False, f"'{code_path}' does not exist. Valid ones {mapping.keys()}"
    return C
def predict_new(self, X, only_topk=None, transform=None, csr_codes=None,
                cond_prob=None, normalized=False, threads=-1):
    assert X.shape[1] == self.nr_features
    if csr_codes is None:
        dense = X.dot(self.W).toarray()
        if transform:
            dense = transform(dense, inplace=True)
        coo = smat_util.dense_to_coo(dense)
        pred_csr = smat_util.sorted_csr_from_coo(coo.shape, coo.row, coo.col,
                                                 coo.data, only_topk=only_topk)
    else:  # csr_codes is given
        assert self.C is not None, "This model does not have C"
        assert X.shape[1] == self.nr_features
        assert csr_codes.shape[0] == X.shape[0]
        assert csr_codes.shape[1] == self.nr_codes
        pZ = PyMatrix.init_from(csr_codes, self.dtype)
        csr_labels, pred_csr = clib.multilabel_predict_with_codes(
            X, self.pW, self.pC, pZ, threads=threads)
        val = pred_csr.data
        if transform:
            val = transform(val, inplace=True)
        if cond_prob:
            val[:] = cond_prob.combiner(val, csr_labels.data)
        pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)
    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
    return pred_csr
def pifa(Y, X, dtype=sp.float32):
    Y_avg = sk_normalize(Y, axis=1, norm="l2")
    label_embedding = smat.csr_matrix(Y_avg.T.dot(X), dtype=dtype)
    return label_embedding
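# A minimal usage sketch for pifa (Positive Instance Feature Aggregation), under the
# assumption that `smat` is scipy.sparse, `sp` is numpy, and `sk_normalize` is
# sklearn.preprocessing.normalize, matching the aliases used in these snippets.
import numpy as sp
import scipy.sparse as smat
from sklearn.preprocessing import normalize as sk_normalize

# Y: nr_inst x nr_labels relevance matrix, X: nr_inst x nr_features instance features
Y = smat.csr_matrix(sp.array([[1, 0], [1, 1], [0, 1]], dtype=sp.float32))
X = smat.csr_matrix(sp.random.rand(3, 5).astype(sp.float32))
label_embedding = pifa(Y, X)   # nr_labels x nr_features
print(label_embedding.shape)   # (2, 5)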
def main():
    # Changing to 25, which will give slightly better intervals; 20 gives very short intervals
    vad_threshold = 25  # threshold for voice activity detection

    # Data prep
    # I'm saving only 2 embeddings, i.e. first and last tisv_frames for a given interval
    # in an audio, so each .npy embedding file will have a shape of (2, 256)
    tf.reset_default_graph()
    batch_size = 2  # Fixing to 2 since we take 2 for each interval  # utter_batch.shape[1]
    verif = tf.placeholder(
        shape=[None, batch_size, 40],
        dtype=tf.float32)  # verification batch (time x batch x n_mel)
    batch = tf.concat([
        verif,
    ], axis=1)

    # embedding lstm (3-layer default)
    with tf.variable_scope("lstm"):
        lstm_cells = [
            tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj)
            for i in range(config.num_layer)
        ]
        lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)  # make lstm op and variables
        outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm, inputs=batch, dtype=tf.float32,
            time_major=True)  # for TI-VS must use dynamic rnn
        embedded = outputs[-1]  # the last output is the embedded d-vector
        embedded = normalize(embedded)  # normalize

    config_tensorflow = tf.ConfigProto(device_count={'GPU': 0})
    saver = tf.train.Saver(var_list=tf.global_variables())

    # Extract embeddings
    # Each embedding saved file will have shape (2, 256)
    with tf.Session(config=config_tensorflow) as sess:
        tf.global_variables_initializer().run()
        saver.restore(sess, config.model_path)

        logging.info("loading audio")
        audio_path = config.audio_file
        utter, sr = librosa.core.load(audio_path, sr=config.sr)  # load audio
        utter_min_len = (config.tisv_frame_min * config.hop +
                         config.window) * sr  # lower bound of utterance length

        # Get the duration
        duration = librosa.get_duration(utter, sr)
        # Duration of each window
        duration_per_frame = (duration / utter.shape[0])
        logging.info(
            f'Duration: {duration}\nDuration per frame: {duration_per_frame}s\nMin length of utterance: {utter_min_len * duration_per_frame}s'
        )
        tisv_frame_duration_s = utter_min_len * duration_per_frame
        intervals = librosa.effects.split(
            utter, top_db=vad_threshold)  # voice activity detection

        all_data = []
        logging.info('Converting intervals to embeddings')
        selected_intervals_idx = []
        for idx, current_interval in enumerate(intervals):
            if (current_interval[1] - current_interval[0]) > utter_min_len:
                # Save these selected intervals, as shorter ones are ignored
                selected_intervals_idx.append(idx)
                utterances_spec = []
                utter_part = utter[current_interval[0]:current_interval[1]]
                # save first and last 160 frames of the spectrogram
                S = librosa.core.stft(y=utter_part,
                                      n_fft=config.nfft,
                                      win_length=int(config.window * sr),
                                      hop_length=int(config.hop * sr))
                S = np.abs(S)**2
                mel_basis = librosa.filters.mel(sr=sr, n_fft=config.nfft, n_mels=40)
                S = np.log10(np.dot(mel_basis, S) + 1e-6)  # log mel spectrogram of utterances
                utterances_spec.append(S[:, :config.tisv_frame])
                utterances_spec.append(S[:, -config.tisv_frame:])
                utterances_spec = np.array(utterances_spec)
                utter_batch = np.transpose(
                    utterances_spec, axes=(2, 0, 1))  # transpose [frames, batch, n_mels]
                data = sess.run(embedded, feed_dict={verif: utter_batch})
                all_data.extend(data)

    data = np.array(all_data)

    # Spectral clustering
    # cosine similarity
    similarity = np.dot(data, data.T)
    # squared magnitude of preference vectors (number of occurrences) (diagonals are ai*ai)
    square_mag = np.diag(similarity)
    # inverse squared magnitude
    inv_square_mag = 1 / square_mag
    # if it doesn't occur, set its inverse magnitude to zero (instead of inf)
    inv_square_mag[np.isinf(inv_square_mag)] = 0
    # inverse of the magnitude
    inv_mag = np.sqrt(inv_square_mag)
    # cosine similarity (elementwise multiply by inverse magnitudes)
    cosine = similarity * inv_mag
    A = cosine.T * inv_mag
    # Fill the diagonals with a very large negative value
    np.fill_diagonal(A, -1000)
    # Fill the diagonals with the max of each row
    np.fill_diagonal(A, A.max(axis=1))
    # final step in cosine sim
    A = (1 - A) / 2
    # Gaussian blur
    sigma = 0.5  # we will select sigma as 0.5
    A_gau = gaussian_filter(A, sigma)
    # Thresholding using multiplier = 0.01
    threshold_multiplier = 0.01
    A_thresh = A_gau * threshold_multiplier
    # Symmetrization
    A_sym = np.maximum(A_thresh, A_thresh.T)
    # Diffusion
    A_diffusion = A_sym * A_sym.T
    # Row-wise matrix normalization
    Row_max = A_diffusion.max(axis=1).reshape(1, A_diffusion.shape[0])
    A_norm = A_diffusion / Row_max.T
    # Eigen decomposition
    eigval, eigvec = np.linalg.eig(A_norm)
    # Eigenvalues of a positive semi-definite matrix cannot be negative; numpy may
    # return small negative values, so convert them to positive
    eigval = np.abs(eigval)
    # reorder eigenvalues
    sorted_eigval_idx = np.argsort(eigval)[::-1]
    sorted_eigval = np.sort(eigval)[::-1]
    # For division according to the equation
    eigval_shifted = np.roll(sorted_eigval, -1)
    # Threshold eigenvalues because we don't need very low eigenvalues due to errors
    eigval_thresh = 0.1
    sorted_eigval = sorted_eigval[sorted_eigval > eigval_thresh]
    eigval_shifted = eigval_shifted[:sorted_eigval.shape[0]]
    # Don't take the first value for calculations; if the first value is large, the
    # following equation will return k=1, and we want more than one cluster
    # Get the argmax of the division; since it's 0-indexed, add 1
    k = np.argmax(sorted_eigval[1:] / eigval_shifted[1:]) + 2
    logging.debug(f'Number of eigenvectors to pick: {k}')
    # Get the indexes of the eigenvectors
    idexes = sorted_eigval_idx[:k]
    A_eigvec = eigvec[:, idexes]
    A_eigvec = A_eigvec.astype('float32')

    # K-Means offline clustering
    A_eigvec_norm = sk_normalize(A_eigvec)  # l2 normalized
    kmeans = KMeans(n_clusters=config.number_of_speakers,
                    init='k-means++',
                    random_state=config.random_state)
    kmeans.fit(A_eigvec)
    labels = kmeans.labels_

    output_srt_json = os.path.join(config.output_dir,
                                   os.path.basename(config.srt_path) + '.json')
    output_wav_json = os.path.join(config.output_dir,
                                   os.path.basename(config.srt_path) + '.wav.json')
    OL_INDICATOR = 'OL'
    SIL_INDICATOR = -1
    json_data = []
    for idx, i in enumerate(selected_intervals_idx):
        start = str(
            datetime.timedelta(seconds=intervals[i][0] * duration_per_frame))
        end = str(
            datetime.timedelta(seconds=intervals[i][1] * duration_per_frame))
        speaker = labels[idx * 2]
        if labels[idx * 2] != labels[(idx * 2) + 1]:
            speaker = 'OL'  # possible overlap
        json_data.append({'start': start, 'end': end, 'speaker': str(speaker)})

    # Save the output to json
    with open(output_wav_json, 'w') as f:
        json.dump(json_data, f, indent=4)

    complete_json = {}
    json_data = []
    subs = pysrt.open(config.srt_path, encoding="utf-8")
    convert_to_ms = lambda st: ((st.hours * 60 * 60 * 1000) +
                                (st.minutes * 60 * 1000) +
                                (st.seconds * 1000) +
                                st.milliseconds)
    for sub in subs:
        start_in_ms = convert_to_ms(sub.start)
        end_in_ms = convert_to_ms(sub.end)
        speakers = []
        for idx, i in enumerate(selected_intervals_idx):
            start = intervals[i][0] * duration_per_frame * 1000
            end = intervals[i][1] * duration_per_frame * 1000
            if start_in_ms <= start <= end_in_ms:
                speaker = int(labels[idx * 2])
                if labels[idx * 2] != labels[(idx * 2) + 1]:
                    speaker = OL_INDICATOR  # possible overlap
                speakers.append(speaker)
        json_data.append({
            "index": sub.index,
            "start": sub.start.to_time().strftime("%H:%M:%S,%f")[:-3],
            "end": sub.end.to_time().strftime("%H:%M:%S,%f")[:-3],
            'speakers': np.unique(speakers).tolist(),
            'speakers_distribution': speakers,
            'text': sub.text
        })
    metadata = {
        "overlap_indicator": OL_INDICATOR,
        "duration": duration,
        "class_names": np.unique(labels).tolist(),
        "num_of_speakers": len(set(labels)),
        "silence_indicator": SIL_INDICATOR
    }
    complete_json["metadata"] = metadata
    complete_json["srt"] = json_data
    # Save the output to json
    with open(output_srt_json, 'w') as f:
        json.dump(complete_json, f, indent=4)
def spectral_clustering(data):
    # Spectral clustering
    # cosine similarity
    similarity = np.dot(data, data.T)
    # squared magnitude of preference vectors (number of occurrences) (diagonals are ai*ai)
    square_mag = np.diag(similarity)
    # inverse squared magnitude
    inv_square_mag = 1 / square_mag
    # if it doesn't occur, set its inverse magnitude to zero (instead of inf)
    inv_square_mag[np.isinf(inv_square_mag)] = 0
    # inverse of the magnitude
    inv_mag = np.sqrt(inv_square_mag)
    # cosine similarity (elementwise multiply by inverse magnitudes)
    cosine = similarity * inv_mag
    A = cosine.T * inv_mag
    # Fill the diagonals with a very large negative value
    np.fill_diagonal(A, -1000)
    # Fill the diagonals with the max of each row
    np.fill_diagonal(A, A.max(axis=1))
    # final step in cosine sim
    A = (1 - A) / 2
    # Gaussian blur
    sigma = 0.5  # we will select sigma as 0.5
    A_gau = gaussian_filter(A, sigma)
    # Thresholding using multiplier = 0.01
    threshold_multiplier = 0.01
    A_thresh = A_gau * threshold_multiplier
    # Symmetrization
    A_sym = np.maximum(A_thresh, A_thresh.T)
    # Diffusion
    A_diffusion = A_sym * A_sym.T
    # Row-wise matrix normalization
    Row_max = A_diffusion.max(axis=1).reshape(1, A_diffusion.shape[0])
    A_norm = A_diffusion / Row_max.T
    # Eigen decomposition
    eigval, eigvec = np.linalg.eig(A_norm)
    # Eigenvalues of a positive semi-definite matrix cannot be negative; numpy may
    # return small negative values, so convert them to positive
    eigval = np.abs(eigval)
    # reorder eigenvalues
    sorted_eigval_idx = np.argsort(eigval)[::-1]
    sorted_eigval = np.sort(eigval)[::-1]
    # For division according to the equation
    eigval_shifted = np.roll(sorted_eigval, -1)
    # Threshold eigenvalues because we don't need very low eigenvalues due to errors
    eigval_thresh = 0.1
    sorted_eigval = sorted_eigval[sorted_eigval > eigval_thresh]
    eigval_shifted = eigval_shifted[:sorted_eigval.shape[0]]
    # Don't take the first value for calculations; if the first value is large, the
    # following equation will return k=1, and we want more than one cluster
    # Get the argmax of the division; since it's 0-indexed, add 1
    k = np.argmax(sorted_eigval[1:] / eigval_shifted[1:]) + 2
    print(f'Number of eigenvectors to pick (clusters): {k}')
    # Get the indexes of the eigenvectors
    idexes = sorted_eigval_idx[:k]
    A_eigvec = eigvec[:, idexes]
    A_eigvec = A_eigvec.astype('float32')

    # K-Means offline clustering
    A_eigvec_norm = sk_normalize(A_eigvec)  # l2 normalized
    kmeans = KMeans(n_clusters=k, init='k-means++', random_state=random_state)
    kmeans.fit(A_eigvec)
    labels = kmeans.labels_
    return labels
def evaluate_multilabel(model, data, alg=None, classifier="lr", fast=False,
                        ratio=None, cv=10, random_state=None, normalize=False):
    X = []
    Y = []
    for pid in range(len(model.word2id)):
        X.append(model.word_embeddings[pid])
    Y = np.zeros((len(X), len(data.labels)))
    for y, key in enumerate(data.labels.keys()):
        for index, paper in enumerate(data.labels[key]):
            pid = model.word2id[paper]
            Y[pid][y] = 1

    if normalize:
        X = sk_normalize(X)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    df = defaultdict(list)
    if ratio is None:
        ratio = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    for r in ratio:
        if r <= 0:
            continue
        elif r >= 1:
            break
        if classifier.lower() == 'lr':
            clf = LogisticRegression()
        elif classifier.lower() == "svm":
            clf = SVC(cache_size=5000)
        elif classifier.lower() == "mlp":
            clf = MLPClassifier()
        elif classifier.lower() == "nb":
            clf = GaussianNB()
        micros = []
        macros = []
        for i in range(cv):
            micro, macro = evaluateNodeClassification(
                X, Y, 1 - r, clf=clf, random_state=random_state)
            micros.append(micro)
            macros.append(macro)
        micros = np.mean(micros)
        macros = np.mean(macros)
        df["ratio"].append(r)
        df["micro"].append(micros)
        df["macro"].append(macros)
        # df["alg"].append(alg)
        # df["data"].append(str(data))
        # df["total_samples"].append(model.total_samples)
        # df["negative"].append(model.negative)
        # df["walk_window"].append(model.walk_window)
        # df["walk_probability"].append(model.walk_probability)
        # df["L2"].append(model.l2)
        logging.info("ratio: %.4f : f1_micro %.4f, f1_macro %.4f" %
                     (r, micros, macros))
    if fast:
        return micros, macros
    else:
        return df
def evaluate(model, data, alg=None, classifier="lr", fast=False, ratio=None,
             cv=10, normalize=False, random_state=None, return_y=False):
    X = []
    Y = []
    micros = []
    macros = []
    for y, key in enumerate(data.labels.keys()):
        for index, paper in enumerate(data.labels[key]):
            paper = paper.rstrip()
            if paper not in model.paper2id:
                print("paper not in model: ", paper)
                continue
            X.append(model.paper_embeddings[model.paper2id[paper]])
            Y.append(y)
    print("len X: ", len(X))
    print("len Y: ", len(Y))

    if normalize:
        X = sk_normalize(X)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    clf = LogisticRegression()
    df = defaultdict(list)
    if ratio is None:
        ratio = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    for r in ratio:
        if r <= 0:
            continue
        elif r >= 1:
            break
        micros = []
        macros = []
        for i in range(cv):
            clf = LogisticRegression()
            if classifier.lower() == "svm":
                clf = SVC(cache_size=5000)
            elif classifier.lower() == "mlp":
                clf = MLPClassifier()
            elif classifier.lower() == "nb":
                clf = GaussianNB()
            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=1 - r, random_state=random_state)
            clf.fit(X_train, Y_train)
            prediction = clf.predict(X_test)
            # lpred = clf.predict_proba(X_test)
            # print("prediction shape: ", prediction[0])
            # print("y_test shape: ", Y_test[0])
            # print("Loss: ", log_loss(Y_test, lpred))
            micro = f1_score(Y_test, prediction, average='micro')
            macro = f1_score(Y_test, prediction, average='macro')
            micros.append(micro)
            macros.append(macro)
        # micros = np.mean(micros)
        # macros = np.mean(macros)
        df["ratio"].append(r)
        df["micro"].append(np.mean(micro))
        df["macro"].append(np.mean(macro))
        # df["alg"].append(alg)
        # df["data"].append(str(data))
        # df["total_samples"] = model.total_samples
        # df["negative"].append(model.negative)
        # df["walk_window"].append(model.walk_window)
        # df["walk_probability"].append(model.walk_probability)
        # df["L2"].append(model.l2)
        # logging.info("ratio: %.4f : f1_micro %.4f, f1_macro %.4f" % (r, micros, macros))
    if fast:
        if return_y:
            return micros, macros, Y_test, prediction
        return micros, macros
    else:
        return pd.DataFrame(df)
def get_optimal_codes(Y, C, only_topk=None):
    csr_codes = smat_util.sorted_csr(Y.dot(C).tocsr(), only_topk=only_topk)
    csr_codes = sk_normalize(csr_codes, axis=1, copy=False, norm='l1')
    return csr_codes
# For division according to the equation
eigval_shifted = np.roll(sorted_eigval, -1)
# Threshold eigenvalues because we don't need very low eigenvalues due to errors
eigval_thresh = 0.1
sorted_eigval = sorted_eigval[sorted_eigval > eigval_thresh]
eigval_shifted = eigval_shifted[:sorted_eigval.shape[0]]
# Don't take the first value for calculations; if the first value is large, the following
# equation will return k=1, and we want more than one cluster
# Get the argmax of the division; since it's 0-indexed, add 1
k = np.argmax(sorted_eigval[1:] / eigval_shifted[1:]) + 2
print(f'Number of eigenvectors to pick: {k}')
# Get the indexes of the eigenvectors
idexes = sorted_eigval_idx[:k]
A_eigvec = eigvec[:, idexes]
np.savetxt(embeddings_path, A_eigvec, delimiter='\t')  # embeddings for viz

A_eigvec_norm = sk_normalize(A_eigvec)  # l2 normalized
kmeans = KMeans(n_clusters=number_of_clusters, init='k-means++',
                random_state=random_state)
kmeans.fit(A_eigvec)
labels = kmeans.labels_

subs = pysrt.open(srt_path, encoding="utf-8")
convert_to_s = lambda st: ((st.hours * 60 * 60) +
                           (st.minutes * 60) +
                           st.seconds)  # + (st.milliseconds / 1000)
get_start_and_end = lambda sub: (convert_to_s(sub.start), convert_to_s(sub.end))
for sub in subs: