Exemplo n.º 1
0
def master(src_cfg, suffix_in, suffix_out, K, N, nr_processes, double_norm):
    """Run the merge or double-normalization worker over all samples.

    Collects the train and test samples of the dataset, opens the input and
    output statistics maps, and hands the samples to the selected worker --
    either directly in this process or split across child processes.

    Args:
        src_cfg: Dataset configuration, forwarded to `Dataset`.
        suffix_in: Suffix appended to `dataset.FEAT_DIR` to locate the input
            statistics (and the GMM when `double_norm` is set).
        suffix_out: Suffix appended to `dataset.FEAT_DIR` for the output
            statistics.
        K: Number of clusters; forwarded to `Dataset` as `nr_clusters` and
            used in the GMM file name.
        N: Forwarded unchanged to the worker as the keyword argument `N`.
        nr_processes: Number of child processes to start. Any value not
            greater than 1 runs the worker in the current process.
        double_norm: When true, use `double_normalization` and load the GMM;
            otherwise use `merge` with `gmm=None`.

    Note:
        Child processes are started but not joined here, so this function
        may return before they have finished.
    """
    # NOTE(review): presumably the local descriptor dimensionality (it plays
    # the role of `gmm.d` in the statistics length below) -- confirm.
    D = 64

    dataset = Dataset(src_cfg, nr_clusters=K)
    train_samples = dataset.get_data('train')[0]
    test_samples = dataset.get_data('test')[0]
    samples = [str(sample) for sample in train_samples + test_samples]

    if double_norm:
        worker = double_normalization
        suffix = '.double_norm'
        gmm = load_gmm(
            os.path.join(dataset.FEAT_DIR + suffix_in, 'gmm', 'gmm_%d' % K))
    else:
        worker = merge
        suffix = ''
        gmm = None

    stats_dir_name = 'statistics_k_%d' % dataset.VOC_SIZE
    path_in = os.path.join(
        dataset.FEAT_DIR + suffix_in, stats_dir_name, 'stats.tmp')
    path_out = os.path.join(
        dataset.FEAT_DIR + suffix_out, stats_dir_name, 'stats.tmp' + suffix)

    sstats_in = SstatsMap(path_in)
    sstats_out = SstatsMap(path_out)

    # One value per vocabulary entry plus 2 * D values per vocabulary entry.
    len_sstats = dataset.VOC_SIZE + 2 * D * dataset.VOC_SIZE

    kwargs = {
        'N': N,
        'sstats_in': sstats_in,
        'sstats_out': sstats_out,
        'len_sstats': len_sstats,
        'gmm': gmm,
    }

    if nr_processes > 1:
        # Explicit floor division: the chunk size is used as a slice bound
        # and must stay an int even when `/` means true division (Python 3
        # or `from __future__ import division`).
        nr_samples_per_process = len(samples) // nr_processes + 1
        for ii in range(nr_processes):
            start = ii * nr_samples_per_process
            stop = start + nr_samples_per_process
            # Trailing chunks may be empty when there are few samples; the
            # worker is still started for them, as before.
            mp.Process(target=worker,
                       args=(samples[start:stop], ),
                       kwargs=kwargs).start()
    else:
        worker(samples, **kwargs)
def get_slice_data_from_file(dataset, split, class_idx, gmm, nr_pos, nr_neg):
    """Load the merged statistics of a sample selection as a `SliceData`.

    Args:
        dataset: Object providing `SSTATS_DIR`; also forwarded to
            `_get_samples`.
        split: Forwarded to `_get_samples` as `data_type`.
        class_idx: Class index used both for selecting samples and for
            converting the labels.
        gmm: Object exposing `k` and `d`, which determine the length of one
            statistics row.
        nr_pos: Forwarded to `_get_samples`.
        nr_neg: Forwarded to `_get_samples`.

    Returns:
        A `SliceData` built from the statistics reshaped to rows of length
        `gmm.k + 2 * gmm.d * gmm.k`, the labels converted by
        `tuple_labels_to_list_labels`, and the info returned by `get_merged`.
    """
    selection = _get_samples(
        dataset, class_idx, data_type=split, nr_pos=nr_pos, nr_neg=nr_neg)
    # Only the first element of the selection is needed here.
    chosen_samples = selection[0]

    row_length = gmm.k + 2 * gmm.d * gmm.k

    stats_path = os.path.join(dataset.SSTATS_DIR, 'stats.tmp')
    stats_map = SstatsMap(stats_path)
    merged, labels, info = stats_map.get_merged(chosen_samples, row_length)

    # One row per entry, each row holding `row_length` values.
    rows = merged.reshape((-1, row_length))
    class_labels = tuple_labels_to_list_labels(labels, class_idx)
    return SliceData(rows, class_labels, info)