def c_code(self, node, name, ins, outs, sub):
    """Return the C source that turns uniform draws into multinomial counts.

    The generated code:

    * checks that ``pvals`` is 2-d, ``unis`` is 1-d and that
      ``unis.shape[0] == pvals.shape[0] * n``;
    * (re)allocates the output ``z`` with the shape of ``pvals`` when it is
      missing or has a different shape;
    * for each of the ``n`` sampling rounds and each row of ``pvals``, walks
      the running sum of the row until it exceeds the matching entry of
      ``unis`` and counts that outcome in ``z``.

    Parameters
    ----------
    node, name
        Not used by this implementation.
    ins
        C variable names of the inputs: ``(pvals, unis, n)``, or the
        two-element form ``(pvals, unis)``, in which case ``n`` is the
        literal ``1``.
    outs
        One-element sequence holding the C variable name of the output.
    sub
        Substitution mapping; only ``sub["fail"]`` is read.

    Returns
    -------
    str
        The C code with all placeholders substituted.
    """
    # NOTE(review): the emitted C text changed (error message fix); if the
    # enclosing class defines a C code cache version, it probably needs a
    # bump for the new code to be recompiled -- confirm.

    # support old pickled graphs
    if len(ins) == 2:
        (pvals, unis) = ins
        n = 1
    else:
        (pvals, unis, n) = ins
    (z,) = outs

    if self.odtype == "auto":
        # Output dtype follows pvals; resolved in C when the code runs.
        t = f"PyArray_TYPE({pvals})"
    else:
        t = Scalar(self.odtype).dtype_specs()[1]
        if t.startswith("theano_complex"):
            t = t.replace("theano_complex", "NPY_COMPLEX")
        else:
            t = t.upper()

    # Explicit mapping instead of ``locals()`` so the template's inputs are
    # visible and unaffected by unrelated local names.
    substitutions = {
        "pvals": pvals,
        "unis": unis,
        "n": n,
        "z": z,
        "t": t,
        "fail": sub["fail"],
    }
    return (
        """
    if (PyArray_NDIM(%(pvals)s) != 2)
    {
        PyErr_Format(PyExc_TypeError, "pvals ndim should be 2");
        %(fail)s;
    }
    if (PyArray_NDIM(%(unis)s) != 1)
    {
        PyErr_Format(PyExc_TypeError, "unis ndim should be 1");
        %(fail)s;
    }

    if (PyArray_DIMS(%(unis)s)[0] != (PyArray_DIMS(%(pvals)s)[0] * %(n)s))
    {
        PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0] * n");
        %(fail)s;
    }

    if ((NULL == %(z)s)
        || ((PyArray_DIMS(%(z)s))[0] != (PyArray_DIMS(%(pvals)s))[0])
        || ((PyArray_DIMS(%(z)s))[1] != (PyArray_DIMS(%(pvals)s))[1])
    )
    {
        Py_XDECREF(%(z)s);
        %(z)s = (PyArrayObject*) PyArray_EMPTY(2, PyArray_DIMS(%(pvals)s), %(t)s, 0);
        if (!%(z)s)
        {
            PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
            %(fail)s;
        }
    }

    { // NESTED SCOPE

    const int nb_multi = PyArray_DIMS(%(pvals)s)[0];
    const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1];
    const int n_samples = %(n)s;

    //
    // For each multinomial, loop over each possible outcome
    //
    for (int c = 0; c < n_samples; ++c){
        for (int n = 0; n < nb_multi; ++n)
        {
            int waiting = 1;
            double cummul = 0.;
            const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, c*nb_multi + n);
            for (int m = 0; m < nb_outcomes; ++m)
            {
                dtype_%(z)s* z_nm = (dtype_%(z)s*)PyArray_GETPTR2(%(z)s, n,m);
                const dtype_%(pvals)s* pvals_nm = (dtype_%(pvals)s*)PyArray_GETPTR2(%(pvals)s, n,m);
                cummul += *pvals_nm;
                if (c == 0)
                {
                    if (waiting && (cummul > *unis_n))
                    {
                        *z_nm = 1.;
                        waiting = 0;
                    }
                    else
                    {
                        // if we re-used old z pointer, we have to clear it out.
                        *z_nm = 0.;
                    }
                }
                else {
                    if (cummul > *unis_n)
                    {
                        *z_nm = *z_nm + 1.;
                        break;
                    }
                }
            }
        }
    }
    } // END NESTED SCOPE
    """
        % substitutions
    )
def c_code(self, node, name, ins, outs, sub):
    """Return the C source that picks outcome indices from uniform draws.

    The generated code:

    * checks that ``pvals`` is 2-d, ``unis`` is 1-d, that ``n`` does not
      exceed ``pvals.shape[1]`` and that
      ``unis.shape[0] == pvals.shape[0] * n``;
    * works on a private copy of ``pvals`` so the input is never modified;
    * (re)allocates the output ``z`` with shape ``(pvals.shape[0], n)`` when
      it is missing or has a different shape;
    * for each of the ``n`` sampling rounds and each row, walks the running
      sum of the copied row until it exceeds the matching entry of ``unis``
      and stores that outcome index in ``z``.  When ``self.replace`` is
      false, the chosen probability is zeroed and the row renormalized
      before the next round.

    Parameters
    ----------
    node, name
        Not used by this implementation.
    ins
        C variable names of the inputs ``(pvals, unis, n)``.
    outs
        One-element sequence holding the C variable name of the output.
    sub
        Substitution mapping; only ``sub["fail"]`` is read.

    Returns
    -------
    str
        The C code with all placeholders substituted.
    """
    # NOTE(review): the emitted C text changed (error message fix and
    # pvals_copy cleanup on failure); if the enclosing class defines a C code
    # cache version, it probably needs a bump -- confirm.
    (pvals, unis, n) = ins
    (z,) = outs

    if self.odtype == "auto":
        t = "NPY_INT64"
    else:
        t = Scalar(self.odtype).dtype_specs()[1]
        if t.startswith("theano_complex"):
            t = t.replace("theano_complex", "NPY_COMPLEX")
        else:
            t = t.upper()

    # Explicit mapping instead of ``locals()`` so the template's inputs are
    # visible and unaffected by unrelated local names.
    substitutions = {
        "pvals": pvals,
        "unis": unis,
        "n": n,
        "z": z,
        "t": t,
        # Emitted as a 0/1 literal so the C compiler sees a constant test.
        "replace": int(self.replace),
        "fail": sub["fail"],
    }
    return (
        """
    // create a copy of pvals matrix
    PyArrayObject* pvals_copy = NULL;

    if (PyArray_NDIM(%(pvals)s) != 2)
    {
        PyErr_Format(PyExc_TypeError, "pvals ndim should be 2");
        %(fail)s;
    }
    if (PyArray_NDIM(%(unis)s) != 1)
    {
        PyErr_Format(PyExc_TypeError, "unis ndim should be 1");
        %(fail)s;
    }

    if ( %(n)s > (PyArray_DIMS(%(pvals)s)[1]) )
    {
        PyErr_Format(PyExc_ValueError, "Cannot sample without replacement n samples bigger than the size of the distribution.");
        %(fail)s;
    }

    if (PyArray_DIMS(%(unis)s)[0] != (PyArray_DIMS(%(pvals)s)[0] * %(n)s))
    {
        PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0] * n");
        %(fail)s;
    }

    pvals_copy = (PyArrayObject*) PyArray_EMPTY(2, PyArray_DIMS(%(pvals)s), PyArray_TYPE(%(pvals)s), 0);
    if (!pvals_copy)
    {
        PyErr_SetString(PyExc_MemoryError, "failed to alloc pvals_copy");
        %(fail)s;
    }
    if (PyArray_CopyInto(pvals_copy, %(pvals)s) != 0)
    {
        // the copy failed with a Python error set; release the scratch
        // array before leaving so it is not leaked
        Py_DECREF(pvals_copy);
        pvals_copy = NULL;
        %(fail)s;
    }

    if ((NULL == %(z)s)
        || ((PyArray_DIMS(%(z)s))[0] != (PyArray_DIMS(%(pvals)s))[0])
        || ((PyArray_DIMS(%(z)s))[1] != %(n)s)
    )
    {
        Py_XDECREF(%(z)s);
        npy_intp dims[2];
        dims[0] = PyArray_DIMS(%(pvals)s)[0];
        dims[1] = %(n)s;
        %(z)s = (PyArrayObject*) PyArray_EMPTY(2, dims, %(t)s, -1);
        if (!%(z)s)
        {
            PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
            // the fail path skips the cleanup at the end of this code, so
            // the scratch copy has to be released here
            Py_DECREF(pvals_copy);
            pvals_copy = NULL;
            %(fail)s;
        }
    }

    { // NESTED SCOPE

    const int nb_multi = PyArray_DIMS(%(pvals)s)[0];
    const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1];
    const int n_samples = %(n)s;

    //
    // For each multinomial, loop over each possible outcome,
    // and set selected pval to 0 after being selected
    //
    for (int c = 0; c < n_samples; ++c){
        for (int n = 0; n < nb_multi; ++n)
        {
            double cummul = 0.;
            const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, c*nb_multi + n);
            dtype_%(z)s* z_nc = (dtype_%(z)s*)PyArray_GETPTR2(%(z)s, n, c);
            for (int m = 0; m < nb_outcomes; ++m)
            {
                dtype_%(pvals)s* pvals_nm = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, m);
                cummul += *pvals_nm;
                if (cummul > *unis_n)
                {
                    *z_nc = m;
                    // No need to renormalize after the last samples.
                    if (c == (n_samples - 1))
                        break;
                    if (! %(replace)s )
                    {
                        // renormalize the nth row of pvals, reuse (cummul-*pvals_nm) to initialize the sum
                        dtype_%(pvals)s sum = cummul - *pvals_nm;
                        dtype_%(pvals)s* pvals_n = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, m);
                        *pvals_nm = 0.;
                        for (int k = m; k < nb_outcomes; ++k)
                        {
                            sum = sum + *pvals_n;
                            pvals_n++;
                        }
                        pvals_n = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, 0);
                        for (int k = 0; k < nb_outcomes; ++k)
                        {
                            *pvals_n = *pvals_n / sum;
                            pvals_n++;
                        }
                    }
                    break;
                }
            }
        }
    }

    // delete pvals_copy
    {
        Py_XDECREF(pvals_copy);
    }
    } // END NESTED SCOPE
    """
        % substitutions
    )