def make_hdiag_csf(h1e, eri, norb, nelec, smult, csd_mask=None, hdiag_det=None):
    ''' Build the diagonal of the Hamiltonian in the CSF basis.

    Configurations are processed in groups that share the same number of
    electron pairs (npair). For every configuration in a group, a
    (ndet, ndet) block is filled by libcsf.FCICSFhdiag (presumably the
    determinant-basis Hamiltonian block of that configuration -- confirm
    against the C source), and the CSF diagonal is evaluated as
    diag (umat.T . block . umat), where umat comes from get_spin_evecs.

    Args:
        h1e: one-electron Hamiltonian; only used here to build hdiag_det
            when hdiag_det is not supplied
        eri: two-electron integrals in any form accepted by ao2mo.restore
        norb: number of orbitals
        nelec: electron count in a form accepted by _unpack_nelec
        smult: spin multiplicity, forwarded to get_csfvec_shape,
            count_all_csfs and get_spin_evecs

    Kwargs:
        csd_mask: optional 1-d array of determinant addresses, read in
            chunks of nconf*ndet starting at the offsets returned by
            get_csdaddrs_shape. If None, the addresses are obtained from
            get_nspin_dets for each group.
        hdiag_det: optional Hamiltonian diagonal in the determinant basis,
            indexed by determinant address (addra * ndetb_all + addrb).
            If None, it is computed with make_hdiag_det.

    Returns:
        hdiag_csf: float64 ndarray of shape (ncsf_all,)
    '''
    if hdiag_det is None:
        hdiag_det = make_hdiag_det(h1e, eri, norb, nelec)
    eri = ao2mo.restore(1, eri, norb)
    neleca, nelecb = _unpack_nelec(nelec)
    (min_npair, npair_csd_offset, npair_dconf_size, npair_sconf_size,
     npair_sdet_size) = get_csdaddrs_shape(norb, neleca, nelecb)
    _, npair_csf_offset, _, _, npair_csf_size = get_csfvec_shape(
        norb, neleca, nelecb, smult)
    npair_econf_size = npair_dconf_size * npair_sconf_size
    max_npair = nelecb
    ncsf_all = count_all_csfs(norb, neleca, nelecb, smult)
    ndetb_all = cistring.num_strings(norb, nelecb)
    hdiag_csf = np.zeros(ncsf_all, dtype=np.float64)
    # Every entry must be visited exactly once; verified after the loop.
    # np.bool_ rather than np.bool: the latter alias was removed from NumPy.
    hdiag_csf_check = np.ones(ncsf_all, dtype=np.bool_)
    for npair in range(min_npair, max_npair + 1):
        ipair = npair - min_npair
        nconf = npair_econf_size[ipair]
        ndet = npair_sdet_size[ipair]
        ncsf = npair_csf_size[ipair]
        if ncsf == 0:
            continue
        nspin = neleca + nelecb - 2 * npair
        csd_offset = npair_csd_offset[ipair]
        csf_offset = npair_csf_offset[ipair]
        if csd_mask is None:
            det_addr = get_nspin_dets(norb, neleca, nelecb, nspin).ravel(order='C')
        else:
            det_addr = csd_mask[csd_offset:][:nconf * ndet]
        if ndet == 1:
            # One determinant per configuration (e.g., closed-shell singlets):
            # the CSF is the determinant, so copy its diagonal element.
            assert (ncsf == 1)
            hdiag_csf[csf_offset:][:nconf] = hdiag_det[det_addr.flat]
            hdiag_csf_check[csf_offset:][:nconf] = False
            continue
        # Split the combined determinant address into alpha and beta string
        # addresses, then convert them to occupation bit strings.
        det_addra, det_addrb = divmod(det_addr, ndetb_all)
        det_stra = np.ascontiguousarray(
            cistring.addrs2str(norb, neleca, det_addra).reshape(nconf, ndet, order='C'))
        det_strb = np.ascontiguousarray(
            cistring.addrs2str(norb, nelecb, det_addrb).reshape(nconf, ndet, order='C'))
        det_addr = det_addr.reshape(nconf, ndet, order='C')
        # Allocated only after the single-determinant shortcut above so that
        # no (nconf, ndet, ndet) buffer is created when it is not needed.
        hdiag_conf = np.zeros((nconf, ndet, ndet), dtype=np.float64)
        hdiag_conf_det = np.ascontiguousarray(hdiag_det[det_addr],
                                              dtype=np.float64)
        libcsf.FCICSFhdiag(hdiag_conf.ctypes.data_as(ctypes.c_void_p),
                           hdiag_conf_det.ctypes.data_as(ctypes.c_void_p),
                           eri.ctypes.data_as(ctypes.c_void_p),
                           det_stra.ctypes.data_as(ctypes.c_void_p),
                           det_strb.ctypes.data_as(ctypes.c_void_p),
                           ctypes.c_uint(norb), ctypes.c_uint(nconf),
                           ctypes.c_uint(ndet))
        umat = get_spin_evecs(nspin, neleca, nelecb, smult)
        # diag (U^T H U)[k,c] = sum_d U[d,c] * (H U)[k,d,c]
        hdiag_conf = np.tensordot(hdiag_conf, umat, axes=1)
        hdiag_conf *= umat[np.newaxis, :, :]
        hdiag_csf[csf_offset:][:nconf * ncsf] = hdiag_conf.sum(1).ravel(order='C')
        hdiag_csf_check[csf_offset:][:nconf * ncsf] = False
    nmissed = np.count_nonzero(hdiag_csf_check)
    assert (nmissed == 0), nmissed
    return hdiag_csf
def _transform_det2csf (inparr, norb, neleca, nelecb, smult, reverse=False, csd_mask=None, project=False):
    ''' Transform the rows of inparr between the determinant and CSF bases,
    or project determinant-basis rows onto the space spanned by the CSFs.

    Args:
        inparr: 2-d array of shape (nrow, ndet_all), or (nrow, ncsf_all) if
            reverse is True and project is False
        norb: number of orbitals
        neleca: number of alpha electrons
        nelecb: number of beta electrons
        smult: spin multiplicity, forwarded to get_csfvec_shape,
            count_all_csfs and get_spin_evecs

    Kwargs:
        reverse: if True, transform from CSFs to determinants instead of
            from determinants to CSFs. Ignored if project is True.
        csd_mask: optional 1-d array of determinant addresses, read in
            chunks of nconf*ndet starting at the offsets returned by
            csdstring.get_csdaddrs_shape. If None, the addresses are
            obtained from csdstring.get_nspin_dets.
        project: if True, inparr is MODIFIED IN PLACE: each configuration's
            determinant block is multiplied by umat . umat.T, and blocks
            that have no CSFs for this smult are set to zero.

    Returns:
        outarr: ndarray of shape (nrow, ncsf_all) for the forward transform
            or (nrow, ndet_all) for the reverse transform. If project is
            True, inparr itself is returned.
    '''
    min_npair, npair_csd_offset, npair_dconf_size, npair_sconf_size, npair_sdet_size = csdstring.get_csdaddrs_shape (norb, neleca, nelecb)
    _, npair_csf_offset, _, _, npair_csf_size = get_csfvec_shape (norb, neleca, nelecb, smult)
    nrow = inparr.shape[0]
    ndeta_all = special.comb (norb, neleca, exact=True)
    ndetb_all = special.comb (norb, nelecb, exact=True)
    ndet_all = ndeta_all * ndetb_all
    ncsf_all = count_all_csfs (norb, neleca, nelecb, smult)
    ncol_out = ndet_all if (reverse or project) else ncsf_all

    if not project:
        # Zero-initialization is necessary because not all determinants have
        # a csf for all spin states.
        # np.float64 rather than np.float_: the latter alias was removed from NumPy.
        outarr = np.zeros ((nrow, ncol_out), dtype=np.float64)
        csf_addrs = np.zeros (ncsf_all, dtype=np.bool_)

    max_npair = min (neleca, nelecb)
    for npair in range (min_npair, max_npair+1):
        ipair = npair - min_npair
        ncsf = npair_csf_size[ipair]
        nspin = neleca + nelecb - 2*npair
        nconf = npair_dconf_size[ipair] * npair_sconf_size[ipair]
        ndet = npair_sdet_size[ipair]
        csf_offset = npair_csf_offset[ipair]
        csd_offset = npair_csd_offset[ipair]
        if (ncsf == 0) and not project:
            continue
        if not project:
            # Boolean mask over the flat CSF vector selecting this group.
            csf_addrs[:] = False
            # Note: this is a view; writing to it edits csf_addrs
            csf_addrs_ipair = csf_addrs[csf_offset:][:nconf*ncsf].reshape (nconf, ncsf)

        if csd_mask is None:
            det_addrs = csdstring.get_nspin_dets (norb, neleca, nelecb, nspin)
        else:
            det_addrs = csd_mask[csd_offset:][:nconf*ndet].reshape (nconf, ndet, order='C')
        assert (det_addrs.shape[0] == nconf)
        assert (det_addrs.shape[1] == ndet)

        if (ncsf == 0):
            # Only reachable when projecting: these determinants carry no
            # component of the requested spin state.
            inparr[:,det_addrs] = 0
            continue

        umat = np.asarray_chkfinite (get_spin_evecs (nspin, neleca, nelecb, smult))
        # A separate name is kept so that a future generator form of
        # get_spin_evecs could deliver the csfs in blocks to save memory.
        ncsf_blk = ncsf
        assert (umat.shape[0] == ndet)
        assert (umat.shape[1] == ncsf_blk)

        # det_addrs has shape (nconf, ndet), so inparr[:,det_addrs] and
        # outarr[:,det_addrs] are 3-dimensional: (nrow, nconf, ndet).
        # csf_addrs is a flat boolean mask, so outarr[:,csf_addrs] is
        # 2-dimensional: (nrow, nconf*ncsf_blk).
        if project:
            Pmat = np.dot (umat, umat.T)
            inparr[:,det_addrs] = np.tensordot (inparr[:,det_addrs], Pmat, axes=1)
        else:
            csf_addrs_ipair[:,:ncsf_blk] = True # Note: edits csf_addrs
            if not reverse:
                outarr[:,csf_addrs] = np.tensordot (inparr[:,det_addrs], umat, axes=1).reshape (nrow, ncsf_blk*nconf)
            else:
                outarr[:,det_addrs] = np.tensordot (inparr[:,csf_addrs].reshape (nrow, nconf, ncsf_blk), umat, axes=((2,),(1,)))

    if project:
        return inparr
    return outarr
def make_hdiag_csf_slower(h1e, eri, norb, nelec, smult, csd_mask=None, hdiag_det=None):
    ''' Build the diagonal of the Hamiltonian in the CSF basis, one
    configuration at a time.

    This is tricky because I need the diagonal blocks for each configuration
    in order to get the correct csf hdiag values, not just the diagonal
    elements for each determinant. Here each (ndet, ndet) block is built by
    a separate call to libfci.FCIpspace_h0tril, symmetrized with
    lib.hermi_triu, and given hdiag_det on its diagonal; the CSF diagonal
    is then diag (umat.T . block . umat) with umat from get_spin_evecs.

    Args:
        h1e: one-electron Hamiltonian; its buffer is passed directly to the
            C library, so it is presumably expected to be a contiguous
            float64 ndarray -- confirm against callers
        eri: two-electron integrals in any form accepted by ao2mo.restore
        norb: number of orbitals
        nelec: electron count in a form accepted by _unpack_nelec
        smult: spin multiplicity, forwarded to get_csfvec_shape,
            count_all_csfs and get_spin_evecs

    Kwargs:
        csd_mask: optional 1-d array of determinant addresses, read in
            chunks of nconf*ndet starting at the offsets returned by
            get_csdaddrs_shape. If None, the addresses are obtained from
            get_nspin_dets for each group.
        hdiag_det: optional Hamiltonian diagonal in the determinant basis,
            indexed by determinant address (addra * ndetb_all + addrb).
            If None, it is computed with make_hdiag_det.

    Returns:
        hdiag_csf: float64 ndarray of shape (ncsf_all,)
    '''
    if hdiag_det is None:
        hdiag_det = make_hdiag_det(h1e, eri, norb, nelec)
    eri = ao2mo.restore(1, eri, norb)
    neleca, nelecb = _unpack_nelec(nelec)
    (min_npair, npair_csd_offset, npair_dconf_size, npair_sconf_size,
     npair_sdet_size) = get_csdaddrs_shape(norb, neleca, nelecb)
    _, npair_csf_offset, _, _, npair_csf_size = get_csfvec_shape(
        norb, neleca, nelecb, smult)
    npair_econf_size = npair_dconf_size * npair_sconf_size
    max_npair = nelecb
    ncsf_all = count_all_csfs(norb, neleca, nelecb, smult)
    ndetb_all = cistring.num_strings(norb, nelecb)
    hdiag_csf = np.zeros(ncsf_all, dtype=np.float64)
    # Every entry must be visited exactly once; verified after the loop.
    # np.bool_ rather than np.bool: the latter alias was removed from NumPy.
    hdiag_csf_check = np.ones(ncsf_all, dtype=np.bool_)
    for npair in range(min_npair, max_npair + 1):
        ipair = npair - min_npair
        nconf = npair_econf_size[ipair]
        ndet = npair_sdet_size[ipair]
        ncsf = npair_csf_size[ipair]
        if ncsf == 0:
            continue
        nspin = neleca + nelecb - 2 * npair
        csd_offset = npair_csd_offset[ipair]
        csf_offset = npair_csf_offset[ipair]
        if csd_mask is None:
            det_addr = get_nspin_dets(norb, neleca, nelecb, nspin).ravel(order='C')
        else:
            det_addr = csd_mask[csd_offset:][:nconf * ndet]
        if ndet == 1:
            # One determinant per configuration (e.g., closed-shell singlets):
            # the CSF is the determinant, so copy its diagonal element.
            assert (ncsf == 1)
            hdiag_csf[csf_offset:][:nconf] = hdiag_det[det_addr.flat]
            hdiag_csf_check[csf_offset:][:nconf] = False
            continue
        umat = get_spin_evecs(nspin, neleca, nelecb, smult)
        # Split the combined determinant address into alpha and beta string
        # addresses, then convert them to occupation bit strings.
        det_addra, det_addrb = divmod(det_addr, ndetb_all)
        det_stra = cistring.addrs2str(norb, neleca, det_addra).reshape(nconf, ndet,
                                                                       order='C')
        det_strb = cistring.addrs2str(norb, nelecb, det_addrb).reshape(nconf, ndet,
                                                                       order='C')
        det_addr = det_addr.reshape(nconf, ndet, order='C')
        diag_idx = np.diag_indices(ndet)
        # Allocated only after the single-determinant shortcut above so that
        # no (nconf, ndet, ndet) buffer is created when it is not needed.
        hdiag_conf = np.zeros((nconf, ndet, ndet), dtype=np.float64)
        # Each pass touches only hdiag_conf[iconf], so the library call, the
        # symmetrization and the diagonal fill can share one loop.
        for iconf in range(nconf):
            stra = det_stra[iconf]
            strb = det_strb[iconf]
            # The library writes into the hdiag_conf[iconf] view in place.
            libfci.FCIpspace_h0tril(
                hdiag_conf[iconf].ctypes.data_as(ctypes.c_void_p),
                h1e.ctypes.data_as(ctypes.c_void_p),
                eri.ctypes.data_as(ctypes.c_void_p),
                stra.ctypes.data_as(ctypes.c_void_p),
                strb.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(norb),
                ctypes.c_int(ndet))
            hdiag_conf[iconf] = lib.hermi_triu(hdiag_conf[iconf])
            # The diagonal is taken from hdiag_det, overwriting whatever the
            # library call left there.
            hdiag_conf[iconf][diag_idx] = hdiag_det[det_addr[iconf]]
        # diag (U^T H U)[k,c] = sum_d U[d,c] * (H U)[k,d,c]
        hdiag_conf = np.tensordot(hdiag_conf, umat, axes=1)
        hdiag_conf = (hdiag_conf * umat[np.newaxis, :, :]).sum(1)
        hdiag_csf[csf_offset:][:nconf * ncsf] = hdiag_conf.ravel(order='C')
        hdiag_csf_check[csf_offset:][:nconf * ncsf] = False
    nmissed = np.count_nonzero(hdiag_csf_check)
    assert (nmissed == 0), nmissed
    return hdiag_csf