Exemplo n.º 1
0
def main(args):
    """Copy PDF arrays stored in NumPy files into the HDF storage file.

    Reads ``args.config`` (optional configuration to initialize),
    ``args.args`` (NumPy file names, expanded via ``getargswithstdin``)
    and ``args.output`` (storage file; ``config.PDFSTORAGE`` is used when
    it is None).  Zero-size files are skipped.  Progress dots are
    written to standard output, at most one per processed file and spaced
    about ``len(files) / 81`` files apart.
    """
    from cifpdfsearch.cifpdf import HDFStorage
    from cifpdfsearch import config, normcodid
    from cifpdfsearch._utils import getargswithstdin
    if args.config:
        config.initialize(args.config)
    npyfiles = list(getargswithstdin(args.args))
    filename = config.PDFSTORAGE if args.output is None else args.output
    storage = HDFStorage(filename)
    # TODO - add check for existing config in HDFStorage
    storage.writeConfig(config.PDFCALCULATOR)
    rgrid = storage.rgrid
    dot_interval = len(npyfiles) / (80.0 + 1)
    next_dot = 0
    for position, path in enumerate(npyfiles):
        # Only non-empty files are loaded; empty ones are passed over.
        if os.path.getsize(path) != 0:
            pdf = numpy.load(path)
            # The COD id is derived from the file's base name.
            storage.writePDF(normcodid(os.path.basename(path)), rgrid, pdf)
            if position >= next_dot:
                print('.', end='', flush=True)
                next_dot += dot_interval
    # Terminate the line of progress dots.
    print()
Exemplo n.º 2
0
def codsearch_composition(composition, tolerance):
    """Yield normalized COD ids of entries matching a composition.

    The composition is split into element symbols and counts with
    ``composition_analysis``.  Every element contributes one clause on
    the field ``composition.<symbol>``; all clauses must match.

    composition -- composition passed to ``composition_analysis``.
    tolerance   -- when equal to 0 each count must match exactly
                   (``term`` clause); otherwise each count may lie
                   within ``count - tolerance`` and ``count + tolerance``
                   inclusive (``range`` clause).

    The search scans the 'cod' index for documents of type 'cif' on the
    server given by ELASTICHOST, without fetching document sources.
    """
    from diffpy.pdfgetx.functs import composition_analysis
    from elasticsearch import Elasticsearch
    from elasticsearch.helpers import scan
    client = Elasticsearch(ELASTICHOST)
    symbols, amounts = composition_analysis(composition)
    # Choose the clause builder once, then apply it to every element.
    if tolerance == 0:
        def clause(field, amount):
            return {'term': {field: amount}}
    else:
        def clause(field, amount):
            limits = {"gte": amount - tolerance, "lte": amount + tolerance}
            return {'range': {field: limits}}
    clauses = [clause("composition." + symbol, amount)
               for symbol, amount in zip(symbols, amounts)]
    body = {'query': {"bool": {"must": clauses}}}
    hits = scan(client, query=body, index='cod', doc_type='cif',
                _source=False)
    for hit in hits:
        yield normcodid(hit['_id'])
Exemplo n.º 3
0
 def readPDF(self, codid):
     """Return the tuple ``(rgrid, g)`` for the given COD entry.

     codid -- COD identifier; it is normalized with ``normcodid`` and
              inserted into the ``_dspdfpath`` template to obtain the
              dataset name.

     The dataset is read in full from the HDF file opened in read mode.
     """
     dataset = self._dspdfpath.format(normcodid(codid))
     with self._openhdf('r') as store:
         values = store[dataset][()]
     return self.rgrid, values
Exemplo n.º 4
0
def main(args):
    """Calculate PDFs for CIF files and save each one as a NumPy file.

    Reads ``args.config`` (optional configuration to initialize),
    ``args.output`` (existing output directory), ``args.cifs`` (CIF
    files, expanded via ``getargswithstdin``) and ``args.force``
    (recalculate even if the output file exists).  CIF files are
    processed in random order.  Output is written to
    ``cod<codid>.npy`` as float32.  A failed calculation is reported on
    standard output and the remaining files are still processed.

    Raise ValueError when ``args.output`` is not a directory.
    """
    from cifpdfsearch import config, cifpdf, normcodid
    from cifpdfsearch._utils import getargswithstdin
    from numpy.random import randint
    if args.config:
        config.initialize(args.config)
    if not os.path.isdir(args.output):
        raise ValueError("{} must be a directory".format(args.output))
    pending = list(getargswithstdin(args.cifs))
    pdfcalc = cifpdf.calculator.fromConfig(config.PDFCALCULATOR)
    while pending:
        pick = randint(len(pending))
        ciffile = pending.pop(pick)
        name = 'cod{}.npy'.format(normcodid(ciffile))
        target = os.path.join(args.output, name)
        if args.force or not os.path.isfile(target):
            # Create (or truncate) the output file before calculating.
            # NOTE(review): presumably this marks the CIF as taken so
            # that concurrent runs skip it -- confirm.
            open(target, 'w').close()
            try:
                _, pdf = calculate(pdfcalc, ciffile)
            except Exception as err:
                print('{}: {} {}'.format(ciffile, type(err).__name__, err))
            else:
                numpy.save(target, pdf.astype(numpy.float32))
Exemplo n.º 5
0
 def readPDF(self, codid):
     """Return the tuple ``(rgrid, g)`` for the given COD entry.

     codid -- COD identifier.  A string is normalized with ``normcodid``
              and converted to int; any other value is used as the
              lookup key unchanged.

     The key is mapped through ``self.index`` to a row of ``self.gdata``.
     """
     key = int(normcodid(codid)) if isinstance(codid, str) else codid
     values = self.gdata[self.index[key]]
     return self.rgrid, values
Exemplo n.º 6
0
 def writePDF(self, codid, r, g):
     """Store the PDF values `g` for the given COD entry.

     codid -- COD identifier; it is normalized with ``normcodid`` and
              inserted into the ``_dspdfpath`` template to obtain the
              dataset name.
     r     -- r-grid of the PDF; it must have the same shape as
              ``self.rgrid`` and be close to it per ``numpy.allclose``.
     g     -- array of PDF values written to the dataset.

     The dataset is obtained with ``require_dataset`` using the shape of
     `g` and ``self.dtype``, with the HDF file opened in append mode.

     Raise ValueError when `r` does not match ``self.rgrid``.
     """
     from numpy import allclose
     # Compare shapes first so allclose only sees same-shaped arrays.
     matches = r.shape == self.rgrid.shape and allclose(r, self.rgrid)
     if not matches:
         raise ValueError(
             "r must equal the {} dataset".format(self._dsrgridpath))
     dataset = self._dspdfpath.format(normcodid(codid))
     with self._openhdf('a') as store:
         target = store.require_dataset(
             dataset, shape=g.shape, dtype=self.dtype)
         target[:] = g
Exemplo n.º 7
0
def genidpdf_all_hdf(hfile):
    """Yield ``(codid, g)`` for every item in the 'pdfc' group of `hfile`.

    hfile -- open HDF file object containing a 'pdfc' group.

    Each item name is normalized with ``normcodid`` and each dataset is
    read in full.
    """
    yield from (
        (normcodid(name), dataset[()])
        for name, dataset in hfile['pdfc'].items()
    )