Example #1
0
def create_correlation_test_docs():
    """
    Creates JSON file containing a certain number of materials and their
    necessary properties to load the propnet store for correlation tests.

    Queries the propnet production store for documents that contain (or were
    derived from) each property in PROPNET_PROPS, takes the first
    ``n_materials`` matches, and dumps them to
    ``CORR_TEST_DIR/correlation_propnet_data.json``.
    """
    n_materials = 200
    pnstore = MongograntStore("ro:mongodb03.nersc.gov/propnet",
                              "propnet_july2019")
    pnstore.connect()
    # Each property must either exist as a top-level field or appear as an
    # input symbol type on the document.
    cursor = pnstore.query(criteria={
        '$and': [{
            '$or': [{
                p: {
                    '$exists': True
                }
            }, {
                'inputs.symbol_type': p
            }]
        } for p in PROPNET_PROPS]
    },
                           properties=['task_id', 'inputs'] +
                           [p + '.mean' for p in PROPNET_PROPS] +
                           [p + '.units' for p in PROPNET_PROPS] +
                           [p + '.quantities' for p in PROPNET_PROPS])
    data = []
    try:
        # Take at most n_materials documents from the cursor.
        for item in cursor:
            if len(data) >= n_materials:
                break
            data.append(item)
    finally:
        # Always release the server-side cursor, even if the query returned
        # fewer than n_materials documents or iteration raised.
        cursor.close()
    dumpfn(data, os.path.join(CORR_TEST_DIR, "correlation_propnet_data.json"))
Example #2
0
def process_sdf_file(filename):
    """
    Parse an SDF file of PubChem molecules and bulk-insert them into the
    mp_pubchem MongoDB collection.

    Molecules missing the PUBCHEM_COMPOUND_CID field (or any key lookup
    that raises KeyError) are skipped. The source file is renamed with a
    ".processed" suffix when done.

    Args:
        filename: path to the SDF file to ingest.

    Returns:
        tuple (number of molecules inserted, number of molecules skipped).
    """
    mp_pubchem = MongograntStore("rw:knowhere.lbl.gov/mp_pubchem",
                                 "mp_pubchem",
                                 key="pubchem_id")
    mp_pubchem.connect()
    coll = mp_pubchem.collection

    skipped = 0
    pubchem_molecules = []
    for i, mol in enumerate(pybel.readfile("sdf", filename)):
        try:
            pubchem_id = int(mol.data["PUBCHEM_COMPOUND_CID"])
            xyz = mol.write(format="xyz")

            data = {"pubchem_id": pubchem_id, "xyz": xyz}
            # `keys`/`key_map` are module-level: copy only the whitelisted
            # SDF fields, renaming them to our schema.
            for key in keys:
                if key in mol.data:
                    data[key_map[key]] = mol.data[key]

            pubchem_molecules.append(data)

        except KeyError:
            # Molecule lacks a required field; count it and move on.
            skipped += 1

    # pymongo's insert_many raises InvalidOperation on an empty list, which
    # would also prevent the file from being marked processed below.
    if pubchem_molecules:
        coll.insert_many(pubchem_molecules)

    os.rename(filename, filename + ".processed")
    return len(pubchem_molecules), skipped
Example #3
0
 def create_test_docs(self):
     """
     Build correlation-test fixture files from the production stores.

     Finds up to 200 task_ids present in both the propnet and materials
     collections (with all required properties), then dumps each store's
     matching documents to JSON files under TEST_DIR.
     """
     from maggma.advanced_stores import MongograntStore
     from monty.serialization import dumpfn

     pnstore = MongograntStore("ro:knowhere.lbl.gov/mp_core", "propnet")
     pnstore.connect()
     mpstore = MongograntStore("ro:knowhere.lbl.gov/mp_core", "materials")
     mpstore.connect()

     # A propnet doc qualifies if every property either exists as a field
     # or shows up as an input symbol type.
     prop_filters = [
         {'$or': [{p: {'$exists': True}}, {'inputs.symbol_type': p}]}
         for p in self.propnet_props
     ]
     pn_mpids = {
         doc['task_id']
         for doc in pnstore.query(criteria={'$and': prop_filters},
                                 properties=['task_id'])
     }

     # Materials docs must carry every MP query property.
     mp_criteria = {p: {'$exists': True} for p in self.mp_query_props}
     mp_mpids = {
         doc['task_id']
         for doc in mpstore.query(criteria=mp_criteria,
                                 properties=['task_id'])
     }

     # Cap the fixture at 200 materials common to both stores.
     mpids = list(pn_mpids & mp_mpids)[:200]

     pn_fields = (['task_id', 'inputs']
                  + [p + '.mean' for p in self.propnet_props]
                  + [p + '.units' for p in self.propnet_props])
     pn_data = pnstore.query(criteria={'task_id': {'$in': mpids}},
                             properties=pn_fields)
     dumpfn(list(pn_data),
            os.path.join(TEST_DIR, "correlation_propnet_data.json"))

     mp_data = mpstore.query(criteria={'task_id': {'$in': mpids}},
                             properties=['task_id'] + self.mp_query_props)
     dumpfn(list(mp_data), os.path.join(TEST_DIR,
                                        "correlation_mp_data.json"))