Example #1
def clear_env():
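    # WARNING: wipes the submissions, FireWorks, and SNL databases, then
    # empties the derived task/boltztrap/counter/GridFS collections below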
    sma = SubmissionMongoAdapter.auto_load()

    lp = LaunchPad.auto_load()

    snl = SNLMongoAdapter.auto_load()

    db_dir = os.environ['DB_LOC']
    db_path = os.path.join(db_dir, 'tasks_db.json')
    with open(db_path) as f:
        db_creds = json.load(f)

    sma._reset()
    lp.reset('', require_password=False)
    snl._reset()

    conn = MongoClient(db_creds['host'], db_creds['port'])
    db = conn[db_creds['database']]
    db.authenticate(db_creds['admin_user'], db_creds['admin_password'])
    db.tasks.remove()
    db.boltztrap.remove()
    db.counter.remove()
    db['dos_fs.chunks'].remove()
    db['dos_fs.files'].remove()
    db['band_structure_fs.chunks'].remove()
    db['band_structure_fs.files'].remove()
Example #2
    def run_task(self, fw_spec):
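        # add the SNL from the spec to the SNL db; if a species group was
        # assigned, push a run_tag so downstream fireworks can see it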
        sma = SNLMongoAdapter.auto_load()
        snl = fw_spec['snl']
        mpsnl, snlgroup_id, spec_group = sma.add_snl(snl)
        mod_spec = [{"_push": {"run_tags": "species_group={}".format(spec_group)}}] if spec_group else None

        return FWAction(update_spec={'mpsnl': mpsnl.as_dict(), 'snlgroup_id': snlgroup_id}, mod_spec=mod_spec)
Example #3
def submit_all_snl(min=None, max=None):
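    # submit the canonical SNL of every valid, ordered snlgroup (optionally
    # limited to a snlgroup_id range) to the submissions database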
    constraints = {
        'is_ordered': True,
        'is_valid': True,
        'nsites': {
            '$lte': 200
        },
        'canonical_snl.about.projects': {
            '$ne': 'CederDahn Challenge'
        }
    }
    constraints['elements'] = {'$nin': NO_POTCARS}
    constraints['canonical_snl.about.history.name'] = {
        "$ne": "Materials Project structure optimization"
    }
    constraints['canonical_snl.about.remarks'] = {"$ne": "DEPRECATED"}

    if min is not None and max is not None:
        constraints['snlgroup_id'] = {'$gte': min, '$lte': max}
    elif min is not None or max is not None:
        raise ValueError('Must specify both min AND max if you specify one')

    snldb = SNLMongoAdapter.auto_load()
    sma = SubmissionMongoAdapter.auto_load()

    for result in snldb.snlgroups.find(constraints, {
            'canonical_snl': 1,
            'snlgroup_id': 1
    }):
        snl = MPStructureNL.from_dict(result['canonical_snl'])
        parameters = {'snlgroup_id': result['snlgroup_id']}
        sma.submit_snl(snl,
                       'Anubhav Jain <*****@*****.**>',
                       parameters=parameters)
Example #4
    def run_task(self, fw_spec):
        # get the SNL mongo adapter
        sma = SNLMongoAdapter.auto_load()

        # get the SNL
        snl = StructureNL.from_dict(fw_spec['snl'])

        # add snl
        mpsnl, snlgroup_id = sma.add_snl(snl)

        return FWAction(update_spec={'mpsnl': mpsnl.to_dict, 'snlgroup_id': snlgroup_id})
Example #5
    def run_task(self, fw_spec):
        # pass-through option for when we start with an mpsnl and don't actually want to add
        if 'force_mpsnl' in fw_spec and 'force_snlgroup_id' in fw_spec:
            print 'USING FORCED MPSNL'
            return FWAction(update_spec={'mpsnl': fw_spec['force_mpsnl'], 'snlgroup_id': fw_spec['force_snlgroup_id']})

        sma = SNLMongoAdapter.auto_load()
        snl = StructureNL.from_dict(fw_spec['snl'])
        mpsnl, snlgroup_id = sma.add_snl(snl)

        return FWAction(update_spec={'mpsnl': mpsnl.to_dict, 'snlgroup_id': snlgroup_id})
Example #6
    def setup(cls):
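        # load the SNL adapter and the materials collection credentials
        # stored alongside this module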
        module_dir = os.path.dirname(os.path.abspath(__file__))
        snl_f = os.path.join(module_dir, 'snl.yaml')
        cls.snldb = SNLMongoAdapter.from_file(snl_f)

        tasks_f = os.path.join(module_dir, 'materials.yaml')
        with open(tasks_f) as f2:
            task_creds = yaml.load(f2)

        mc = MongoClient(task_creds['host'], task_creds['port'])
        db = mc[task_creds['database']]
        db.authenticate(task_creds['admin_user'], task_creds['admin_password'])
        cls.materials = db[task_creds['collection']]
Example #7
    def run_task(self, fw_spec):
        sma = SNLMongoAdapter.auto_load()
        snl = StructureNL.from_dict(fw_spec['snl'])
        mpsnl, snlgroup_id, spec_group = sma.add_snl(snl)
        mod_spec = [{
            "_push": {
                "run_tags": "species_group={}".format(spec_group)
            }
        }] if spec_group else None

        return FWAction(update_spec={
            'mpsnl': mpsnl.to_dict,
            'snlgroup_id': snlgroup_id
        },
                        mod_spec=mod_spec)
Example #8
def submit_all_snl(min=None, max=None):
    constraints = {'is_ordered': True, 'is_valid': True, 'nsites': {'$lte': 200}, 'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'}}
    constraints['elements'] = {'$nin': NO_POTCARS}
    constraints['canonical_snl.about.history.name'] = {"$ne":"Materials Project structure optimization"}
    constraints['canonical_snl.about.remarks'] = {"$ne": "DEPRECATED"}

    if min is not None and max is not None:
        constraints['snlgroup_id'] = {'$gte': min, '$lte': max}
    elif min is not None or max is not None:
        raise ValueError('Must specify both min AND max if you specify one')

    snldb = SNLMongoAdapter.auto_load()
    sma = SubmissionMongoAdapter.auto_load()

    for result in snldb.snlgroups.find(constraints, {'canonical_snl': 1, 'snlgroup_id': 1}):
        snl = MPStructureNL.from_dict(result['canonical_snl'])
        parameters = {'snlgroup_id': result['snlgroup_id']}
        sma.submit_snl(snl, 'Anubhav Jain <*****@*****.**>', parameters=parameters)
Example #9
    def run_task(self, fw_spec):
        # pass-through option for when we start with an mpsnl and don't actually want to add
        if 'force_mpsnl' in fw_spec and 'force_snlgroup_id' in fw_spec:
            print 'USING FORCED MPSNL'
            return FWAction(
                update_spec={
                    'mpsnl': fw_spec['force_mpsnl'],
                    'snlgroup_id': fw_spec['force_snlgroup_id']
                })

        sma = SNLMongoAdapter.auto_load()
        snl = StructureNL.from_dict(fw_spec['snl'])
        mpsnl, snlgroup_id = sma.add_snl(snl)

        return FWAction(update_spec={
            'mpsnl': mpsnl.to_dict,
            'snlgroup_id': snlgroup_id
        })
Example #10
def archive_deprecated_fws():
    # find all snlgroups that are deprecated, and archive all WFs that have deprecated fw_ids so we don't run them
    module_dir = os.path.dirname(os.path.abspath(__file__))
    snl_f = os.path.join(module_dir, 'snl.yaml')
    snldb = SNLMongoAdapter.from_file(snl_f)
    snlgroups = snldb.snlgroups

    lp_f = os.path.join(module_dir, 'my_launchpad.yaml')
    lpdb = LaunchPad.from_file(lp_f)

    for g in snlgroups.find({'canonical_snl.about.remarks':'DEPRECATED'}, {'snlgroup_id': 1}):
        while True:
            fw = lpdb.fireworks.find_one({'spec.snlgroup_id': g['snlgroup_id'], 'state': {'$ne': 'ARCHIVED'}}, {'fw_id': 1})
            if not fw:
                break
            print fw['fw_id']
            lpdb.archive_wf(fw['fw_id'])


    print 'DONE'
Example #11
def find_alternate_canonical():
    # see if we can replace a deprecated canonical SNL with a non-deprecated one

    module_dir = os.path.dirname(os.path.abspath(__file__))

    snl_f = os.path.join(module_dir, 'snl.yaml')
    snldb = SNLMongoAdapter.from_file(snl_f)
    snl = snldb.snl
    snlgroups = snldb.snlgroups

    for g in snlgroups.find({"canonical_snl.about.remarks":"DEPRECATED"}, {"snlgroup_id": 1, "all_snl_ids": 1}):
        for s in snl.find({"snl_id": {"$in": g['all_snl_ids']}, "about.remarks": {"$ne": "DEPRECATED"}}):
            canonical_mpsnl = MPStructureNL.from_dict(s)
            snldb.switch_canonical_snl(g['snlgroup_id'], canonical_mpsnl)
            print g['snlgroup_id']
            break

    print 'DONE'
Example #12
def get_colls():
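    # bundle the SNL, FireWorks, and tasks collections into one object;
    # snl_f, fw_f, and tasks_f are presumably module-level config paths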
    colls = namedtuple('Collections', ['snl', 'snlgroups', 'fireworks', 'launches', 'tasks'])
    sma = SNLMongoAdapter.from_file(snl_f)
    lp = LaunchPad.from_file(fw_f)

    colls.snl = sma.snl
    colls.snlgroups = sma.snlgroups
    colls.fireworks = lp.fireworks
    colls.launches = lp.launches

    with open(tasks_f) as f2:
        task_creds = yaml.load(f2)

    mc = MongoClient(task_creds['host'], task_creds['port'])
    db = mc[task_creds['database']]
    db.authenticate(task_creds['admin_user'], task_creds['admin_password'])
    colls.tasks = db['tasks']

    return colls
Example #13
    def resubmit(self, submission_id, snl_db=None):
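        # reset a submission to SUBMITTED, re-attaching the existing SNL and
        # snlgroup_id if one was already created for it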
        # see if an SNL object has already been created
        if not snl_db:
            snl_db = SNLMongoAdapter.auto_load()

        mpsnl = None
        snlgroup_id = None
        snl_dict = snl_db.snl.find_one({"about._materialsproject.submission_id": submission_id})
        if snl_dict:
            mpsnl = MPStructureNL.from_dict(snl_dict)
            snlgroup_id = snl_db.snlgroups.find_one({"all_snl_ids": snl_dict['snl_id']}, {"snlgroup_id":1})['snlgroup_id']

        # Now reset the current submission parameters
        updates = {'state': 'SUBMITTED', 'state_details': {}, 'task_dict': {}}

        if mpsnl:
            updates['parameters'] = self.jobs.find_one({'submission_id': submission_id}, {'parameters': 1})['parameters']
            updates['parameters'].update({"mpsnl": mpsnl.as_dict(), "snlgroup_id": snlgroup_id})

        self.jobs.find_and_modify({'submission_id': submission_id}, {'$set': updates})
Example #14
    def resubmit(self, submission_id, snl_db=None):
        # see if an SNL object has already been created
        if not snl_db:
            snl_db = SNLMongoAdapter.auto_load()

        mpsnl = None
        snlgroup_id = None
        snl_dict = snl_db.snl.find_one({"about._materialsproject.submission_id": submission_id})
        if snl_dict:
            mpsnl = MPStructureNL.from_dict(snl_dict)
            snlgroup_id = snl_db.snlgroups.find_one({"all_snl_ids": snl_dict['snl_id']}, {"snlgroup_id":1})['snlgroup_id']

        # Now reset the current submission parameters
        updates = {'state': 'SUBMITTED', 'state_details': {}, 'task_dict': {}}

        if mpsnl:
            updates['parameters'] = self.jobs.find_one({'submission_id': submission_id}, {'parameters': 1})['parameters']
            updates['parameters'].update({"mpsnl": mpsnl.to_dict, "snlgroup_id": snlgroup_id})

        self.jobs.find_and_modify({'submission_id': submission_id}, {'$set': updates})
Example #15
def detect():
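    # flag crystal_ids whose old-style ICSD SNL and its new-style counterpart
    # (same icsd_id, with a coll_code) did not end up in the same SNL group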
    module_dir = os.path.dirname(os.path.abspath(__file__))
    snl_f = os.path.join(module_dir, 'snl.yaml')
    snldb = SNLMongoAdapter.from_file(snl_f)

    snl = snldb.snl
    snlgroups = snldb.snlgroups
    q = {"about._icsd.icsd_id": {"$exists": True}}  # ICSD structures
    q["about._icsd.coll_code"] = {"$exists": False}  # old-style ICSD entries (no collection code)
    q["about.history.description.fw_id"] = {"$exists": False}  # exclude structure relaxations

    for old_s in snl.find(q, {"snl_id": 1, 'about._icsd.icsd_id': 1, 'about._materialsproject.deprecated.crystal_id_deprecated': 1}):
        icsd_id = old_s['about']['_icsd']['icsd_id']
        crystal_id = old_s['about']['_materialsproject']['deprecated']['crystal_id_deprecated']

        new_s = snl.find_one({"about._icsd.icsd_id":icsd_id, "about._icsd.coll_code":{"$exists":True}}, {"snl_id": 1})
        if new_s:
            n_groups = snlgroups.find({"all_snl_ids":{"$in":[old_s['snl_id'], new_s['snl_id']]}}).count()
            if n_groups != 1:
                # The crystal_id is bad
                print crystal_id
Example #16
import os
import yaml
from collections import Counter
from datetime import datetime
from fnmatch import fnmatch
from pymongo import MongoClient
from fireworks import LaunchPad
from custodian.vasp.handlers import VaspErrorHandler
from mpworks.snl_utils.snl_mongo import SNLMongoAdapter

cwd = os.getcwd()

# DONE manually: "mp-987" -> fw_id: 119629

lpdb = LaunchPad.from_file(
    '/global/homes/m/matcomp/mp_prod/config/config_Mendel/my_launchpad.yaml')
spec = {
    'task_type': 'Controller: add Electronic Structure v2',
    '_priority': 100000
}
sma = SNLMongoAdapter.from_file(
    '/global/homes/m/matcomp/mp_prod/config/dbs/snl_db.yaml')
with open('/global/homes/m/matcomp/mp_prod/materials_db_prod.yaml') as f:
    creds = yaml.load(f)
client = MongoClient(creds['host'], creds['port'])
db = client[creds['db']]
db.authenticate(creds['username'], creds['password'])
materials = db['materials']
tasks = db['tasks']
print materials.count()


def append_wf(fw_id, parent_fw_id=None):
    wf = lpdb.workflows.find_one({'nodes': fw_id}, {
        'parent_links': 1,
        'links': 1,
        'name': 1
Example #17
    def process_fw(self, old_task, d):
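        # translate an old-style task document into the new schema: attach
        # SNL/snlgroup info, then scan the run directory for error signals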
        # AJ - this whole section is different
        sma = SNLMongoAdapter.auto_load()

        d['old_engine'] = old_task.get('engine')
        if 'fw_id' in old_task:
            d['old_fw_id'] = old_task['fw_id']

        d['fw_id'] = None
        d['task_type'] = 'GGA+U optimize structure (2x)' if old_task[
            'is_hubbard'] else 'GGA optimize structure (2x)'
        d['submission_id'] = None
        d['vaspinputset_name'] = None

        snl_d = sma.snl.find_one({'about._materialsproject.deprecated.mps_ids': old_task['mps_id']})
        if old_task.get('mps_id', -1) > 0 and snl_d:
            # grab the SNL from the SNL db
            del snl_d['_id']
            d['snl'] = snl_d
            d['snlgroup_id'] = sma.snlgroups.find_one({'all_snl_ids': d['snl']['snl_id']}, {'snlgroup_id': 1})['snlgroup_id']

        elif 'mps' in old_task and old_task['mps']:
            snl = mps_dict_to_snl(old_task['mps'])
            mpsnl, snlgroup_id = sma.add_snl(snl)
            d['snl'] = mpsnl.to_dict
            d['snlgroup_id'] = snlgroup_id
        else:
            s = Structure.from_dict(old_task['input']['crystal'])
            snl = StructureNL(s, 'Anubhav Jain <*****@*****.**>', remarks=['origin unknown'])
            mpsnl, snlgroup_id = sma.add_snl(snl)
            d['snl'] = mpsnl.to_dict
            d['snlgroup_id'] = snlgroup_id


        if 'optimize structure' in d['task_type'] and 'output' in d:
            # create a new SNL based on optimized structure
            new_s = Structure.from_dict(d['output']['crystal'])
            old_snl = StructureNL.from_dict(d['snl'])
            history = old_snl.history
            history.append(
                {'name': 'Materials Project structure optimization',
                 'url': 'http://www.materialsproject.org',
                 'description': {'task_type': d['task_type'],
                                 'fw_id': d['fw_id'],
                                 'task_id': d['task_id']}})
            new_snl = StructureNL(new_s, old_snl.authors, old_snl.projects,
                                  old_snl.references, old_snl.remarks,
                                  old_snl.data, history)

            # add snl
            mpsnl, snlgroup_id = sma.add_snl(new_snl, snlgroup_guess=d['snlgroup_id'])

            d['snl_final'] = mpsnl.to_dict
            d['snlgroup_id_final'] = snlgroup_id
            d['snlgroup_changed'] = (d['snlgroup_id'] !=
                                     d['snlgroup_id_final'])

        # custom processing for detecting errors
        dir_name = old_task['dir_name']
        new_style = os.path.exists(os.path.join(dir_name, 'FW.json'))
        vasp_signals = {}
        critical_errors = ["INPUTS_DONT_EXIST",
                           "OUTPUTS_DONT_EXIST", "INCOHERENT_POTCARS",
                           "VASP_HASNT_STARTED", "VASP_HASNT_COMPLETED",
                           "CHARGE_UNCONVERGED", "NETWORK_QUIESCED",
                           "HARD_KILLED", "WALLTIME_EXCEEDED",
                           "ATOMS_TOO_CLOSE", "DISK_SPACE_EXCEEDED"]

        last_relax_dir = dir_name

        if not new_style:
            # get the last relaxation dir
            # the order is relax2, current dir, then relax1. This is because
            # after completing relax1, the job happens in the current dir.
            # Finally, it gets moved to relax2.
            # There are some weird cases where both the current dir and relax2
            # contain data. The relax2 is good, but the current dir is bad.
            if is_valid_vasp_dir(os.path.join(dir_name, "relax2")):
                last_relax_dir = os.path.join(dir_name, "relax2")
            elif is_valid_vasp_dir(dir_name):
                pass
            elif is_valid_vasp_dir(os.path.join(dir_name, "relax1")):
                last_relax_dir = os.path.join(dir_name, "relax1")

        vasp_signals['last_relax_dir'] = last_relax_dir
        # see what error signals are present
        print "getting signals for dir: {}".format(last_relax_dir)

        sl = SignalDetectorList()
        sl.append(VASPInputsExistSignal())
        sl.append(VASPOutputsExistSignal())
        sl.append(VASPOutSignal())
        sl.append(HitAMemberSignal())
        sl.append(SegFaultSignal())
        sl.append(VASPStartedCompletedSignal())

        signals = sl.detect_all(last_relax_dir)

        signals = signals.union(WallTimeSignal().detect(dir_name))
        if not new_style:
            root_dir = os.path.dirname(dir_name)  # one level above dir_name
            signals = signals.union(WallTimeSignal().detect(root_dir))

        signals = signals.union(DiskSpaceExceededSignal().detect(dir_name))
        if not new_style:
            root_dir = os.path.dirname(dir_name)  # one level above dir_name
            signals = signals.union(DiskSpaceExceededSignal().detect(root_dir))

        signals = list(signals)

        critical_signals = [val for val in signals if val in critical_errors]

        vasp_signals['signals'] = signals
        vasp_signals['critical_signals'] = critical_signals

        vasp_signals['num_signals'] = len(signals)
        vasp_signals['num_critical'] = len(critical_signals)

        if len(critical_signals) > 0 and d['state'] == "successful":
            d["state"] = "error"

        d['analysis'] = d.get('analysis', {})
        d['analysis']['errors_MP'] = vasp_signals

        d['run_tags'] = ['PBE']
        d['run_tags'].extend(d['pseudo_potential']['labels'])
        d['run_tags'].extend([e+"="+str(d['hubbards'].get(e, 0)) for e in d['elements']])
Example #18
def analyze(args):
    """analyze data at any point for a copy of the streaming figure"""
    # NOTE: make copy online first with suffix _%Y-%m-%d and note figure id
    fig = py.get_figure(creds['username'], args.fig_id)
    if args.t:
        if args.fig_id == 42:
            label_entries = filter(None, '<br>'.join(fig['data'][2]['text']).split('<br>'))
            pairs = map(make_tuple, label_entries)
            grps = set(chain.from_iterable(pairs))
            snlgrp_cursor = sma.snlgroups.aggregate([
                { '$match': {
                    'snlgroup_id': { '$in': list(grps) },
                    'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'}
                } },
                { '$project': { 'snlgroup_id': 1, 'canonical_snl.snlgroup_key': 1, '_id': 0 } }
            ], cursor={})
            snlgroup_keys = {}
            for d in snlgrp_cursor:
                snlgroup_keys[d['snlgroup_id']] = d['canonical_snl']['snlgroup_key']
            print snlgroup_keys[40890]
            sma2 = SNLMongoAdapter.from_file(
                os.path.join(os.environ['DB_LOC'], 'materials_db.yaml')
            )
            materials_cursor = sma2.database.materials.aggregate([
                { '$match': {
                    'snlgroup_id_final': { '$in': list(grps) },
                    'snl_final.about.projects': {'$ne': 'CederDahn Challenge'}
                } },
                { '$project': {
                    'snlgroup_id_final': 1, '_id': 0, 'task_id': 1,
                    'final_energy_per_atom': 1,
                    'band_gap.search_gap.band_gap': 1,
                    'volume': 1, 'nsites': 1
                }}
            ], cursor={})
            snlgroup_data = {}
            for material in materials_cursor:
                snlgroup_id = material['snlgroup_id_final']
                final_energy_per_atom = material['final_energy_per_atom']
                band_gap = material['band_gap']['search_gap']['band_gap']
                volume_per_atom = material['volume'] / material['nsites']
                snlgroup_data[snlgroup_id] = {
                    'final_energy_per_atom': final_energy_per_atom,
                    'band_gap': band_gap, 'task_id': material['task_id'],
                    'volume_per_atom': volume_per_atom
                }
            print snlgroup_data[40890]
            filestem = 'mpworks/check_snl/results/bad_snlgroups_2_'
            with open(filestem+'in_matdb.csv', 'wb') as f, \
                    open(filestem+'notin_matdb.csv', 'wb') as g:
                writer1, writer2 = csv.writer(f), csv.writer(g)
                header = [
                    'category', 'composition',
                    'snlgroup_id 1', 'sg_num 1', 'task_id 1',
                    'snlgroup_id 2', 'sg_num 2', 'task_id 2',
                    'delta_energy', 'delta_bandgap', 'delta_volume_per_atom',
                    'rms_dist', 'scenario'
                ]
                writer1.writerow(header)
                writer2.writerow(header)
                for primary_id, secondary_id in pairs:
                    if primary_id not in snlgroup_keys or \
                       secondary_id not in snlgroup_keys: continue
                    composition, primary_sg_num = snlgroup_keys[primary_id].split('--')
                    secondary_sg_num = snlgroup_keys[secondary_id].split('--')[1]
                    category = 'same SGs' if primary_sg_num == secondary_sg_num else 'diff. SGs'
                    if primary_id not in snlgroup_data or secondary_id not in snlgroup_data:
                        delta_energy, delta_bandgap, delta_volume_per_atom = '', '', ''
                    else:
                        delta_energy = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['final_energy_per_atom'] - \
                            snlgroup_data[secondary_id]['final_energy_per_atom']
                        ))
                        delta_bandgap = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['band_gap'] - \
                            snlgroup_data[secondary_id]['band_gap']
                        ))
                        delta_volume_per_atom = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['volume_per_atom'] - \
                            snlgroup_data[secondary_id]['volume_per_atom']
                        ))
                    scenario, rms_dist_str = '', ''
                    if category == 'diff. SGs' and delta_energy and delta_bandgap:
                        scenario = 'different' if (
                            float(delta_energy) > 0.01 or float(delta_bandgap) > 0.1
                        ) else 'similar'
                        snlgrp1_dict = sma.snlgroups.find_one({ "snlgroup_id": primary_id })
                        snlgrp2_dict = sma.snlgroups.find_one({ "snlgroup_id": secondary_id })
                        snlgrp1 = SNLGroup.from_dict(snlgrp1_dict)
                        snlgrp2 = SNLGroup.from_dict(snlgrp2_dict)
                        primary_structure = snlgrp1.canonical_structure
                        secondary_structure = snlgrp2.canonical_structure
                        rms_dist = matcher.get_rms_dist(primary_structure, secondary_structure)
                        if rms_dist is not None:
                            rms_dist_str = "({0:.3g},{1:.3g})".format(*rms_dist)
                            print rms_dist_str
                    row = [
                        category, composition,
                        primary_id, primary_sg_num,
                        snlgroup_data[primary_id]['task_id'] \
                        if primary_id in snlgroup_data else '',
                        secondary_id, secondary_sg_num,
                        snlgroup_data[secondary_id]['task_id'] \
                        if secondary_id in snlgroup_data else '',
                        delta_energy, delta_bandgap, delta_volume_per_atom,
                        rms_dist_str, scenario
                    ]
                    if delta_energy and delta_bandgap: writer1.writerow(row)
                    else: writer2.writerow(row)
        elif args.fig_id == 16:
            out_fig = Figure()
            badsnls_trace = Scatter(x=[], y=[], text=[], mode='markers', name='SG Changes')
            bisectrix = Scatter(x=[0,230], y=[0,230], mode='lines', name='bisectrix')
            print 'pulling bad snls from plotly ...'
            bad_snls = OrderedDict()
            for category, text in zip(fig['data'][2]['y'], fig['data'][2]['text']):
                for snl_id in map(int, text.split('<br>')):
                    bad_snls[snl_id] = category
            with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
                print 'pulling bad snls from database ...'
                mpsnl_cursor = sma.snl.find({
                    'snl_id': { '$in': bad_snls.keys() },
                    'about.projects': {'$ne': 'CederDahn Challenge'}
                })
                writer = csv.writer(f)
                writer.writerow([
                    'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks', 'projects', 'authors'
                ])
                print 'writing bad snls to file ...'
                for mpsnl_dict in mpsnl_cursor:
                    mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                    row = [ mpsnl.snl_id, bad_snls[mpsnl.snl_id], mpsnl.snlgroup_key ]
                    row += _get_snl_extra_info(mpsnl)
                    writer.writerow(row)
                    sg_num = mpsnl.snlgroup_key.split('--')[1]
                    if (bad_snls[mpsnl.snl_id] == 'SG default' and sg_num != '-1') or \
                       bad_snls[mpsnl.snl_id] == 'SG change':
                        mpsnl.structure.remove_oxidation_states()
                        sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
                        badsnls_trace['x'].append(mpsnl.sg_num)
                        badsnls_trace['y'].append(sf.get_spacegroup_number())
                        badsnls_trace['text'].append(mpsnl.snl_id)
                        if bad_snls[mpsnl.snl_id] == 'SG default':
                            print sg_num, sf.get_spacegroup_number()
                print 'plotting out-fig ...'
                out_fig['data'] = Data([bisectrix, badsnls_trace])
                out_fig['layout'] = Layout(
                    showlegend=False, hovermode='closest',
                    title='Spacegroup Assignment Changes',
                    xaxis=XAxis(showgrid=False, title='old SG number', range=[0,230]),
                    yaxis=YAxis(showgrid=False, title='new SG number', range=[0,230]),
                )
                filename = 'spacegroup_changes_'
                filename += datetime.datetime.now().strftime('%Y-%m-%d') 
                py.plot(out_fig, filename=filename, auto_open=False)
        elif args.fig_id == 43: # SNLGroupMemberChecker
            matcher2 = StructureMatcher(
                ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=False, scale=True,
                attempt_supercell=True, comparator=ElementComparator()
            )
            print 'pulling data from plotly ...'
            trace = Scatter(x=[], y=[], text=[], mode='markers', name='mismatches')
            bad_snls = OrderedDict() # snlgroup_id : [ mismatching snl_ids ]
            for category, text in zip(fig['data'][2]['y'], fig['data'][2]['text']):
                if category != 'mismatch': continue
                for entry in text.split('<br>'):
                    fields = entry.split(':')
                    snlgroup_id = int(fields[0].split(',')[0])
                    print snlgroup_id
                    snlgrp_dict = sma.snlgroups.find_one({ 'snlgroup_id': snlgroup_id })
                    snlgrp = SNLGroup.from_dict(snlgrp_dict)
                    s1 = snlgrp.canonical_structure.get_primitive_structure()
                    bad_snls[snlgroup_id] = []
                    for i, snl_id in enumerate(fields[1].split(',')):
                        mpsnl_dict = sma.snl.find_one({ 'snl_id': int(snl_id) })
                        if 'CederDahn Challenge' in mpsnl_dict['about']['projects']:
                            print 'skip CederDahn: %s' % snl_id
                            continue
                        mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                        s2 = mpsnl.structure.get_primitive_structure()
                        is_match = matcher2.fit(s1, s2)
                        if is_match: continue
                        bad_snls[snlgroup_id].append(snl_id)
                        trace['x'].append(snlgroup_id)
                        trace['y'].append(i+1)
                        trace['text'].append(snl_id)
                    if len(bad_snls[snlgroup_id]) < 1:
                        bad_snls.pop(snlgroup_id, None)
            with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f:
                print 'pulling bad snlgroups from database ...'
                snlgroup_cursor = sma.snlgroups.find({
                    'snlgroup_id': { '$in': bad_snls.keys() },
                })
                writer = csv.writer(f)
                writer.writerow(['snlgroup_id', 'snlgroup_key', 'mismatching snl_ids'])
                print 'writing bad snlgroups to file ...'
                for snlgroup_dict in snlgroup_cursor:
                    snlgroup = SNLGroup.from_dict(snlgroup_dict)
                    row = [
                        snlgroup.snlgroup_id, snlgroup.canonical_snl.snlgroup_key,
                        ' '.join(bad_snls[snlgroup.snlgroup_id])
                    ]
                    writer.writerow(row)
            print 'plotting out-fig ...'
            out_fig = Figure()
            out_fig['data'] = Data([trace])
            out_fig['layout'] = Layout(
                showlegend=False, hovermode='closest',
                title='Member Mismatches of SNLGroup Canonicals',
                xaxis=XAxis(showgrid=False, title='snlgroup_id', showexponent='none'),
                yaxis=YAxis(showgrid=False, title='# mismatching SNLs'),
            )
            filename = 'groupmember_mismatches_'
            filename += datetime.datetime.now().strftime('%Y-%m-%d') 
            py.plot(out_fig, filename=filename, auto_open=False)
    else:
        errors = Counter()
        bad_snls = OrderedDict()
        bad_snlgroups = OrderedDict()
        for i,d in enumerate(fig['data']):
            if not isinstance(d, Scatter): continue
            if 'x' not in d or 'y' not in d or 'text' not in d: continue
            start_id = int(d['name'].split(' - ')[0][:-1])*1000
            marker_colors = d['marker']['color']
            if i < 2*num_snl_streams: # spacegroups
                errors += Counter(marker_colors)
                for idx,color in enumerate(marker_colors):
                    snl_id = start_id + d['x'][idx]
                    color_index = category_colors.index(color)
                    category = categories[color_index]
                    bad_snls[snl_id] = category
            else: # groupmembers
                for idx,color in enumerate(marker_colors):
                    if color != category_colors[0]: continue
                    snlgroup_id = start_id + d['x'][idx]
                    mismatch_snl_id, canonical_snl_id = d['text'][idx].split(' != ')
                    bad_snlgroups[snlgroup_id] = int(mismatch_snl_id)
        print errors
        fig_data = fig['data'][-1]
        fig_data['x'] = [ errors[color] for color in fig_data['marker']['color'] ]
        filename = _get_filename()
        print filename
        #py.plot(fig, filename=filename)
        with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
            mpsnl_cursor = sma.snl.find({ 'snl_id': { '$in': bad_snls.keys() } })
            writer = csv.writer(f)
            writer.writerow([
                'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks', 'projects', 'authors'
            ])
            for mpsnl_dict in mpsnl_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                row = [ mpsnl.snl_id, bad_snls[mpsnl.snl_id], mpsnl.snlgroup_key ]
                row += _get_snl_extra_info(mpsnl)
                writer.writerow(row)
        with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f:
            snlgrp_cursor = sma.snlgroups.find({ 'snlgroup_id': { '$in': bad_snlgroups.keys() } })
            first_mismatch_snls_cursor = sma.snl.find({ 'snl_id': { '$in': bad_snlgroups.values() } })
            first_mismatch_snl_info = OrderedDict()
            for mpsnl_dict in first_mismatch_snls_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                first_mismatch_snl_info[mpsnl.snl_id] = _get_snl_extra_info(mpsnl)
            writer = csv.writer(f)
            writer.writerow([
                'snlgroup_id', 'snlgroup_key',
                'canonical_snl_id', 'first_mismatching_snl_id',
                 'nsites', 'remarks', 'projects', 'authors'
            ])
            for snlgrp_dict in snlgrp_cursor:
                snlgrp = SNLGroup.from_dict(snlgrp_dict)
                first_mismatch_snl_id = bad_snlgroups[snlgrp.snlgroup_id]
                row = [
                    snlgrp.snlgroup_id, snlgrp.canonical_snl.snlgroup_key,
                    snlgrp.canonical_snl.snl_id, first_mismatch_snl_id
                ]
                row += [
                    ' & '.join(pair) if pair[0] != pair[1] else pair[0]
                    for pair in zip(
                        _get_snl_extra_info(snlgrp.canonical_snl),
                        first_mismatch_snl_info[int(first_mismatch_snl_id)]
                    )
                ]
                writer.writerow(row)
Example #19
import os
import plotly.plotly as py
from pandas import DataFrame
from mpworks.snl_utils.snl_mongo import SNLMongoAdapter

sma = SNLMongoAdapter.auto_load()
sma2 = SNLMongoAdapter.from_file(
    os.path.join(os.environ['DB_LOC'], 'materials_db.yaml'))


def _get_snlgroup_id(snl_id):
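    # map an snl_id to the snlgroup_id of the group that contains it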
    return sma.snlgroups.find_one({'all_snl_ids': int(snl_id)}, {
        'snlgroup_id': 1,
        '_id': 0
    })['snlgroup_id']


def _get_mp_id(snlgroup_id):
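    # map a final snlgroup_id to the task_id of its materials entry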
    mat = sma2.database.materials.find_one({'snlgroup_id_final': snlgroup_id},
                                           {
                                               '_id': 0,
                                               'task_id': 1
                                           })
    if mat is not None:
        return mat['task_id']
    return 'not found'


def _get_mp_link(mp_id):
    if mp_id == 'not found': return mp_id
    url = 'link:$$https://materialsproject.org/materials/'
Example #20
if __name__ == '__main__':

    module_dir = os.path.dirname(os.path.abspath(__file__))
    automation_f = os.path.join(module_dir, 'automation.yaml')
    snl_f = os.path.join(module_dir, 'snl.yaml')

    with open(automation_f) as f:
        y = yaml.load(f)

    mc = MongoClient(y['host'], y['port'])
    db = mc[y['db']]

    db.authenticate(y['username'], y['password'])

    snldb = SNLMongoAdapter.from_file(snl_f)

    prev_ids = []  # MPS ids that we already took care of

    print 'INITIALIZING'
    if RESET:
        snldb._reset()
        time.sleep(10)  # makes me sleep better at night

    else:
        for mps in snldb.snl.find({}, {"about._materialsproject.deprecated.mps_ids": 1}):
            prev_ids.extend(mps['about']['_materialsproject']['deprecated']['mps_ids'])

    print 'PROCESSING'
    for mps in db.mps.find(timeout=False):
        try:
Example #21
    def process_fw(self, old_task, d):
        # AJ - this whole section is different
        sma = SNLMongoAdapter.auto_load()

        d["old_engine"] = old_task.get("engine")
        if "fw_id" in old_task:
            d["old_fw_id"] = old_task["fw_id"]

        d["fw_id"] = None
        d["task_type"] = "GGA+U optimize structure (2x)" if old_task["is_hubbard"] else "GGA optimize structure (2x)"
        d["submission_id"] = None
        d["vaspinputset_name"] = None

        snl_d = sma.snl.find_one({"about._materialsproject.deprecated.mps_ids": old_task["mps_id"]})
        if old_task.get("mps_id", -1) > 0 and snl_d:
            # grab the SNL from the SNL db
            del snl_d["_id"]
            d["snl"] = snl_d
            d["snlgroup_id"] = sma.snlgroups.find_one({"all_snl_ids": d["snl"]["snl_id"]}, {"snlgroup_id": 1})[
                "snlgroup_id"
            ]

        elif "mps" in old_task and old_task["mps"]:
            snl = mps_dict_to_snl(old_task["mps"])
            mpsnl, snlgroup_id = sma.add_snl(snl)
            d["snl"] = mpsnl.as_dict()
            d["snlgroup_id"] = snlgroup_id
        else:
            s = Structure.from_dict(old_task["input"]["crystal"])
            snl = StructureNL(s, "Anubhav Jain <*****@*****.**>", remarks=["origin unknown"])
            mpsnl, snlgroup_id = sma.add_snl(snl)
            d["snl"] = mpsnl.as_dict()
            d["snlgroup_id"] = snlgroup_id

        if "optimize structure" in d["task_type"] and "output" in d:
            # create a new SNL based on optimized structure
            new_s = Structure.from_dict(d["output"]["crystal"])
            old_snl = StructureNL.from_dict(d["snl"])
            history = old_snl.history
            history.append(
                {
                    "name": "Materials Project structure optimization",
                    "url": "http://www.materialsproject.org",
                    "description": {"task_type": d["task_type"], "fw_id": d["fw_id"], "task_id": d["task_id"]},
                }
            )
            new_snl = StructureNL(
                new_s, old_snl.authors, old_snl.projects, old_snl.references, old_snl.remarks, old_snl.data, history
            )

            # add snl
            mpsnl, snlgroup_id = sma.add_snl(new_snl, snlgroup_guess=d["snlgroup_id"])

            d["snl_final"] = mpsnl.as_dict()
            d["snlgroup_id_final"] = snlgroup_id
            d["snlgroup_changed"] = d["snlgroup_id"] != d["snlgroup_id_final"]

        # custom processing for detecting errors
        dir_name = old_task["dir_name"]
        new_style = os.path.exists(os.path.join(dir_name, "FW.json"))
        vasp_signals = {}
        critical_errors = [
            "INPUTS_DONT_EXIST",
            "OUTPUTS_DONT_EXIST",
            "INCOHERENT_POTCARS",
            "VASP_HASNT_STARTED",
            "VASP_HASNT_COMPLETED",
            "CHARGE_UNCONVERGED",
            "NETWORK_QUIESCED",
            "HARD_KILLED",
            "WALLTIME_EXCEEDED",
            "ATOMS_TOO_CLOSE",
            "DISK_SPACE_EXCEEDED",
        ]

        last_relax_dir = dir_name

        if not new_style:
            # get the last relaxation dir
            # the order is relax2, current dir, then relax1. This is because
            # after completing relax1, the job happens in the current dir.
            # Finally, it gets moved to relax2.
            # There are some weird cases where both the current dir and relax2
            # contain data. The relax2 is good, but the current dir is bad.
            if is_valid_vasp_dir(os.path.join(dir_name, "relax2")):
                last_relax_dir = os.path.join(dir_name, "relax2")
            elif is_valid_vasp_dir(dir_name):
                pass
            elif is_valid_vasp_dir(os.path.join(dir_name, "relax1")):
                last_relax_dir = os.path.join(dir_name, "relax1")

        vasp_signals["last_relax_dir"] = last_relax_dir
        # see what error signals are present
        print "getting signals for dir: {}".format(last_relax_dir)

        sl = SignalDetectorList()
        sl.append(VASPInputsExistSignal())
        sl.append(VASPOutputsExistSignal())
        sl.append(VASPOutSignal())
        sl.append(HitAMemberSignal())
        sl.append(SegFaultSignal())
        sl.append(VASPStartedCompletedSignal())

        signals = sl.detect_all(last_relax_dir)

        signals = signals.union(WallTimeSignal().detect(dir_name))
        if not new_style:
            root_dir = os.path.dirname(dir_name)  # one level above dir_name
            signals = signals.union(WallTimeSignal().detect(root_dir))

        signals = signals.union(DiskSpaceExceededSignal().detect(dir_name))
        if not new_style:
            root_dir = os.path.dirname(dir_name)  # one level above dir_name
            signals = signals.union(DiskSpaceExceededSignal().detect(root_dir))

        signals = list(signals)

        critical_signals = [val for val in signals if val in critical_errors]

        vasp_signals["signals"] = signals
        vasp_signals["critical_signals"] = critical_signals

        vasp_signals["num_signals"] = len(signals)
        vasp_signals["num_critical"] = len(critical_signals)

        if len(critical_signals) > 0 and d["state"] == "successful":
            d["state"] = "error"

        d["analysis"] = d.get("analysis", {})
        d["analysis"]["errors_MP"] = vasp_signals

        d["run_tags"] = ["PBE"]
        d["run_tags"].extend(d["pseudo_potential"]["labels"])
        d["run_tags"].extend([e + "=" + str(d["hubbards"].get(e, 0)) for e in d["elements"]])
Example #22
from mpworks.snl_utils.snl_mongo import SNLMongoAdapter
from mpworks.snl_utils.mpsnl import MPStructureNL, SNLGroup
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.analysis.structure_matcher import StructureMatcher, ElementComparator, SpeciesComparator
import plotly.plotly as py
import plotly.tools as tls
from plotly.graph_objs import *
from mpworks.check_snl.utils import div_plus_mod, sleep
from ast import literal_eval as make_tuple
from itertools import chain

creds = tls.get_credentials_file()
stream_ids = creds['stream_ids']
min_sleep = 0.052

sma = SNLMongoAdapter.auto_load()
matcher = StructureMatcher(ltol=0.2,
                           stol=0.3,
                           angle_tol=5,
                           primitive_cell=True,
                           scale=True,
                           attempt_supercell=False,
                           comparator=ElementComparator())

num_ids_per_stream = 20000
num_ids_per_stream_k = num_ids_per_stream / 1000
num_snls = sma.snl.count()
num_snlgroups = sma.snlgroups.count()
num_pairs_per_job = 1000 * num_ids_per_stream
num_pairs_max = num_snlgroups * (num_snlgroups - 1) / 2
Example #23
def analyze(args):
    """analyze data at any point for a copy of the streaming figure"""
    # NOTE: make copy online first with suffix _%Y-%m-%d and note figure id
    fig = py.get_figure(creds['username'], args.fig_id)
    if args.t:
        if args.fig_id == 42:
            label_entries = filter(
                None, '<br>'.join(fig['data'][2]['text']).split('<br>'))
            pairs = map(make_tuple, label_entries)
            grps = set(chain.from_iterable(pairs))
            snlgrp_cursor = sma.snlgroups.aggregate([{
                '$match': {
                    'snlgroup_id': {
                        '$in': list(grps)
                    },
                    'canonical_snl.about.projects': {
                        '$ne': 'CederDahn Challenge'
                    }
                }
            }, {
                '$project': {
                    'snlgroup_id': 1,
                    'canonical_snl.snlgroup_key': 1,
                    '_id': 0
                }
            }],
                                                    cursor={})
            snlgroup_keys = {}
            for d in snlgrp_cursor:
                snlgroup_keys[
                    d['snlgroup_id']] = d['canonical_snl']['snlgroup_key']
            print snlgroup_keys[40890]
            sma2 = SNLMongoAdapter.from_file(
                os.path.join(os.environ['DB_LOC'], 'materials_db.yaml'))
            materials_cursor = sma2.database.materials.aggregate([{
                '$match': {
                    'snlgroup_id_final': {
                        '$in': list(grps)
                    },
                    'snl_final.about.projects': {
                        '$ne': 'CederDahn Challenge'
                    }
                }
            }, {
                '$project': {
                    'snlgroup_id_final': 1,
                    '_id': 0,
                    'task_id': 1,
                    'final_energy_per_atom': 1,
                    'band_gap.search_gap.band_gap': 1,
                    'volume': 1,
                    'nsites': 1
                }
            }],
                                                                 cursor={})
            snlgroup_data = {}
            for material in materials_cursor:
                snlgroup_id = material['snlgroup_id_final']
                final_energy_per_atom = material['final_energy_per_atom']
                band_gap = material['band_gap']['search_gap']['band_gap']
                volume_per_atom = material['volume'] / material['nsites']
                snlgroup_data[snlgroup_id] = {
                    'final_energy_per_atom': final_energy_per_atom,
                    'band_gap': band_gap,
                    'task_id': material['task_id'],
                    'volume_per_atom': volume_per_atom
                }
            print snlgroup_data[40890]
            filestem = 'mpworks/check_snl/results/bad_snlgroups_2_'
            with open(filestem+'in_matdb.csv', 'wb') as f, \
                    open(filestem+'notin_matdb.csv', 'wb') as g:
                writer1, writer2 = csv.writer(f), csv.writer(g)
                header = [
                    'category', 'composition', 'snlgroup_id 1', 'sg_num 1',
                    'task_id 1', 'snlgroup_id 2', 'sg_num 2', 'task_id 2',
                    'delta_energy', 'delta_bandgap', 'delta_volume_per_atom',
                    'rms_dist', 'scenario'
                ]
                writer1.writerow(header)
                writer2.writerow(header)
                for primary_id, secondary_id in pairs:
                    if primary_id not in snlgroup_keys or \
                       secondary_id not in snlgroup_keys:
                        continue
                    composition, primary_sg_num = snlgroup_keys[
                        primary_id].split('--')
                    secondary_sg_num = snlgroup_keys[secondary_id].split(
                        '--')[1]
                    category = 'same SGs' if primary_sg_num == secondary_sg_num else 'diff. SGs'
                    if primary_id not in snlgroup_data or secondary_id not in snlgroup_data:
                        delta_energy, delta_bandgap, delta_volume_per_atom = '', '', ''
                    else:
                        delta_energy = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['final_energy_per_atom'] - \
                            snlgroup_data[secondary_id]['final_energy_per_atom']
                        ))
                        delta_bandgap = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['band_gap'] - \
                            snlgroup_data[secondary_id]['band_gap']
                        ))
                        delta_volume_per_atom = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['volume_per_atom'] - \
                            snlgroup_data[secondary_id]['volume_per_atom']
                        ))
                    scenario, rms_dist_str = '', ''
                    if category == 'diff. SGs' and delta_energy and delta_bandgap:
                        scenario = 'different' if (
                            float(delta_energy) > 0.01
                            or float(delta_bandgap) > 0.1) else 'similar'
                        snlgrp1_dict = sma.snlgroups.find_one(
                            {"snlgroup_id": primary_id})
                        snlgrp2_dict = sma.snlgroups.find_one(
                            {"snlgroup_id": secondary_id})
                        snlgrp1 = SNLGroup.from_dict(snlgrp1_dict)
                        snlgrp2 = SNLGroup.from_dict(snlgrp2_dict)
                        primary_structure = snlgrp1.canonical_structure
                        secondary_structure = snlgrp2.canonical_structure
                        rms_dist = matcher.get_rms_dist(
                            primary_structure, secondary_structure)
                        if rms_dist is not None:
                            rms_dist_str = "({0:.3g},{1:.3g})".format(
                                *rms_dist)
                            print rms_dist_str
                    row = [
                        category, composition,
                        primary_id, primary_sg_num,
                        snlgroup_data[primary_id]['task_id'] \
                        if primary_id in snlgroup_data else '',
                        secondary_id, secondary_sg_num,
                        snlgroup_data[secondary_id]['task_id'] \
                        if secondary_id in snlgroup_data else '',
                        delta_energy, delta_bandgap, delta_volume_per_atom,
                        rms_dist_str, scenario
                    ]
                    if delta_energy and delta_bandgap: writer1.writerow(row)
                    else: writer2.writerow(row)
        elif args.fig_id == 16:
            out_fig = Figure()
            badsnls_trace = Scatter(x=[],
                                    y=[],
                                    text=[],
                                    mode='markers',
                                    name='SG Changes')
            bisectrix = Scatter(x=[0, 230],
                                y=[0, 230],
                                mode='lines',
                                name='bisectrix')
            print 'pulling bad snls from plotly ...'
            bad_snls = OrderedDict()
            for category, text in zip(fig['data'][2]['y'],
                                      fig['data'][2]['text']):
                for snl_id in map(int, text.split('<br>')):
                    bad_snls[snl_id] = category
            with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
                print 'pulling bad snls from database ...'
                mpsnl_cursor = sma.snl.find({
                    'snl_id': {
                        '$in': bad_snls.keys()
                    },
                    'about.projects': {
                        '$ne': 'CederDahn Challenge'
                    }
                })
                writer = csv.writer(f)
                writer.writerow([
                    'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks',
                    'projects', 'authors'
                ])
                print 'writing bad snls to file ...'
                for mpsnl_dict in mpsnl_cursor:
                    mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                    row = [
                        mpsnl.snl_id, bad_snls[mpsnl.snl_id],
                        mpsnl.snlgroup_key
                    ]
                    row += _get_snl_extra_info(mpsnl)
                    writer.writerow(row)
                    sg_num = mpsnl.snlgroup_key.split('--')[1]
                    if (bad_snls[mpsnl.snl_id] == 'SG default' and sg_num != '-1') or \
                       bad_snls[mpsnl.snl_id] == 'SG change':
                        mpsnl.structure.remove_oxidation_states()
                        sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
                        badsnls_trace['x'].append(mpsnl.sg_num)
                        badsnls_trace['y'].append(sf.get_spacegroup_number())
                        badsnls_trace['text'].append(mpsnl.snl_id)
                        if bad_snls[mpsnl.snl_id] == 'SG default':
                            print sg_num, sf.get_spacegroup_number()
                print 'plotting out-fig ...'
                out_fig['data'] = Data([bisectrix, badsnls_trace])
                out_fig['layout'] = Layout(
                    showlegend=False,
                    hovermode='closest',
                    title='Spacegroup Assignment Changes',
                    xaxis=XAxis(showgrid=False,
                                title='old SG number',
                                range=[0, 230]),
                    yaxis=YAxis(showgrid=False,
                                title='new SG number',
                                range=[0, 230]),
                )
                filename = 'spacegroup_changes_'
                filename += datetime.datetime.now().strftime('%Y-%m-%d')
                py.plot(out_fig, filename=filename, auto_open=False)
        elif args.fig_id == 43:  # SNLGroupMemberChecker
            matcher2 = StructureMatcher(ltol=0.2,
                                        stol=0.3,
                                        angle_tol=5,
                                        primitive_cell=False,
                                        scale=True,
                                        attempt_supercell=True,
                                        comparator=ElementComparator())
            print 'pulling data from plotly ...'
            trace = Scatter(x=[],
                            y=[],
                            text=[],
                            mode='markers',
                            name='mismatches')
            bad_snls = OrderedDict()  # snlgroup_id : [ mismatching snl_ids ]
            for category, text in zip(fig['data'][2]['y'],
                                      fig['data'][2]['text']):
                if category != 'mismatch': continue
                for entry in text.split('<br>'):
                    fields = entry.split(':')
                    snlgroup_id = int(fields[0].split(',')[0])
                    print snlgroup_id
                    snlgrp_dict = sma.snlgroups.find_one(
                        {'snlgroup_id': snlgroup_id})
                    snlgrp = SNLGroup.from_dict(snlgrp_dict)
                    s1 = snlgrp.canonical_structure.get_primitive_structure()
                    bad_snls[snlgroup_id] = []
                    for i, snl_id in enumerate(fields[1].split(',')):
                        mpsnl_dict = sma.snl.find_one({'snl_id': int(snl_id)})
                        if 'CederDahn Challenge' in mpsnl_dict['about'][
                                'projects']:
                            print 'skip CederDahn: %s' % snl_id
                            continue
                        mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                        s2 = mpsnl.structure.get_primitive_structure()
                        is_match = matcher2.fit(s1, s2)
                        if is_match: continue
                        bad_snls[snlgroup_id].append(snl_id)
                        trace['x'].append(snlgroup_id)
                        trace['y'].append(i + 1)
                        trace['text'].append(snl_id)
                    if len(bad_snls[snlgroup_id]) < 1:
                        bad_snls.pop(snlgroup_id, None)
            with open('mpworks/check_snl/results/bad_snlgroups.csv',
                      'wb') as f:
                print 'pulling bad snlgroups from database ...'
                snlgroup_cursor = sma.snlgroups.find({
                    'snlgroup_id': {
                        '$in': bad_snls.keys()
                    },
                })
                writer = csv.writer(f)
                writer.writerow(
                    ['snlgroup_id', 'snlgroup_key', 'mismatching snl_ids'])
                print 'writing bad snlgroups to file ...'
                for snlgroup_dict in snlgroup_cursor:
                    snlgroup = SNLGroup.from_dict(snlgroup_dict)
                    row = [
                        snlgroup.snlgroup_id,
                        snlgroup.canonical_snl.snlgroup_key,
                        ' '.join(bad_snls[snlgroup.snlgroup_id])
                    ]
                    writer.writerow(row)
            print 'plotting out-fig ...'
            out_fig = Figure()
            out_fig['data'] = Data([trace])
            out_fig['layout'] = Layout(
                showlegend=False,
                hovermode='closest',
                title='Member Mismatches of SNLGroup Canonicals',
                xaxis=XAxis(showgrid=False,
                            title='snlgroup_id',
                            showexponent='none'),
                yaxis=YAxis(showgrid=False, title='# mismatching SNLs'),
            )
            filename = 'groupmember_mismatches_'
            filename += datetime.datetime.now().strftime('%Y-%m-%d')
            py.plot(out_fig, filename=filename, auto_open=False)
    else:
        errors = Counter()
        bad_snls = OrderedDict()
        bad_snlgroups = OrderedDict()
        for i, d in enumerate(fig['data']):
            if not isinstance(d, Scatter): continue
            if 'x' not in d or 'y' not in d or 'text' not in d: continue
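            # trace names presumably encode the id offset in thousands
            # (e.g. "5k - 6k"); strip the trailing 'k' and scale by 1000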
            start_id = int(d['name'].split(' - ')[0][:-1]) * 1000
            marker_colors = d['marker']['color']
            if i < 2 * num_snl_streams:  # spacegroups
                errors += Counter(marker_colors)
                for idx, color in enumerate(marker_colors):
                    snl_id = start_id + d['x'][idx]
                    color_index = category_colors.index(color)
                    category = categories[color_index]
                    bad_snls[snl_id] = category
            else:  # groupmembers
                for idx, color in enumerate(marker_colors):
                    if color != category_colors[0]: continue
                    snlgroup_id = start_id + d['x'][idx]
                    mismatch_snl_id, canonical_snl_id = d['text'][idx].split(
                        ' != ')
                    bad_snlgroups[snlgroup_id] = int(mismatch_snl_id)
        print(errors)
        fig_data = fig['data'][-1]
        fig_data['x'] = [
            errors[color] for color in fig_data['marker']['color']
        ]
        filename = _get_filename()
        print(filename)
        #py.plot(fig, filename=filename)
        with open('mpworks/check_snl/results/bad_snls.csv', 'w', newline='') as f:
            mpsnl_cursor = sma.snl.find({'snl_id': {'$in': list(bad_snls.keys())}})
            writer = csv.writer(f)
            writer.writerow([
                'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks',
                'projects', 'authors'
            ])
            for mpsnl_dict in mpsnl_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                row = [
                    mpsnl.snl_id, bad_snls[mpsnl.snl_id], mpsnl.snlgroup_key
                ]
                row += _get_snl_extra_info(mpsnl)
                writer.writerow(row)
        with open('mpworks/check_snl/results/bad_snlgroups.csv', 'w', newline='') as f:
            snlgrp_cursor = sma.snlgroups.find(
                {'snlgroup_id': {
                    '$in': list(bad_snlgroups.keys())
                }})
            first_mismatch_snls_cursor = sma.snl.find(
                {'snl_id': {
                    '$in': list(bad_snlgroups.values())
                }})
            first_mismatch_snl_info = OrderedDict()
            for mpsnl_dict in first_mismatch_snls_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                first_mismatch_snl_info[mpsnl.snl_id] = _get_snl_extra_info(
                    mpsnl)
            writer = csv.writer(f)
            writer.writerow([
                'snlgroup_id', 'snlgroup_key', 'canonical_snl_id',
                'first_mismatching_snl_id', 'nsites', 'remarks', 'projects',
                'authors'
            ])
            for snlgrp_dict in snlgrp_cursor:
                snlgrp = SNLGroup.from_dict(snlgrp_dict)
                first_mismatch_snl_id = bad_snlgroups[snlgrp.snlgroup_id]
                row = [
                    snlgrp.snlgroup_id, snlgrp.canonical_snl.snlgroup_key,
                    snlgrp.canonical_snl.snl_id, first_mismatch_snl_id
                ]
                row += [
                    ' & '.join(pair) if pair[0] != pair[1] else pair[0]
                    for pair in zip(
                        _get_snl_extra_info(snlgrp.canonical_snl),
                        first_mismatch_snl_info[int(first_mismatch_snl_id)])
                ]
                writer.writerow(row)
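
Note: the helper _get_snl_extra_info used above is not defined in this excerpt. A minimal sketch consistent with the CSV columns it fills (nsites, remarks, projects, authors) could be:

def _get_snl_extra_info(mpsnl):
    # hypothetical reconstruction; the real helper lives elsewhere in mpworks
    return [
        str(len(mpsnl.structure.sites)),
        ' :: '.join(mpsnl.remarks),
        ' :: '.join(mpsnl.projects),
        ' :: '.join(a.name for a in mpsnl.authors),
    ]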
示例#25
0
import csv
from mpworks.snl_utils.snl_mongo import SNLMongoAdapter
sma = SNLMongoAdapter.auto_load()
with open('mpworks/check_snl/results/zero_occu_sites.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow([
        'snl_id', 'num_zero_occu_sites', 'icsd_id', 'is_valid', 'formula'
    ])
    for doc in sma.snl.aggregate([
        #{ '$match': { 'about._icsd.icsd_id': { '$exists': True } } },
        { '$unwind': '$sites' },
        { '$unwind': '$sites.species' },
        { '$project': {
            'snl_id': 1, 'sites.species.occu': 1, '_id': 0, 
            'about._icsd.icsd_id': 1, 'is_valid': 1,
            'reduced_cell_formula_abc': 1
        } },
        { '$match': { 'sites.species.occu': 0.0 } },
        { '$group': {
            '_id': '$snl_id',
            'num_zero_occu_sites': { '$sum': 1 },
            'icsd_ids': { '$addToSet': '$about._icsd.icsd_id' },
            'is_valid': { '$addToSet': '$is_valid' },
            'formula': { '$addToSet': '$reduced_cell_formula_abc' }
        } },
    ]):  # pymongo 3+ always returns a CommandCursor; the legacy cursor={} kwarg is unneeded
        icsd_id = doc['icsd_ids'][0] if len(doc['icsd_ids']) > 0 else ''
        row = [
            doc['_id'], doc['num_zero_occu_sites'], icsd_id, doc['is_valid'][0],
            doc['formula'][0]
        ]
        writer.writerow(row)
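
For reference, a hypothetical shape of an SNL document that the pipeline above assumes (sketch only; field names taken from the $project and $match stages):

example_doc = {
    'snl_id': 12345,                          # grouping key after the $match
    'is_valid': True,
    'reduced_cell_formula_abc': 'Li2 O1',     # hypothetical formula
    'about': {'_icsd': {'icsd_id': 67890}},   # optional ICSD provenance
    'sites': [                                # $unwind flattens sites, then sites.species
        {'species': [{'element': 'Li', 'occu': 0.0}]},
    ],
}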
示例#26
0
    def process_fw(self, dir_name, d):
        d["task_id_deprecated"] = int(
            d["task_id"].split('-')[-1])  # useful for WC and AJ

        # update the run fields to give species group in root, if exists
        for r in d['run_tags']:
            if "species_group=" in r:
                d["species_group"] = int(r.split("=")[-1])
                break

        # custom Materials Project post-processing for FireWorks
        with zopen(zpath(os.path.join(dir_name, 'FW.json'))) as f:
            fw_dict = json.load(f)
            d['fw_id'] = fw_dict['fw_id']
            d['snl'] = fw_dict['spec']['mpsnl']
            d['snlgroup_id'] = fw_dict['spec']['snlgroup_id']
            d['vaspinputset_name'] = fw_dict['spec'].get('vaspinputset_name')
            d['task_type'] = fw_dict['spec']['task_type']
            # Process data for deformed structures
            if 'deformed' in d['task_type']:
                d['deformation_matrix'] = fw_dict['spec']['deformation_matrix']
                d['original_task_id'] = fw_dict['spec']['original_task_id']
            if not self.update_duplicates:
                if 'optimize structure' in d['task_type'] and 'output' in d:
                    # create a new SNL based on optimized structure
                    new_s = Structure.from_dict(d['output']['crystal'])
                    old_snl = StructureNL.from_dict(d['snl'])
                    history = old_snl.history
                    history.append({
                        'name': 'Materials Project structure optimization',
                        'url': 'http://www.materialsproject.org',
                        'description': {
                            'task_type': d['task_type'],
                            'fw_id': d['fw_id'],
                            'task_id': d['task_id']
                        }
                    })
                    new_snl = StructureNL(new_s, old_snl.authors,
                                          old_snl.projects, old_snl.references,
                                          old_snl.remarks, old_snl.data,
                                          history)

                    # enter new SNL into SNL db
                    # get the SNL mongo adapter
                    sma = SNLMongoAdapter.auto_load()

                    # add snl
                    mpsnl, snlgroup_id, spec_group = sma.add_snl(
                        new_snl, snlgroup_guess=d['snlgroup_id'])
                    d['snl_final'] = mpsnl.as_dict()
                    d['snlgroup_id_final'] = snlgroup_id
                    d['snlgroup_changed'] = (d['snlgroup_id'] !=
                                             d['snlgroup_id_final'])
                else:
                    d['snl_final'] = d['snl']
                    d['snlgroup_id_final'] = d['snlgroup_id']
                    d['snlgroup_changed'] = False

        # custom processing for detecting errors
        new_style = os.path.exists(zpath(os.path.join(dir_name, 'FW.json')))
        vasp_signals = {}
        critical_errors = [
            "INPUTS_DONT_EXIST", "OUTPUTS_DONT_EXIST", "INCOHERENT_POTCARS",
            "VASP_HASNT_STARTED", "VASP_HASNT_COMPLETED", "CHARGE_UNCONVERGED",
            "NETWORK_QUIESCED", "HARD_KILLED", "WALLTIME_EXCEEDED",
            "ATOMS_TOO_CLOSE", "DISK_SPACE_EXCEEDED", "NO_RELAX2",
            "POSITIVE_ENERGY"
        ]
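        # if any of these signal names is detected below, a "successful" task
        # state is flipped to "error" at the end of this method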

        last_relax_dir = dir_name

        if not new_style:
            # get the last relaxation dir
            # the order is relax2, current dir, then relax1. This is because
            # after completing relax1, the job happens in the current dir.
            # Finally, it gets moved to relax2.
            # There are some weird cases where both the current dir and relax2
            # contain data. The relax2 is good, but the current dir is bad.
            if is_valid_vasp_dir(os.path.join(dir_name, "relax2")):
                last_relax_dir = os.path.join(dir_name, "relax2")
            elif is_valid_vasp_dir(dir_name):
                pass
            elif is_valid_vasp_dir(os.path.join(dir_name, "relax1")):
                last_relax_dir = os.path.join(dir_name, "relax1")

        vasp_signals['last_relax_dir'] = last_relax_dir
        # see what error signals are present

        print("getting signals for dir: {}".format(last_relax_dir))

        sl = SignalDetectorList()
        sl.append(VASPInputsExistSignal())
        sl.append(VASPOutputsExistSignal())
        sl.append(VASPOutSignal())
        sl.append(HitAMemberSignal())
        sl.append(SegFaultSignal())
        sl.append(VASPStartedCompletedSignal())

        if d['state'] == 'successful' and 'optimize structure' in d[
                'task_type']:
            sl.append(Relax2ExistsSignal())

        signals = sl.detect_all(last_relax_dir)

        signals = signals.union(WallTimeSignal().detect(dir_name))
        if not new_style:
            root_dir = os.path.dirname(dir_name)  # one level above dir_name
            signals = signals.union(WallTimeSignal().detect(root_dir))

        signals = signals.union(DiskSpaceExceededSignal().detect(dir_name))
        if not new_style:
            root_dir = os.path.dirname(dir_name)  # one level above dir_name
            signals = signals.union(DiskSpaceExceededSignal().detect(root_dir))

        # guard against a missing final_energy: None > 0 raises TypeError on Python 3
        final_energy = d.get('output', {}).get('final_energy')
        if final_energy is not None and final_energy > 0:
            signals.add('POSITIVE_ENERGY')

        signals = list(signals)

        critical_signals = [val for val in signals if val in critical_errors]

        vasp_signals['signals'] = signals
        vasp_signals['critical_signals'] = critical_signals

        vasp_signals['num_signals'] = len(signals)
        vasp_signals['num_critical'] = len(critical_signals)

        if len(critical_signals) > 0 and d['state'] == "successful":
            d["state"] = "error"

        d['analysis'] = d.get('analysis', {})
        d['analysis']['errors_MP'] = vasp_signals
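
A quick standalone check of the run_tags parsing used in process_fw above (the 'species_group=<int>' tag format is assumed from the code itself):

run_tags = ['species_group=42', 'some_other_tag']
species_group = None
for r in run_tags:
    if 'species_group=' in r:
        species_group = int(r.split('=')[-1])
        break
assert species_group == 42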
示例#27
0
    def process_fw(self, dir_name, d):
        d["task_id_deprecated"] = int(d["task_id"].split('-')[-1])  # useful for WC and AJ

        # update the run fields to give species group in root, if exists
        for r in d['run_tags']:
            if "species_group=" in r:
                d["species_group"] = int(r.split("=")[-1])
                break

        # custom Materials Project post-processing for FireWorks
        with zopen(zpath(os.path.join(dir_name, 'FW.json'))) as f:
            fw_dict = json.load(f)
            d['fw_id'] = fw_dict['fw_id']
            d['snl'] = fw_dict['spec']['mpsnl']
            d['snlgroup_id'] = fw_dict['spec']['snlgroup_id']
            d['vaspinputset_name'] = fw_dict['spec'].get('vaspinputset_name')
            d['task_type'] = fw_dict['spec']['task_type']
            # Process data for deformed structures
            if 'deformed' in d['task_type']:
                d['deformation_matrix'] = fw_dict['spec']['deformation_matrix']
                d['original_task_id'] = fw_dict['spec']['original_task_id']
            if not self.update_duplicates:
                if 'optimize structure' in d['task_type'] and 'output' in d:
                    # create a new SNL based on optimized structure
                    new_s = Structure.from_dict(d['output']['crystal'])
                    old_snl = StructureNL.from_dict(d['snl'])
                    history = old_snl.history
                    history.append(
                        {'name': 'Materials Project structure optimization',
                         'url': 'http://www.materialsproject.org',
                         'description': {'task_type': d['task_type'],
                                         'fw_id': d['fw_id'],
                                         'task_id': d['task_id']}})
                    new_snl = StructureNL(new_s, old_snl.authors, old_snl.projects,
                                          old_snl.references, old_snl.remarks,
                                          old_snl.data, history)

                    # enter new SNL into SNL db
                    # get the SNL mongo adapter
                    sma = SNLMongoAdapter.auto_load()

                    # add snl
                    mpsnl, snlgroup_id, spec_group = sma.add_snl(new_snl, snlgroup_guess=d['snlgroup_id'])
                    d['snl_final'] = mpsnl.as_dict()
                    d['snlgroup_id_final'] = snlgroup_id
                    d['snlgroup_changed'] = (d['snlgroup_id'] !=
                                             d['snlgroup_id_final'])
                else:
                    d['snl_final'] = d['snl']
                    d['snlgroup_id_final'] = d['snlgroup_id']
                    d['snlgroup_changed'] = False

        # custom processing for detecting errors
        new_style = os.path.exists(zpath(os.path.join(dir_name, 'FW.json')))
        vasp_signals = {}
        critical_errors = ["INPUTS_DONT_EXIST",
                           "OUTPUTS_DONT_EXIST", "INCOHERENT_POTCARS",
                           "VASP_HASNT_STARTED", "VASP_HASNT_COMPLETED",
                           "CHARGE_UNCONVERGED", "NETWORK_QUIESCED",
                           "HARD_KILLED", "WALLTIME_EXCEEDED",
                           "ATOMS_TOO_CLOSE", "DISK_SPACE_EXCEEDED", "NO_RELAX2", "POSITIVE_ENERGY"]

        last_relax_dir = dir_name

        if not new_style:
            # get the last relaxation dir
            # the order is relax2, current dir, then relax1. This is because
            # after completing relax1, the job happens in the current dir.
            # Finally, it gets moved to relax2.
            # There are some weird cases where both the current dir and relax2
            # contain data. The relax2 is good, but the current dir is bad.
            if is_valid_vasp_dir(os.path.join(dir_name, "relax2")):
                last_relax_dir = os.path.join(dir_name, "relax2")
            elif is_valid_vasp_dir(dir_name):
                pass
            elif is_valid_vasp_dir(os.path.join(dir_name, "relax1")):
                last_relax_dir = os.path.join(dir_name, "relax1")

        vasp_signals['last_relax_dir'] = last_relax_dir
        # see what error signals are present

        print("getting signals for dir: {}".format(last_relax_dir))

        sl = SignalDetectorList()
        sl.append(VASPInputsExistSignal())
        sl.append(VASPOutputsExistSignal())
        sl.append(VASPOutSignal())
        sl.append(HitAMemberSignal())
        sl.append(SegFaultSignal())
        sl.append(VASPStartedCompletedSignal())

        if d['state'] == 'successful' and 'optimize structure' in d['task_type']:
            sl.append(Relax2ExistsSignal())

        signals = sl.detect_all(last_relax_dir)

        signals = signals.union(WallTimeSignal().detect(dir_name))
        if not new_style:
            root_dir = os.path.dirname(dir_name)  # one level above dir_name
            signals = signals.union(WallTimeSignal().detect(root_dir))

        signals = signals.union(DiskSpaceExceededSignal().detect(dir_name))
        if not new_style:
            root_dir = os.path.dirname(dir_name)  # one level above dir_name
            signals = signals.union(DiskSpaceExceededSignal().detect(root_dir))

        # guard against a missing final_energy: None > 0 raises TypeError on Python 3
        final_energy = d.get('output', {}).get('final_energy')
        if final_energy is not None and final_energy > 0:
            signals.add('POSITIVE_ENERGY')

        signals = list(signals)

        critical_signals = [val for val in signals if val in critical_errors]

        vasp_signals['signals'] = signals
        vasp_signals['critical_signals'] = critical_signals

        vasp_signals['num_signals'] = len(signals)
        vasp_signals['num_critical'] = len(critical_signals)

        if len(critical_signals) > 0 and d['state'] == "successful":
            d["state"] = "error"

        d['analysis'] = d.get('analysis', {})
        d['analysis']['errors_MP'] = vasp_signals
示例#28
0
import time
import yaml
import sys
from fireworks.core.launchpad import LaunchPad
from fireworks.core.firework import Firework, Workflow
from mpworks.firetasks.controller_tasks import AddEStructureTask
from fireworks.utilities.fw_utilities import get_slug
from mpworks.snl_utils.snl_mongo import SNLMongoAdapter
from pymongo import MongoClient
from collections import Counter
from datetime import datetime
from fnmatch import fnmatch

# DONE manually: "mp-987" -> fw_id: 119629

lpdb = LaunchPad.from_file('/global/homes/m/matcomp/mp_prod/config/config_Mendel/my_launchpad.yaml')
spec = {'task_type': 'Controller: add Electronic Structure v2', '_priority': 100000}
sma = SNLMongoAdapter.from_file('/global/homes/m/matcomp/mp_prod/config/dbs/snl_db.yaml')
with open('/global/homes/m/matcomp/mp_prod/materials_db_prod.yaml') as f:
    creds = yaml.safe_load(f)
client = MongoClient(creds['host'], creds['port'])
db = client[creds['db']]
db.authenticate(creds['username'], creds['password'])
materials = db['materials']
tasks = db['tasks']

def append_wf(fw_id, parent_fw_id=None):
    wf = lpdb.workflows.find_one({'nodes':fw_id}, {'parent_links':1,'links':1,'name':1})
    try:
        if parent_fw_id is None:
            parent_fw_id = wf['parent_links'][str(fw_id)][-1]
        # non-defused AddEStructureTask v2 already in children?
        for child_fw_id in wf['links'][str(parent_fw_id)]:
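
The function above is cut off in this excerpt. For orientation only, a minimal sketch of attaching an AddEStructureTask firework to an existing workflow via FireWorks' LaunchPad.append_wf (names reused from the imports above; this is an assumption, not the truncated function's actual body):

def append_estructure_sketch(parent_fw_id):
    # sketch: build a one-firework workflow and append it as a child of parent_fw_id
    fw = Firework([AddEStructureTask()], spec=spec,
                  name=get_slug('Controller: add Electronic Structure v2'))
    lpdb.append_wf(Workflow([fw]), fw_ids=[parent_fw_id])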
示例#29
0
def fix():

    # initialize databases
    module_dir = os.path.dirname(os.path.abspath(__file__))

    snl_f = os.path.join(module_dir, 'snl.yaml')
    snldb = SNLMongoAdapter.from_file(snl_f)
    snl = snldb.snl
    snlgroups = snldb.snlgroups

    tasks_f = os.path.join(module_dir, 'tasks.yaml')
    with open(tasks_f) as f2:
        task_creds = yaml.safe_load(f2)

    mc = MongoClient(task_creds['host'], task_creds['port'])
    db = mc[task_creds['database']]
    db.authenticate(task_creds['admin_user'], task_creds['admin_password'])
    tasks = db['tasks']

    lp_f = os.path.join(module_dir, 'my_launchpad.yaml')
    lpdb = LaunchPad.from_file(lp_f)
    fws = lpdb.fireworks
    launches = lpdb.launches

    sb_f = os.path.join(module_dir, 'submission.yaml')
    sbdb = SubmissionMongoAdapter.from_file(sb_f)
    submissions = sbdb.jobs

    bad_crystal_ids = []

    crystals_file = os.path.join(module_dir, 'bad_crystals.txt')
    with open(crystals_file) as f:
        for line in f:
            bad_crystal_ids.append(int(line.strip()))


    for c_id in bad_crystal_ids:
        if c_id in (100892, 100202):
            print('SKIP')

        else:
            # $pushAll was removed in MongoDB 3.6; use $push with $each instead
            remarks = {'$each': ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}

            # FIX SNL
            for s in snl.find({'about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'snl_id': 1}):
                snl.update_one({'snl_id': s['snl_id']}, {'$push': {'about.remarks': remarks}})

            # FIX SNLGROUPS
            for s in snlgroups.find({'canonical_snl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'snlgroup_id': 1}):
                snlgroups.update_one({'snlgroup_id': s['snlgroup_id']}, {'$push': {'canonical_snl.about.remarks': remarks}})

            # FIX FWs pt 1
            for s in fws.find({'spec.mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'fw_id': 1}):
                fws.update_one({'fw_id': s['fw_id']}, {'$push': {'spec.mpsnl.about.remarks': remarks}})

            # FIX FWs pt 2
            for s in fws.find({'spec.force_mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'fw_id': 1}):
                fws.update_one({'fw_id': s['fw_id']}, {'$push': {'spec.force_mpsnl.about.remarks': remarks}})

            # FIX Launches
            for s in launches.find({'action.update_spec.mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'launch_id': 1}):
                launches.update_one({'launch_id': s['launch_id']}, {'$push': {'action.update_spec.mpsnl.about.remarks': remarks}})

            # FIX TASKS
            for s in tasks.find({'snl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'task_id': 1}):
                tasks.update_one({'task_id': s['task_id']}, {'$push': {'snl.about.remarks': remarks}})
                tasks.update_one({'task_id': s['task_id']}, {'$push': {'snl_final.about.remarks': remarks}})

            # FIX SUBMISSIONS
            for s in submissions.find({'about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'submission_id': 1}):
                submissions.update_one({'submission_id': s['submission_id']}, {'$push': {'about.remarks': remarks}})

            print('FIXED', c_id)
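
An optional sanity check after fix() runs (sketch only; count_documents assumes pymongo >= 3.7, and snl is the collection handle set up inside fix()):

n = snl.count_documents({'about.remarks': 'DEPRECATED'})
print('SNLs now carrying the DEPRECATED remark:', n)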
示例#30
0
import os
import plotly.plotly as py
from pandas import DataFrame
from mpworks.snl_utils.snl_mongo import SNLMongoAdapter

sma = SNLMongoAdapter.auto_load()
sma2 = SNLMongoAdapter.from_file(
    os.path.join(os.environ['DB_LOC'], 'materials_db.yaml')
)


def _get_snlgroup_id(snl_id):
    return sma.snlgroups.find_one(
        {'all_snl_ids': int(snl_id)},
        {'snlgroup_id': 1, '_id': 0}
    )['snlgroup_id']

def _get_mp_id(snlgroup_id):
    mat = sma2.database.materials.find_one(
        {'snlgroup_id_final': snlgroup_id},
        {'_id': 0, 'task_id': 1}
    )
    if mat is not None:
        return mat['task_id']
    return 'not found'

def _get_mp_link(mp_id):
    if mp_id == 'not found': return mp_id
    url = 'link:$$https://materialsproject.org/materials/'
    url += mp_id
    url += '$$[%s]' % mp_id
    return url
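
Usage sketch chaining the three helpers above (12345 is a hypothetical snl_id; the link string is AsciiDoc-style, as built in _get_mp_link):

snlgroup_id = _get_snlgroup_id(12345)
mp_id = _get_mp_id(snlgroup_id)
print(_get_mp_link(mp_id))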