def clear_env():
    """Wipe every database used by the test environment.

    Resets the submissions DB, the FireWorks LaunchPad and the SNL DB via
    their own reset hooks, then connects to the tasks DB (credentials read
    from ``$DB_LOC/tasks_db.json``) and empties every task-related
    collection, including the GridFS collections for DOS and band
    structures.
    """
    sma = SubmissionMongoAdapter.auto_load()
    lp = LaunchPad.auto_load()
    snl = SNLMongoAdapter.auto_load()

    # tasks-DB credentials live in a JSON file under $DB_LOC
    db_dir = os.environ['DB_LOC']
    db_path = os.path.join(db_dir, 'tasks_db.json')
    with open(db_path) as f:
        db_creds = json.load(f)

    sma._reset()
    lp.reset('', require_password=False)
    snl._reset()

    conn = MongoClient(db_creds['host'], db_creds['port'])
    db = conn[db_creds['database']]
    db.authenticate(db_creds['admin_user'], db_creds['admin_password'])
    db.tasks.remove()
    db.boltztrap.remove()
    db.counter.remove()
    db['dos_fs.chunks'].remove()
    db['dos_fs.files'].remove()
    # BUG FIX: the original removed 'band_structure_fs.files' twice and never
    # touched 'band_structure_fs.chunks', leaving orphaned GridFS chunks
    # behind after a reset.
    db['band_structure_fs.chunks'].remove()
    db['band_structure_fs.files'].remove()
def run_task(self, fw_spec):
    """Add the spec's SNL to the SNL database and pass the ids downstream.

    When the SNL lands in a species group, also pushes a
    ``species_group=<id>`` run tag via mod_spec.
    """
    adapter = SNLMongoAdapter.auto_load()
    mpsnl, snlgroup_id, spec_group = adapter.add_snl(fw_spec['snl'])
    if spec_group:
        tag = "species_group={}".format(spec_group)
        mod_spec = [{"_push": {"run_tags": tag}}]
    else:
        mod_spec = None
    return FWAction(
        update_spec={'mpsnl': mpsnl.as_dict(), 'snlgroup_id': snlgroup_id},
        mod_spec=mod_spec)
def submit_all_snl(min=None, max=None):
    """Submit every eligible canonical SNL for calculation.

    Excludes disordered/invalid structures, cells with more than 200 sites,
    CederDahn Challenge entries, structures containing elements without
    POTCARs, MP-generated relaxation outputs, and DEPRECATED entries.

    Args:
        min: lower bound (inclusive) on snlgroup_id, optional.
        max: upper bound (inclusive) on snlgroup_id, optional.

    Raises:
        ValueError: if only one of min/max is given.
    """
    # Validate the range arguments up front, before any DB work.
    # BUG FIX: the original used truthiness ("if min and max"), so a legal
    # bound of 0 was silently treated as "not provided" and the range
    # constraint was dropped.
    if (min is None) != (max is None):
        raise ValueError('Must specify both min AND max if you specify one')

    constraints = {
        'is_ordered': True,
        'is_valid': True,
        'nsites': {'$lte': 200},
        'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'},
        'elements': {'$nin': NO_POTCARS},
        'canonical_snl.about.history.name':
            {"$ne": "Materials Project structure optimization"},
        'canonical_snl.about.remarks': {"$ne": "DEPRECATED"},
    }
    if min is not None:
        constraints['snlgroup_id'] = {'$gte': min, '$lte': max}

    snldb = SNLMongoAdapter.auto_load()
    sma = SubmissionMongoAdapter.auto_load()
    for result in snldb.snlgroups.find(constraints,
                                       {'canonical_snl': 1, 'snlgroup_id': 1}):
        snl = MPStructureNL.from_dict(result['canonical_snl'])
        parameters = {'snlgroup_id': result['snlgroup_id']}
        sma.submit_snl(snl, 'Anubhav Jain <*****@*****.**>',
                       parameters=parameters)
def run_task(self, fw_spec):
    """Deserialize the spec's SNL, register it in the SNL db, pass ids on."""
    # load the SNL mongo adapter
    adapter = SNLMongoAdapter.auto_load()
    # rebuild the StructureNL from its dict form and add it
    structure_nl = StructureNL.from_dict(fw_spec['snl'])
    mpsnl, snlgroup_id = adapter.add_snl(structure_nl)
    return FWAction(update_spec={'mpsnl': mpsnl.to_dict,
                                 'snlgroup_id': snlgroup_id})
def run_task(self, fw_spec):
    """Add the spec's SNL to the database, unless a forced MPSNL is given."""
    # pass-through option for when we start with an mpsnl and don't
    # actually want to add a new database entry
    if 'force_mpsnl' in fw_spec and 'force_snlgroup_id' in fw_spec:
        print('USING FORCED MPSNL')
        return FWAction(
            update_spec={'mpsnl': fw_spec['force_mpsnl'],
                         'snlgroup_id': fw_spec['force_snlgroup_id']})

    adapter = SNLMongoAdapter.auto_load()
    structure_nl = StructureNL.from_dict(fw_spec['snl'])
    mpsnl, snlgroup_id = adapter.add_snl(structure_nl)
    return FWAction(update_spec={'mpsnl': mpsnl.to_dict,
                                 'snlgroup_id': snlgroup_id})
def setup(cls):
    """Load the SNL adapter and the materials collection from local config.

    Reads ``snl.yaml`` and ``materials.yaml`` from this module's directory
    and stores the resulting handles on the class.
    """
    config_dir = os.path.dirname(os.path.abspath(__file__))
    cls.snldb = SNLMongoAdapter.from_file(os.path.join(config_dir, 'snl.yaml'))
    with open(os.path.join(config_dir, 'materials.yaml')) as creds_file:
        task_creds = yaml.load(creds_file)
    client = MongoClient(task_creds['host'], task_creds['port'])
    database = client[task_creds['database']]
    database.authenticate(task_creds['admin_user'],
                          task_creds['admin_password'])
    cls.materials = database[task_creds['collection']]
def run_task(self, fw_spec):
    """Add the spec's SNL to the SNL db, tagging its species group if any."""
    adapter = SNLMongoAdapter.auto_load()
    structure_nl = StructureNL.from_dict(fw_spec['snl'])
    mpsnl, snlgroup_id, spec_group = adapter.add_snl(structure_nl)
    # only push a run tag when the SNL actually landed in a species group
    mod_spec = None
    if spec_group:
        mod_spec = [{"_push": {
            "run_tags": "species_group={}".format(spec_group)}}]
    return FWAction(update_spec={'mpsnl': mpsnl.to_dict,
                                 'snlgroup_id': snlgroup_id},
                    mod_spec=mod_spec)
def submit_all_snl(min=None, max=None):
    """Submit every eligible canonical SNL for calculation.

    Skips disordered/invalid structures, cells over 200 sites, CederDahn
    Challenge entries, elements without POTCARs, MP relaxation outputs and
    DEPRECATED entries.

    Args:
        min: lower bound (inclusive) on snlgroup_id, optional.
        max: upper bound (inclusive) on snlgroup_id, optional.

    Raises:
        ValueError: if only one of min/max is given.
    """
    # BUG FIX: the original checked "if min and max", so passing 0 as a
    # bound was indistinguishable from passing nothing; compare against
    # None explicitly and validate before doing any DB work.
    if (min is None) != (max is None):
        raise ValueError('Must specify both min AND max if you specify one')

    constraints = {'is_ordered': True, 'is_valid': True,
                   'nsites': {'$lte': 200},
                   'canonical_snl.about.projects':
                       {'$ne': 'CederDahn Challenge'}}
    constraints['elements'] = {'$nin': NO_POTCARS}
    constraints['canonical_snl.about.history.name'] = {
        "$ne": "Materials Project structure optimization"}
    constraints['canonical_snl.about.remarks'] = {"$ne": "DEPRECATED"}
    if min is not None:
        constraints['snlgroup_id'] = {'$gte': min, '$lte': max}

    snldb = SNLMongoAdapter.auto_load()
    sma = SubmissionMongoAdapter.auto_load()
    for result in snldb.snlgroups.find(
            constraints, {'canonical_snl': 1, 'snlgroup_id': 1}):
        snl = MPStructureNL.from_dict(result['canonical_snl'])
        parameters = {'snlgroup_id': result['snlgroup_id']}
        sma.submit_snl(snl, 'Anubhav Jain <*****@*****.**>',
                       parameters=parameters)
def run_task(self, fw_spec):
    """Add the spec's SNL to the SNL db (or pass through a forced MPSNL)."""
    # pass-through option for when we start with an mpsnl and don't
    # actually want to add a new entry
    forced = 'force_mpsnl' in fw_spec and 'force_snlgroup_id' in fw_spec
    if forced:
        print('USING FORCED MPSNL')
        spec = {'mpsnl': fw_spec['force_mpsnl'],
                'snlgroup_id': fw_spec['force_snlgroup_id']}
        return FWAction(update_spec=spec)

    adapter = SNLMongoAdapter.auto_load()
    mpsnl, snlgroup_id = adapter.add_snl(
        StructureNL.from_dict(fw_spec['snl']))
    return FWAction(update_spec={'mpsnl': mpsnl.to_dict,
                                 'snlgroup_id': snlgroup_id})
def archive_deprecated_fws():
    """Archive all workflows whose fireworks belong to DEPRECATED snlgroups.

    Finds every snlgroup marked DEPRECATED and archives each non-archived
    workflow whose fireworks reference that snlgroup_id, so they are never
    run.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    snldb = SNLMongoAdapter.from_file(os.path.join(module_dir, 'snl.yaml'))
    lpdb = LaunchPad.from_file(os.path.join(module_dir, 'my_launchpad.yaml'))

    for g in snldb.snlgroups.find(
            {'canonical_snl.about.remarks': 'DEPRECATED'},
            {'snlgroup_id': 1}):
        query = {'spec.snlgroup_id': g['snlgroup_id'],
                 'state': {'$ne': 'ARCHIVED'}}
        while True:
            # PERF FIX: the original ran the same find_one twice per
            # iteration (once in the while condition, once in the body);
            # query once and break when nothing is left.
            fw = lpdb.fireworks.find_one(query, {'fw_id': 1})
            if not fw:
                break
            print(fw['fw_id'])
            lpdb.archive_wf(fw['fw_id'])
    print('DONE')
def find_alternate_canonical():
    """Replace deprecated canonical SNLs with a non-deprecated group member.

    For each snlgroup whose canonical SNL is marked DEPRECATED, picks the
    first member SNL that is not deprecated and promotes it to canonical.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    snldb = SNLMongoAdapter.from_file(os.path.join(module_dir, 'snl.yaml'))
    snl_coll = snldb.snl
    group_coll = snldb.snlgroups

    deprecated_groups = group_coll.find(
        {"canonical_snl.about.remarks": "DEPRECATED"},
        {"snlgroup_id": 1, "all_snl_ids": 1})
    for group in deprecated_groups:
        candidates = snl_coll.find(
            {"snl_id": {"$in": group['all_snl_ids']},
             "about.remarks": {"$ne": "DEPRECATED"}})
        for candidate in candidates:
            # promote the first acceptable member and move on
            snldb.switch_canonical_snl(group['snlgroup_id'],
                                       MPStructureNL.from_dict(candidate))
            print(group['snlgroup_id'])
            break
    print('DONE')
def get_colls():
    """Return handles to the snl/snlgroups/fireworks/launches/tasks collections.

    NOTE(review): attributes are assigned onto the namedtuple *class* itself
    (it is never instantiated); callers rely on attribute access only, so
    this quirk is preserved as-is.
    """
    colls = namedtuple('Collections', ['snl', 'snlgroups'])
    sma = SNLMongoAdapter.from_file(snl_f)
    lp = LaunchPad.from_file(fw_f)
    colls.snl = sma.snl
    colls.snlgroups = sma.snlgroups
    colls.fireworks = lp.fireworks
    colls.launches = lp.launches
    # tasks collection needs explicit credentials from the tasks config file
    with open(tasks_f) as creds_file:
        task_creds = yaml.load(creds_file)
    client = MongoClient(task_creds['host'], task_creds['port'])
    tasks_db = client[task_creds['database']]
    tasks_db.authenticate(task_creds['admin_user'],
                          task_creds['admin_password'])
    colls.tasks = tasks_db['tasks']
    return colls
def resubmit(self, submission_id, snl_db=None):
    """Reset a submission to SUBMITTED, re-attaching any existing SNL info.

    If an SNL was already created for this submission, its dict form and
    snlgroup_id are merged back into the submission's parameters so the
    SNL-insertion step can be skipped on the rerun.
    """
    if not snl_db:
        snl_db = SNLMongoAdapter.auto_load()

    # see if an SNL object has already been created for this submission
    mpsnl = None
    snlgroup_id = None
    snl_dict = snl_db.snl.find_one(
        {"about._materialsproject.submission_id": submission_id})
    if snl_dict:
        mpsnl = MPStructureNL.from_dict(snl_dict)
        group = snl_db.snlgroups.find_one(
            {"all_snl_ids": snl_dict['snl_id']}, {"snlgroup_id": 1})
        snlgroup_id = group['snlgroup_id']

    # now reset the current submission parameters
    updates = {'state': 'SUBMITTED', 'state_details': {}, 'task_dict': {}}
    if mpsnl:
        job = self.jobs.find_one({'submission_id': submission_id},
                                 {'parameters': 1})
        updates['parameters'] = job['parameters']
        updates['parameters'].update({"mpsnl": mpsnl.as_dict(),
                                      "snlgroup_id": snlgroup_id})
    self.jobs.find_and_modify({'submission_id': submission_id},
                              {'$set': updates})
def resubmit(self, submission_id, snl_db=None):
    """Reset a submission to SUBMITTED, re-attaching any existing SNL info."""
    snl_db = snl_db if snl_db else SNLMongoAdapter.auto_load()

    # check whether an SNL object already exists for this submission
    existing_mpsnl = None
    existing_group_id = None
    snl_dict = snl_db.snl.find_one(
        {"about._materialsproject.submission_id": submission_id})
    if snl_dict:
        existing_mpsnl = MPStructureNL.from_dict(snl_dict)
        existing_group_id = snl_db.snlgroups.find_one(
            {"all_snl_ids": snl_dict['snl_id']},
            {"snlgroup_id": 1})['snlgroup_id']

    # reset the current submission state; carry the SNL forward if present
    updates = {'state': 'SUBMITTED', 'state_details': {}, 'task_dict': {}}
    if existing_mpsnl:
        params = self.jobs.find_one({'submission_id': submission_id},
                                    {'parameters': 1})['parameters']
        params.update({"mpsnl": existing_mpsnl.to_dict,
                       "snlgroup_id": existing_group_id})
        updates['parameters'] = params
    self.jobs.find_and_modify({'submission_id': submission_id},
                              {'$set': updates})
def detect():
    """Print crystal_ids of old ICSD SNLs whose replacement is in another group.

    Scans old-style ICSD SNLs (icsd_id present, no coll_code, not a structure
    relaxation); if a new-style SNL with the same icsd_id exists but the two
    do not share a single snlgroup, the deprecated crystal_id is suspect and
    is printed.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    snldb = SNLMongoAdapter.from_file(os.path.join(module_dir, 'snl.yaml'))
    snl = snldb.snl
    snlgroups = snldb.snlgroups

    query = {
        "about._icsd.icsd_id": {"$exists": True},      # icsd structures
        "about._icsd.coll_code": {"$exists": False},   # old ICSD structure
        # exclude structure relaxations (which carry a fw_id in history)
        "about.history.description.fw_id": {"$exists": False},
    }
    fields = {
        "snl_id": 1,
        'about._icsd.icsd_id': 1,
        'about._materialsproject.deprecated.crystal_id_deprecated': 1,
    }
    for old_s in snl.find(query, fields):
        icsd_id = old_s['about']['_icsd']['icsd_id']
        crystal_id = old_s['about']['_materialsproject'][
            'deprecated']['crystal_id_deprecated']
        new_s = snl.find_one({"about._icsd.icsd_id": icsd_id,
                              "about._icsd.coll_code": {"$exists": True}},
                             {"snl_id": 1})
        if new_s:
            n_groups = snlgroups.find(
                {"all_snl_ids":
                     {"$in": [old_s['snl_id'], new_s['snl_id']]}}).count()
            if n_groups != 1:
                # old and new SNL landed in different groups: bad crystal_id
                print(crystal_id)
from collections import Counter from datetime import datetime from fnmatch import fnmatch from custodian.vasp.handlers import VaspErrorHandler cwd = os.getcwd() # DONE manually: "mp-987" -> fw_id: 119629 lpdb = LaunchPad.from_file( '/global/homes/m/matcomp/mp_prod/config/config_Mendel/my_launchpad.yaml') spec = { 'task_type': 'Controller: add Electronic Structure v2', '_priority': 100000 } sma = SNLMongoAdapter.from_file( '/global/homes/m/matcomp/mp_prod/config/dbs/snl_db.yaml') with open('/global/homes/m/matcomp/mp_prod/materials_db_prod.yaml') as f: creds = yaml.load(f) client = MongoClient(creds['host'], creds['port']) db = client[creds['db']] db.authenticate(creds['username'], creds['password']) materials = db['materials'] tasks = db['tasks'] print materials.count() def append_wf(fw_id, parent_fw_id=None): wf = lpdb.workflows.find_one({'nodes': fw_id}, { 'parent_links': 1, 'links': 1, 'name': 1
# process_fw: migrate an old-style task document `old_task` into the new
# schema dict `d`.
# Phase 1 (SNL bookkeeping): resolve or create an SNL for the task — via a
# deprecated mps_id lookup in the SNL db, via the raw 'mps' dict, or as a
# last resort from the input crystal with remark 'origin unknown' — and
# record snl/snlgroup_id. For structure optimizations with output, build a
# post-relaxation SNL ('snl_final'), re-group it, and flag whether the
# snlgroup changed.
# Phase 2 (error detection): locate the last relaxation dir (old
# relax1/relax2 layout vs new FW.json style), run the VASP signal detectors
# plus walltime/disk-space checks (also on the parent dir for old-style
# runs), and record any critical signals; a 'successful' state with critical
# signals is downgraded to 'error'. Finally derive run_tags from the
# functional, POTCAR labels and Hubbard U values.
# NOTE(review): code left byte-identical (comments added only) — the
# statement order is load-bearing and the text is line-wrapped mid-expression
# in the source.
def process_fw(self, old_task, d): # AJ - this whole section is different sma = SNLMongoAdapter.auto_load() d['old_engine'] = old_task.get('engine') if 'fw_id' in old_task: d['old_fw_id'] = old_task['fw_id'] d['fw_id'] = None d['task_type'] = 'GGA+U optimize structure (2x)' if old_task[ 'is_hubbard'] else 'GGA optimize structure (2x)' d['submission_id'] = None d['vaspinputset_name'] = None snl_d = sma.snl.find_one({'about._materialsproject.deprecated.mps_ids': old_task['mps_id']}) if old_task.get('mps_id', -1) > 0 and snl_d: # grab the SNL from the SNL db del snl_d['_id'] d['snl'] = snl_d d['snlgroup_id'] = sma.snlgroups.find_one({'all_snl_ids': d['snl']['snl_id']}, {'snlgroup_id': 1})['snlgroup_id'] elif 'mps' in old_task and old_task['mps']: snl = mps_dict_to_snl(old_task['mps']) mpsnl, snlgroup_id = sma.add_snl(snl) d['snl'] = mpsnl.to_dict d['snlgroup_id'] = snlgroup_id else: s = Structure.from_dict(old_task['input']['crystal']) snl = StructureNL(s, 'Anubhav Jain <*****@*****.**>', remarks=['origin unknown']) mpsnl, snlgroup_id = sma.add_snl(snl) d['snl'] = mpsnl.to_dict d['snlgroup_id'] = snlgroup_id if 'optimize structure' in d['task_type'] and 'output' in d: # create a new SNL based on optimized structure new_s = Structure.from_dict(d['output']['crystal']) old_snl = StructureNL.from_dict(d['snl']) history = old_snl.history history.append( {'name': 'Materials Project structure optimization', 'url': 'http://www.materialsproject.org', 'description': {'task_type': d['task_type'], 'fw_id': d['fw_id'], 'task_id': d['task_id']}}) new_snl = StructureNL(new_s, old_snl.authors, old_snl.projects, old_snl.references, old_snl.remarks, old_snl.data, history) # add snl mpsnl, snlgroup_id = sma.add_snl(new_snl, snlgroup_guess=d['snlgroup_id']) d['snl_final'] = mpsnl.to_dict d['snlgroup_id_final'] = snlgroup_id d['snlgroup_changed'] = (d['snlgroup_id'] != d['snlgroup_id_final']) # custom processing for detecting errors dir_name = old_task['dir_name'] new_style = 
os.path.exists(os.path.join(dir_name, 'FW.json')) vasp_signals = {} critical_errors = ["INPUTS_DONT_EXIST", "OUTPUTS_DONT_EXIST", "INCOHERENT_POTCARS", "VASP_HASNT_STARTED", "VASP_HASNT_COMPLETED", "CHARGE_UNCONVERGED", "NETWORK_QUIESCED", "HARD_KILLED", "WALLTIME_EXCEEDED", "ATOMS_TOO_CLOSE", "DISK_SPACE_EXCEEDED"] last_relax_dir = dir_name if not new_style: # get the last relaxation dir # the order is relax2, current dir, then relax1. This is because # after completing relax1, the job happens in the current dir. # Finally, it gets moved to relax2. # There are some weird cases where both the current dir and relax2 # contain data. The relax2 is good, but the current dir is bad. if is_valid_vasp_dir(os.path.join(dir_name, "relax2")): last_relax_dir = os.path.join(dir_name, "relax2") elif is_valid_vasp_dir(dir_name): pass elif is_valid_vasp_dir(os.path.join(dir_name, "relax1")): last_relax_dir = os.path.join(dir_name, "relax1") vasp_signals['last_relax_dir'] = last_relax_dir ## see what error signals are present print "getting signals for dir :{}".format(last_relax_dir) sl = SignalDetectorList() sl.append(VASPInputsExistSignal()) sl.append(VASPOutputsExistSignal()) sl.append(VASPOutSignal()) sl.append(HitAMemberSignal()) sl.append(SegFaultSignal()) sl.append(VASPStartedCompletedSignal()) signals = sl.detect_all(last_relax_dir) signals = signals.union(WallTimeSignal().detect(dir_name)) if not new_style: root_dir = os.path.dirname(dir_name) # one level above dir_name signals = signals.union(WallTimeSignal().detect(root_dir)) signals = signals.union(DiskSpaceExceededSignal().detect(dir_name)) if not new_style: root_dir = os.path.dirname(dir_name) # one level above dir_name signals = signals.union(DiskSpaceExceededSignal().detect(root_dir)) signals = list(signals) critical_signals = [val for val in signals if val in critical_errors] vasp_signals['signals'] = signals vasp_signals['critical_signals'] = critical_signals vasp_signals['num_signals'] = len(signals) 
vasp_signals['num_critical'] = len(critical_signals) if len(critical_signals) > 0 and d['state'] == "successful": d["state"] = "error" d['analysis'] = d.get('analysis', {}) d['analysis']['errors_MP'] = vasp_signals d['run_tags'] = ['PBE'] d['run_tags'].extend(d['pseudo_potential']['labels']) d['run_tags'].extend([e+"="+str(d['hubbards'].get(e, 0)) for e in d['elements']])
# analyze: post-hoc analysis of a copied plotly streaming figure.
# Branches on args.fig_id:
#  - 42: cross-checks pairs of snlgroups flagged as duplicates — pulls
#    snlgroup keys and materials-DB data, computes per-pair deltas of
#    energy/bandgap/volume, RMS distance for 'different'-looking pairs, and
#    writes two CSVs (pairs in / not in the materials DB).
#  - 16: re-plots spacegroup assignment changes for SNLs flagged bad,
#    recomputing the SG with SpacegroupAnalyzer and writing bad_snls.csv.
#  - 43: re-runs the SNLGroupMemberChecker with a looser StructureMatcher,
#    collecting group members that do not match their canonical structure
#    into bad_snlgroups.csv and a mismatch scatter plot.
#  - otherwise: tallies per-category error counts from the figure's scatter
#    traces and dumps bad SNLs / snlgroups to CSVs.
# NOTE(review): code left byte-identical (comments added only) — this block
# is line-wrapped mid-string in the source and its branch-local state makes
# a restyle unsafe without execution.
def analyze(args): """analyze data at any point for a copy of the streaming figure""" # NOTE: make copy online first with suffix _%Y-%m-%d and note figure id fig = py.get_figure(creds['username'], args.fig_id) if args.t: if args.fig_id == 42: label_entries = filter(None, '<br>'.join(fig['data'][2]['text']).split('<br>')) pairs = map(make_tuple, label_entries) grps = set(chain.from_iterable(pairs)) snlgrp_cursor = sma.snlgroups.aggregate([ { '$match': { 'snlgroup_id': { '$in': list(grps) }, 'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'} } }, { '$project': { 'snlgroup_id': 1, 'canonical_snl.snlgroup_key': 1, '_id': 0 } } ], cursor={}) snlgroup_keys = {} for d in snlgrp_cursor: snlgroup_keys[d['snlgroup_id']] = d['canonical_snl']['snlgroup_key'] print snlgroup_keys[40890] sma2 = SNLMongoAdapter.from_file( os.path.join(os.environ['DB_LOC'], 'materials_db.yaml') ) materials_cursor = sma2.database.materials.aggregate([ { '$match': { 'snlgroup_id_final': { '$in': list(grps) }, 'snl_final.about.projects': {'$ne': 'CederDahn Challenge'} } }, { '$project': { 'snlgroup_id_final': 1, '_id': 0, 'task_id': 1, 'final_energy_per_atom': 1, 'band_gap.search_gap.band_gap': 1, 'volume': 1, 'nsites': 1 }} ], cursor={}) snlgroup_data = {} for material in materials_cursor: snlgroup_id = material['snlgroup_id_final'] final_energy_per_atom = material['final_energy_per_atom'] band_gap = material['band_gap']['search_gap']['band_gap'] volume_per_atom = material['volume'] / material['nsites'] snlgroup_data[snlgroup_id] = { 'final_energy_per_atom': final_energy_per_atom, 'band_gap': band_gap, 'task_id': material['task_id'], 'volume_per_atom': volume_per_atom } print snlgroup_data[40890] filestem = 'mpworks/check_snl/results/bad_snlgroups_2_' with open(filestem+'in_matdb.csv', 'wb') as f, \ open(filestem+'notin_matdb.csv', 'wb') as g: writer1, writer2 = csv.writer(f), csv.writer(g) header = [ 'category', 'composition', 'snlgroup_id 1', 'sg_num 1', 'task_id 1', 'snlgroup_id 2', 
'sg_num 2', 'task_id 2', 'delta_energy', 'delta_bandgap', 'delta_volume_per_atom', 'rms_dist', 'scenario' ] writer1.writerow(header) writer2.writerow(header) for primary_id, secondary_id in pairs: if primary_id not in snlgroup_keys or \ secondary_id not in snlgroup_keys: continue composition, primary_sg_num = snlgroup_keys[primary_id].split('--') secondary_sg_num = snlgroup_keys[secondary_id].split('--')[1] category = 'same SGs' if primary_sg_num == secondary_sg_num else 'diff. SGs' if primary_id not in snlgroup_data or secondary_id not in snlgroup_data: delta_energy, delta_bandgap, delta_volume_per_atom = '', '', '' else: delta_energy = "{0:.3g}".format(abs( snlgroup_data[primary_id]['final_energy_per_atom'] - \ snlgroup_data[secondary_id]['final_energy_per_atom'] )) delta_bandgap = "{0:.3g}".format(abs( snlgroup_data[primary_id]['band_gap'] - \ snlgroup_data[secondary_id]['band_gap'] )) delta_volume_per_atom = "{0:.3g}".format(abs( snlgroup_data[primary_id]['volume_per_atom'] - \ snlgroup_data[secondary_id]['volume_per_atom'] )) scenario, rms_dist_str = '', '' if category == 'diff. 
SGs' and delta_energy and delta_bandgap: scenario = 'different' if ( float(delta_energy) > 0.01 or float(delta_bandgap) > 0.1 ) else 'similar' snlgrp1_dict = sma.snlgroups.find_one({ "snlgroup_id": primary_id }) snlgrp2_dict = sma.snlgroups.find_one({ "snlgroup_id": secondary_id }) snlgrp1 = SNLGroup.from_dict(snlgrp1_dict) snlgrp2 = SNLGroup.from_dict(snlgrp2_dict) primary_structure = snlgrp1.canonical_structure secondary_structure = snlgrp2.canonical_structure rms_dist = matcher.get_rms_dist(primary_structure, secondary_structure) if rms_dist is not None: rms_dist_str = "({0:.3g},{1:.3g})".format(*rms_dist) print rms_dist_str row = [ category, composition, primary_id, primary_sg_num, snlgroup_data[primary_id]['task_id'] \ if primary_id in snlgroup_data else '', secondary_id, secondary_sg_num, snlgroup_data[secondary_id]['task_id'] \ if secondary_id in snlgroup_data else '', delta_energy, delta_bandgap, delta_volume_per_atom, rms_dist_str, scenario ] if delta_energy and delta_bandgap: writer1.writerow(row) else: writer2.writerow(row) elif args.fig_id == 16: out_fig = Figure() badsnls_trace = Scatter(x=[], y=[], text=[], mode='markers', name='SG Changes') bisectrix = Scatter(x=[0,230], y=[0,230], mode='lines', name='bisectrix') print 'pulling bad snls from plotly ...' bad_snls = OrderedDict() for category, text in zip(fig['data'][2]['y'], fig['data'][2]['text']): for snl_id in map(int, text.split('<br>')): bad_snls[snl_id] = category with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f: print 'pulling bad snls from database ...' mpsnl_cursor = sma.snl.find({ 'snl_id': { '$in': bad_snls.keys() }, 'about.projects': {'$ne': 'CederDahn Challenge'} }) writer = csv.writer(f) writer.writerow([ 'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks', 'projects', 'authors' ]) print 'writing bad snls to file ...' 
for mpsnl_dict in mpsnl_cursor: mpsnl = MPStructureNL.from_dict(mpsnl_dict) row = [ mpsnl.snl_id, bad_snls[mpsnl.snl_id], mpsnl.snlgroup_key ] row += _get_snl_extra_info(mpsnl) writer.writerow(row) sg_num = mpsnl.snlgroup_key.split('--')[1] if (bad_snls[mpsnl.snl_id] == 'SG default' and sg_num != '-1') or \ bad_snls[mpsnl.snl_id] == 'SG change': mpsnl.structure.remove_oxidation_states() sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1) badsnls_trace['x'].append(mpsnl.sg_num) badsnls_trace['y'].append(sf.get_spacegroup_number()) badsnls_trace['text'].append(mpsnl.snl_id) if bad_snls[mpsnl.snl_id] == 'SG default': print sg_num, sf.get_spacegroup_number() print 'plotting out-fig ...' out_fig['data'] = Data([bisectrix, badsnls_trace]) out_fig['layout'] = Layout( showlegend=False, hovermode='closest', title='Spacegroup Assignment Changes', xaxis=XAxis(showgrid=False, title='old SG number', range=[0,230]), yaxis=YAxis(showgrid=False, title='new SG number', range=[0,230]), ) filename = 'spacegroup_changes_' filename += datetime.datetime.now().strftime('%Y-%m-%d') py.plot(out_fig, filename=filename, auto_open=False) elif args.fig_id == 43: # SNLGroupMemberChecker matcher2 = StructureMatcher( ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=False, scale=True, attempt_supercell=True, comparator=ElementComparator() ) print 'pulling data from plotly ...' 
trace = Scatter(x=[], y=[], text=[], mode='markers', name='mismatches') bad_snls = OrderedDict() # snlgroup_id : [ mismatching snl_ids ] for category, text in zip(fig['data'][2]['y'], fig['data'][2]['text']): if category != 'mismatch': continue for entry in text.split('<br>'): fields = entry.split(':') snlgroup_id = int(fields[0].split(',')[0]) print snlgroup_id snlgrp_dict = sma.snlgroups.find_one({ 'snlgroup_id': snlgroup_id }) snlgrp = SNLGroup.from_dict(snlgrp_dict) s1 = snlgrp.canonical_structure.get_primitive_structure() bad_snls[snlgroup_id] = [] for i, snl_id in enumerate(fields[1].split(',')): mpsnl_dict = sma.snl.find_one({ 'snl_id': int(snl_id) }) if 'CederDahn Challenge' in mpsnl_dict['about']['projects']: print 'skip CederDahn: %s' % snl_id continue mpsnl = MPStructureNL.from_dict(mpsnl_dict) s2 = mpsnl.structure.get_primitive_structure() is_match = matcher2.fit(s1, s2) if is_match: continue bad_snls[snlgroup_id].append(snl_id) trace['x'].append(snlgroup_id) trace['y'].append(i+1) trace['text'].append(snl_id) if len(bad_snls[snlgroup_id]) < 1: bad_snls.pop(snlgroup_id, None) with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f: print 'pulling bad snlgroups from database ...' snlgroup_cursor = sma.snlgroups.find({ 'snlgroup_id': { '$in': bad_snls.keys() }, }) writer = csv.writer(f) writer.writerow(['snlgroup_id', 'snlgroup_key', 'mismatching snl_ids']) print 'writing bad snlgroups to file ...' for snlgroup_dict in snlgroup_cursor: snlgroup = SNLGroup.from_dict(snlgroup_dict) row = [ snlgroup.snlgroup_id, snlgroup.canonical_snl.snlgroup_key, ' '.join(bad_snls[snlgroup.snlgroup_id]) ] writer.writerow(row) print 'plotting out-fig ...' 
out_fig = Figure() out_fig['data'] = Data([trace]) out_fig['layout'] = Layout( showlegend=False, hovermode='closest', title='Member Mismatches of SNLGroup Canonicals', xaxis=XAxis(showgrid=False, title='snlgroup_id', showexponent='none'), yaxis=YAxis(showgrid=False, title='# mismatching SNLs'), ) filename = 'groupmember_mismatches_' filename += datetime.datetime.now().strftime('%Y-%m-%d') py.plot(out_fig, filename=filename, auto_open=False) else: errors = Counter() bad_snls = OrderedDict() bad_snlgroups = OrderedDict() for i,d in enumerate(fig['data']): if not isinstance(d, Scatter): continue if not 'x' in d or not 'y' in d or not 'text' in d: continue start_id = int(d['name'].split(' - ')[0][:-1])*1000 marker_colors = d['marker']['color'] if i < 2*num_snl_streams: # spacegroups errors += Counter(marker_colors) for idx,color in enumerate(marker_colors): snl_id = start_id + d['x'][idx] color_index = category_colors.index(color) category = categories[color_index] bad_snls[snl_id] = category else: # groupmembers for idx,color in enumerate(marker_colors): if color != category_colors[0]: continue snlgroup_id = start_id + d['x'][idx] mismatch_snl_id, canonical_snl_id = d['text'][idx].split(' != ') bad_snlgroups[snlgroup_id] = int(mismatch_snl_id) print errors fig_data = fig['data'][-1] fig_data['x'] = [ errors[color] for color in fig_data['marker']['color'] ] filename = _get_filename() print filename #py.plot(fig, filename=filename) with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f: mpsnl_cursor = sma.snl.find({ 'snl_id': { '$in': bad_snls.keys() } }) writer = csv.writer(f) writer.writerow([ 'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks', 'projects', 'authors' ]) for mpsnl_dict in mpsnl_cursor: mpsnl = MPStructureNL.from_dict(mpsnl_dict) row = [ mpsnl.snl_id, bad_snls[mpsnl.snl_id], mpsnl.snlgroup_key ] row += _get_snl_extra_info(mpsnl) writer.writerow(row) with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f: snlgrp_cursor = 
sma.snlgroups.find({ 'snlgroup_id': { '$in': bad_snlgroups.keys() } }) first_mismatch_snls_cursor = sma.snl.find({ 'snl_id': { '$in': bad_snlgroups.values() } }) first_mismatch_snl_info = OrderedDict() for mpsnl_dict in first_mismatch_snls_cursor: mpsnl = MPStructureNL.from_dict(mpsnl_dict) first_mismatch_snl_info[mpsnl.snl_id] = _get_snl_extra_info(mpsnl) writer = csv.writer(f) writer.writerow([ 'snlgroup_id', 'snlgroup_key', 'canonical_snl_id', 'first_mismatching_snl_id', 'nsites', 'remarks', 'projects', 'authors' ]) for snlgrp_dict in snlgrp_cursor: snlgrp = SNLGroup.from_dict(snlgrp_dict) first_mismatch_snl_id = bad_snlgroups[snlgrp.snlgroup_id] row = [ snlgrp.snlgroup_id, snlgrp.canonical_snl.snlgroup_key, snlgrp.canonical_snl.snl_id, first_mismatch_snl_id ] row += [ ' & '.join(pair) if pair[0] != pair[1] else pair[0] for pair in zip( _get_snl_extra_info(snlgrp.canonical_snl), first_mismatch_snl_info[int(first_mismatch_snl_id)] ) ] writer.writerow(row)
import os import plotly.plotly as py from pandas import DataFrame from mpworks.snl_utils.snl_mongo import SNLMongoAdapter sma = SNLMongoAdapter.auto_load() sma2 = SNLMongoAdapter.from_file( os.path.join(os.environ['DB_LOC'], 'materials_db.yaml')) def _get_snlgroup_id(snl_id): return sma.snlgroups.find_one({'all_snl_ids': int(snl_id)}, { 'snlgroup_id': 1, '_id': 0 })['snlgroup_id'] def _get_mp_id(snlgroup_id): mat = sma2.database.materials.find_one({'snlgroup_id_final': snlgroup_id}, { '_id': 0, 'task_id': 1 }) if mat is not None: return mat['task_id'] return 'not found' def _get_mp_link(mp_id): if mp_id == 'not found': return mp_id url = 'link:$$https://materialsproject.org/materials/'
if __name__ == '__main__': module_dir = os.path.dirname(os.path.abspath(__file__)) automation_f = os.path.join(module_dir, 'automation.yaml') snl_f = os.path.join(module_dir, 'snl.yaml') with open(automation_f) as f: y = yaml.load(f) mc = MongoClient(y['host'], y['port']) db = mc[y['db']] db.authenticate(y['username'], y['password']) snldb = SNLMongoAdapter.from_file(snl_f) prev_ids = [] # MPS ids that we already took care of print 'INITIALIZING' if RESET: snldb._reset() time.sleep(10) # makes me sleep better at night else: for mps in snldb.snl.find({}, {"about._materialsproject.deprecated.mps_ids": 1}): prev_ids.extend(mps['about']['_materialsproject']['deprecated']['mps_ids']) print 'PROCESSING' for mps in db.mps.find(timeout=False): try:
# process_fw (reformatted duplicate of the earlier variant, using as_dict()):
# migrate an old-style task document `old_task` into the new schema dict `d`.
# Phase 1: resolve or create an SNL (deprecated mps_id lookup, raw 'mps'
# dict, or from the input crystal with remark 'origin unknown'), record
# snl/snlgroup ids; for structure optimizations with output, build a
# post-relaxation 'snl_final' and flag snlgroup changes.
# Phase 2: locate the last relaxation dir (relax1/relax2 vs FW.json style),
# run the VASP signal detectors plus walltime/disk-space checks, downgrade
# 'successful' to 'error' on critical signals, and derive run_tags.
# NOTE(review): code left byte-identical (comments added only) — statement
# order is load-bearing and the text is line-wrapped mid-expression.
def process_fw(self, old_task, d): # AJ - this whole section is different sma = SNLMongoAdapter.auto_load() d["old_engine"] = old_task.get("engine") if "fw_id" in old_task: d["old_fw_id"] = old_task["fw_id"] d["fw_id"] = None d["task_type"] = "GGA+U optimize structure (2x)" if old_task["is_hubbard"] else "GGA optimize structure (2x)" d["submission_id"] = None d["vaspinputset_name"] = None snl_d = sma.snl.find_one({"about._materialsproject.deprecated.mps_ids": old_task["mps_id"]}) if old_task.get("mps_id", -1) > 0 and snl_d: # grab the SNL from the SNL db del snl_d["_id"] d["snl"] = snl_d d["snlgroup_id"] = sma.snlgroups.find_one({"all_snl_ids": d["snl"]["snl_id"]}, {"snlgroup_id": 1})[ "snlgroup_id" ] elif "mps" in old_task and old_task["mps"]: snl = mps_dict_to_snl(old_task["mps"]) mpsnl, snlgroup_id = sma.add_snl(snl) d["snl"] = mpsnl.as_dict() d["snlgroup_id"] = snlgroup_id else: s = Structure.from_dict(old_task["input"]["crystal"]) snl = StructureNL(s, "Anubhav Jain <*****@*****.**>", remarks=["origin unknown"]) mpsnl, snlgroup_id = sma.add_snl(snl) d["snl"] = mpsnl.as_dict() d["snlgroup_id"] = snlgroup_id if "optimize structure" in d["task_type"] and "output" in d: # create a new SNL based on optimized structure new_s = Structure.from_dict(d["output"]["crystal"]) old_snl = StructureNL.from_dict(d["snl"]) history = old_snl.history history.append( { "name": "Materials Project structure optimization", "url": "http://www.materialsproject.org", "description": {"task_type": d["task_type"], "fw_id": d["fw_id"], "task_id": d["task_id"]}, } ) new_snl = StructureNL( new_s, old_snl.authors, old_snl.projects, old_snl.references, old_snl.remarks, old_snl.data, history ) # add snl mpsnl, snlgroup_id = sma.add_snl(new_snl, snlgroup_guess=d["snlgroup_id"]) d["snl_final"] = mpsnl.as_dict() d["snlgroup_id_final"] = snlgroup_id d["snlgroup_changed"] = d["snlgroup_id"] != d["snlgroup_id_final"] # custom processing for detecting errors dir_name = old_task["dir_name"] new_style = 
os.path.exists(os.path.join(dir_name, "FW.json")) vasp_signals = {} critical_errors = [ "INPUTS_DONT_EXIST", "OUTPUTS_DONT_EXIST", "INCOHERENT_POTCARS", "VASP_HASNT_STARTED", "VASP_HASNT_COMPLETED", "CHARGE_UNCONVERGED", "NETWORK_QUIESCED", "HARD_KILLED", "WALLTIME_EXCEEDED", "ATOMS_TOO_CLOSE", "DISK_SPACE_EXCEEDED", ] last_relax_dir = dir_name if not new_style: # get the last relaxation dir # the order is relax2, current dir, then relax1. This is because # after completing relax1, the job happens in the current dir. # Finally, it gets moved to relax2. # There are some weird cases where both the current dir and relax2 # contain data. The relax2 is good, but the current dir is bad. if is_valid_vasp_dir(os.path.join(dir_name, "relax2")): last_relax_dir = os.path.join(dir_name, "relax2") elif is_valid_vasp_dir(dir_name): pass elif is_valid_vasp_dir(os.path.join(dir_name, "relax1")): last_relax_dir = os.path.join(dir_name, "relax1") vasp_signals["last_relax_dir"] = last_relax_dir ## see what error signals are present print "getting signals for dir :{}".format(last_relax_dir) sl = SignalDetectorList() sl.append(VASPInputsExistSignal()) sl.append(VASPOutputsExistSignal()) sl.append(VASPOutSignal()) sl.append(HitAMemberSignal()) sl.append(SegFaultSignal()) sl.append(VASPStartedCompletedSignal()) signals = sl.detect_all(last_relax_dir) signals = signals.union(WallTimeSignal().detect(dir_name)) if not new_style: root_dir = os.path.dirname(dir_name) # one level above dir_name signals = signals.union(WallTimeSignal().detect(root_dir)) signals = signals.union(DiskSpaceExceededSignal().detect(dir_name)) if not new_style: root_dir = os.path.dirname(dir_name) # one level above dir_name signals = signals.union(DiskSpaceExceededSignal().detect(root_dir)) signals = list(signals) critical_signals = [val for val in signals if val in critical_errors] vasp_signals["signals"] = signals vasp_signals["critical_signals"] = critical_signals vasp_signals["num_signals"] = len(signals) 
vasp_signals["num_critical"] = len(critical_signals) if len(critical_signals) > 0 and d["state"] == "successful": d["state"] = "error" d["analysis"] = d.get("analysis", {}) d["analysis"]["errors_MP"] = vasp_signals d["run_tags"] = ["PBE"] d["run_tags"].extend(d["pseudo_potential"]["labels"]) d["run_tags"].extend([e + "=" + str(d["hubbards"].get(e, 0)) for e in d["elements"]])
# Module-level setup for the SNL checker: plotly credentials/stream ids, the
# auto-loaded SNL adapter, a strict StructureMatcher (primitive cell,
# element-based comparison), and the stream/pair bookkeeping constants used
# to size the check jobs.
# NOTE(review): under Python 3, num_ids_per_stream_k and num_pairs_max would
# become floats ('/' division) — this file is Python 2 code, where they stay
# ints; confirm before porting.
from mpworks.snl_utils.snl_mongo import SNLMongoAdapter from mpworks.snl_utils.mpsnl import MPStructureNL, SNLGroup from pymatgen.symmetry.analyzer import SpacegroupAnalyzer from pymatgen.analysis.structure_matcher import StructureMatcher, ElementComparator, SpeciesComparator import plotly.plotly as py import plotly.tools as tls from plotly.graph_objs import * from mpworks.check_snl.utils import div_plus_mod, sleep from ast import literal_eval as make_tuple from itertools import chain creds = tls.get_credentials_file() stream_ids = creds['stream_ids'] min_sleep = 0.052 sma = SNLMongoAdapter.auto_load() matcher = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=True, scale=True, attempt_supercell=False, comparator=ElementComparator()) num_ids_per_stream = 20000 num_ids_per_stream_k = num_ids_per_stream / 1000 num_snls = sma.snl.count() num_snlgroups = sma.snlgroups.count() num_pairs_per_job = 1000 * num_ids_per_stream num_pairs_max = num_snlgroups * (num_snlgroups - 1) / 2
def analyze(args):
    """analyze data at any point for a copy of the streaming figure

    Pulls a previously streamed figure from plotly (by ``args.fig_id``) and
    post-processes it into CSV reports and/or derived plotly figures.
    Behavior branches on the figure id:

    * 42 -- pairwise snlgroup comparison report (energies, band gaps, volumes,
      RMS distances) split into in/notin-materials-db CSVs;
    * 16 -- spacegroup-change report and scatter plot of old vs. new SG number;
    * 43 -- SNLGroupMemberChecker mismatch report and plot;
    * otherwise -- generic error-category accounting over the stream traces.

    NOTE(review): relies on module globals (sma, matcher, creds, plus
    categories/category_colors/num_snl_streams/_get_filename/_get_snl_extra_info
    defined elsewhere in this file) -- must be run within this module.
    """
    # NOTE: make copy online first with suffix _%Y-%m-%d and note figure id
    fig = py.get_figure(creds['username'], args.fig_id)
    if args.t:
        if args.fig_id == 42:
            # --- pairwise snlgroup comparison -------------------------------
            # Pair labels were streamed as '<br>'-separated "(id1, id2)" text.
            label_entries = filter(
                None, '<br>'.join(fig['data'][2]['text']).split('<br>'))
            pairs = map(make_tuple, label_entries)
            grps = set(chain.from_iterable(pairs))
            # Map snlgroup_id -> snlgroup_key for every group in any pair.
            snlgrp_cursor = sma.snlgroups.aggregate([
                {'$match': {
                    'snlgroup_id': {'$in': list(grps)},
                    'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'}
                }},
                {'$project': {
                    'snlgroup_id': 1, 'canonical_snl.snlgroup_key': 1, '_id': 0
                }}
            ], cursor={})
            snlgroup_keys = {}
            for d in snlgrp_cursor:
                snlgroup_keys[d['snlgroup_id']] = d['canonical_snl']['snlgroup_key']
            print snlgroup_keys[40890]  # spot check of a known group
            # Secondary adapter pointing at the materials database.
            sma2 = SNLMongoAdapter.from_file(
                os.path.join(os.environ['DB_LOC'], 'materials_db.yaml'))
            materials_cursor = sma2.database.materials.aggregate([
                {'$match': {
                    'snlgroup_id_final': {'$in': list(grps)},
                    'snl_final.about.projects': {'$ne': 'CederDahn Challenge'}
                }},
                {'$project': {
                    'snlgroup_id_final': 1, '_id': 0, 'task_id': 1,
                    'final_energy_per_atom': 1,
                    'band_gap.search_gap.band_gap': 1,
                    'volume': 1, 'nsites': 1
                }}
            ], cursor={})
            # snlgroup_id -> per-material summary used for the deltas below.
            snlgroup_data = {}
            for material in materials_cursor:
                snlgroup_id = material['snlgroup_id_final']
                final_energy_per_atom = material['final_energy_per_atom']
                band_gap = material['band_gap']['search_gap']['band_gap']
                volume_per_atom = material['volume'] / material['nsites']
                snlgroup_data[snlgroup_id] = {
                    'final_energy_per_atom': final_energy_per_atom,
                    'band_gap': band_gap,
                    'task_id': material['task_id'],
                    'volume_per_atom': volume_per_atom
                }
            print snlgroup_data[40890]  # spot check
            filestem = 'mpworks/check_snl/results/bad_snlgroups_2_'
            # Two reports: pairs with materials-db data vs. pairs without.
            with open(filestem+'in_matdb.csv', 'wb') as f, \
                    open(filestem+'notin_matdb.csv', 'wb') as g:
                writer1, writer2 = csv.writer(f), csv.writer(g)
                header = [
                    'category', 'composition',
                    'snlgroup_id 1', 'sg_num 1', 'task_id 1',
                    'snlgroup_id 2', 'sg_num 2', 'task_id 2',
                    'delta_energy', 'delta_bandgap', 'delta_volume_per_atom',
                    'rms_dist', 'scenario'
                ]
                writer1.writerow(header)
                writer2.writerow(header)
                for primary_id, secondary_id in pairs:
                    # skip pairs with groups filtered out above
                    if primary_id not in snlgroup_keys or \
                            secondary_id not in snlgroup_keys:
                        continue
                    # snlgroup_key format is '<composition>--<sg_num>'
                    composition, primary_sg_num = snlgroup_keys[primary_id].split('--')
                    secondary_sg_num = snlgroup_keys[secondary_id].split('--')[1]
                    category = 'same SGs' if primary_sg_num == secondary_sg_num else 'diff. SGs'
                    if primary_id not in snlgroup_data or secondary_id not in snlgroup_data:
                        delta_energy, delta_bandgap, delta_volume_per_atom = '', '', ''
                    else:
                        delta_energy = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['final_energy_per_atom'] -
                            snlgroup_data[secondary_id]['final_energy_per_atom']
                        ))
                        delta_bandgap = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['band_gap'] -
                            snlgroup_data[secondary_id]['band_gap']
                        ))
                        delta_volume_per_atom = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['volume_per_atom'] -
                            snlgroup_data[secondary_id]['volume_per_atom']
                        ))
                    scenario, rms_dist_str = '', ''
                    # Only structurally compare groups with different SGs and
                    # available property deltas.
                    if category == 'diff. SGs' and delta_energy and delta_bandgap:
                        scenario = 'different' if (
                            float(delta_energy) > 0.01 or
                            float(delta_bandgap) > 0.1) else 'similar'
                        snlgrp1_dict = sma.snlgroups.find_one(
                            {"snlgroup_id": primary_id})
                        snlgrp2_dict = sma.snlgroups.find_one(
                            {"snlgroup_id": secondary_id})
                        snlgrp1 = SNLGroup.from_dict(snlgrp1_dict)
                        snlgrp2 = SNLGroup.from_dict(snlgrp2_dict)
                        primary_structure = snlgrp1.canonical_structure
                        secondary_structure = snlgrp2.canonical_structure
                        rms_dist = matcher.get_rms_dist(
                            primary_structure, secondary_structure)
                        if rms_dist is not None:
                            rms_dist_str = "({0:.3g},{1:.3g})".format(*rms_dist)
                            print rms_dist_str
                    row = [
                        category, composition,
                        primary_id, primary_sg_num,
                        snlgroup_data[primary_id]['task_id']
                        if primary_id in snlgroup_data else '',
                        secondary_id, secondary_sg_num,
                        snlgroup_data[secondary_id]['task_id']
                        if secondary_id in snlgroup_data else '',
                        delta_energy, delta_bandgap, delta_volume_per_atom,
                        rms_dist_str, scenario
                    ]
                    # Rows with property data go to the in_matdb report.
                    if delta_energy and delta_bandgap:
                        writer1.writerow(row)
                    else:
                        writer2.writerow(row)
        elif args.fig_id == 16:
            # --- spacegroup assignment changes ------------------------------
            out_fig = Figure()
            badsnls_trace = Scatter(x=[], y=[], text=[], mode='markers',
                                    name='SG Changes')
            bisectrix = Scatter(x=[0, 230], y=[0, 230], mode='lines',
                                name='bisectrix')
            print 'pulling bad snls from plotly ...'
            bad_snls = OrderedDict()
            # trace text holds '<br>'-separated snl_ids per category
            for category, text in zip(fig['data'][2]['y'],
                                      fig['data'][2]['text']):
                for snl_id in map(int, text.split('<br>')):
                    bad_snls[snl_id] = category
            with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
                print 'pulling bad snls from database ...'
                mpsnl_cursor = sma.snl.find({
                    'snl_id': {'$in': bad_snls.keys()},
                    'about.projects': {'$ne': 'CederDahn Challenge'}
                })
                writer = csv.writer(f)
                writer.writerow([
                    'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks',
                    'projects', 'authors'
                ])
                print 'writing bad snls to file ...'
                for mpsnl_dict in mpsnl_cursor:
                    mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                    row = [mpsnl.snl_id, bad_snls[mpsnl.snl_id],
                           mpsnl.snlgroup_key]
                    row += _get_snl_extra_info(mpsnl)
                    writer.writerow(row)
                    sg_num = mpsnl.snlgroup_key.split('--')[1]
                    # Re-determine the spacegroup for defaulted/changed SNLs
                    # and plot old vs. new SG number.
                    if (bad_snls[mpsnl.snl_id] == 'SG default' and sg_num != '-1') or \
                            bad_snls[mpsnl.snl_id] == 'SG change':
                        mpsnl.structure.remove_oxidation_states()
                        sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
                        badsnls_trace['x'].append(mpsnl.sg_num)
                        badsnls_trace['y'].append(sf.get_spacegroup_number())
                        badsnls_trace['text'].append(mpsnl.snl_id)
                        if bad_snls[mpsnl.snl_id] == 'SG default':
                            print sg_num, sf.get_spacegroup_number()
            print 'plotting out-fig ...'
            out_fig['data'] = Data([bisectrix, badsnls_trace])
            out_fig['layout'] = Layout(
                showlegend=False, hovermode='closest',
                title='Spacegroup Assignment Changes',
                xaxis=XAxis(showgrid=False, title='old SG number',
                            range=[0, 230]),
                yaxis=YAxis(showgrid=False, title='new SG number',
                            range=[0, 230]),
            )
            filename = 'spacegroup_changes_'
            filename += datetime.datetime.now().strftime('%Y-%m-%d')
            py.plot(out_fig, filename=filename, auto_open=False)
        elif args.fig_id == 43:  # SNLGroupMemberChecker
            # --- group-member mismatches ------------------------------------
            # Looser matcher (supercell attempts) than the module default.
            matcher2 = StructureMatcher(
                ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=False,
                scale=True, attempt_supercell=True,
                comparator=ElementComparator())
            print 'pulling data from plotly ...'
            trace = Scatter(x=[], y=[], text=[], mode='markers',
                            name='mismatches')
            bad_snls = OrderedDict()  # snlgroup_id : [ mismatching snl_ids ]
            for category, text in zip(fig['data'][2]['y'],
                                      fig['data'][2]['text']):
                if category != 'mismatch':
                    continue
                # entry format: '<snlgroup_id>,...:<snl_id>,<snl_id>,...'
                for entry in text.split('<br>'):
                    fields = entry.split(':')
                    snlgroup_id = int(fields[0].split(',')[0])
                    print snlgroup_id
                    snlgrp_dict = sma.snlgroups.find_one(
                        {'snlgroup_id': snlgroup_id})
                    snlgrp = SNLGroup.from_dict(snlgrp_dict)
                    s1 = snlgrp.canonical_structure.get_primitive_structure()
                    bad_snls[snlgroup_id] = []
                    for i, snl_id in enumerate(fields[1].split(',')):
                        mpsnl_dict = sma.snl.find_one({'snl_id': int(snl_id)})
                        if 'CederDahn Challenge' in mpsnl_dict['about']['projects']:
                            print 'skip CederDahn: %s' % snl_id
                            continue
                        mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                        s2 = mpsnl.structure.get_primitive_structure()
                        is_match = matcher2.fit(s1, s2)
                        if is_match:
                            continue
                        # genuinely mismatching member
                        bad_snls[snlgroup_id].append(snl_id)
                        trace['x'].append(snlgroup_id)
                        trace['y'].append(i + 1)
                        trace['text'].append(snl_id)
                    # drop groups where every member matched after all
                    if len(bad_snls[snlgroup_id]) < 1:
                        bad_snls.pop(snlgroup_id, None)
            with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f:
                print 'pulling bad snlgroups from database ...'
                snlgroup_cursor = sma.snlgroups.find({
                    'snlgroup_id': {'$in': bad_snls.keys()},
                })
                writer = csv.writer(f)
                writer.writerow(
                    ['snlgroup_id', 'snlgroup_key', 'mismatching snl_ids'])
                print 'writing bad snlgroups to file ...'
                for snlgroup_dict in snlgroup_cursor:
                    snlgroup = SNLGroup.from_dict(snlgroup_dict)
                    row = [
                        snlgroup.snlgroup_id,
                        snlgroup.canonical_snl.snlgroup_key,
                        ' '.join(bad_snls[snlgroup.snlgroup_id])
                    ]
                    writer.writerow(row)
            print 'plotting out-fig ...'
            out_fig = Figure()
            out_fig['data'] = Data([trace])
            out_fig['layout'] = Layout(
                showlegend=False, hovermode='closest',
                title='Member Mismatches of SNLGroup Canonicals',
                xaxis=XAxis(showgrid=False, title='snlgroup_id',
                            showexponent='none'),
                yaxis=YAxis(showgrid=False, title='# mismatching SNLs'),
            )
            filename = 'groupmember_mismatches_'
            filename += datetime.datetime.now().strftime('%Y-%m-%d')
            py.plot(out_fig, filename=filename, auto_open=False)
        else:
            # --- generic error accounting over all stream traces ------------
            errors = Counter()
            bad_snls = OrderedDict()
            bad_snlgroups = OrderedDict()
            for i, d in enumerate(fig['data']):
                if not isinstance(d, Scatter):
                    continue
                if not 'x' in d or not 'y' in d or not 'text' in d:
                    continue
                # trace name encodes the id offset, e.g. '42k - ...'
                start_id = int(d['name'].split(' - ')[0][:-1]) * 1000
                marker_colors = d['marker']['color']
                if i < 2 * num_snl_streams:  # spacegroups
                    errors += Counter(marker_colors)
                    for idx, color in enumerate(marker_colors):
                        snl_id = start_id + d['x'][idx]
                        color_index = category_colors.index(color)
                        category = categories[color_index]
                        bad_snls[snl_id] = category
                else:  # groupmembers
                    for idx, color in enumerate(marker_colors):
                        if color != category_colors[0]:
                            continue
                        snlgroup_id = start_id + d['x'][idx]
                        mismatch_snl_id, canonical_snl_id = \
                            d['text'][idx].split(' != ')
                        bad_snlgroups[snlgroup_id] = int(mismatch_snl_id)
            print errors
            # update the summary trace with per-category counts
            fig_data = fig['data'][-1]
            fig_data['x'] = [
                errors[color] for color in fig_data['marker']['color']
            ]
            filename = _get_filename()
            print filename
            #py.plot(fig, filename=filename)
            with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
                mpsnl_cursor = sma.snl.find({'snl_id': {'$in': bad_snls.keys()}})
                writer = csv.writer(f)
                writer.writerow([
                    'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks',
                    'projects', 'authors'
                ])
                for mpsnl_dict in mpsnl_cursor:
                    mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                    row = [mpsnl.snl_id, bad_snls[mpsnl.snl_id],
                           mpsnl.snlgroup_key]
                    row += _get_snl_extra_info(mpsnl)
                    writer.writerow(row)
            with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f:
                snlgrp_cursor = sma.snlgroups.find(
                    {'snlgroup_id': {'$in': bad_snlgroups.keys()}})
                first_mismatch_snls_cursor = sma.snl.find(
                    {'snl_id': {'$in': bad_snlgroups.values()}})
                # snl_id -> extra info for the first mismatching member
                first_mismatch_snl_info = OrderedDict()
                for mpsnl_dict in first_mismatch_snls_cursor:
                    mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                    first_mismatch_snl_info[mpsnl.snl_id] = _get_snl_extra_info(mpsnl)
                writer = csv.writer(f)
                writer.writerow([
                    'snlgroup_id', 'snlgroup_key', 'canonical_snl_id',
                    'first_mismatching_snl_id', 'nsites', 'remarks',
                    'projects', 'authors'
                ])
                for snlgrp_dict in snlgrp_cursor:
                    snlgrp = SNLGroup.from_dict(snlgrp_dict)
                    first_mismatch_snl_id = bad_snlgroups[snlgrp.snlgroup_id]
                    row = [
                        snlgrp.snlgroup_id,
                        snlgrp.canonical_snl.snlgroup_key,
                        snlgrp.canonical_snl.snl_id, first_mismatch_snl_id
                    ]
                    # merge canonical vs. mismatching extra info; collapse
                    # identical entries to a single value
                    row += [
                        ' & '.join(pair) if pair[0] != pair[1] else pair[0]
                        for pair in zip(
                            _get_snl_extra_info(snlgrp.canonical_snl),
                            first_mismatch_snl_info[int(first_mismatch_snl_id)])
                    ]
                    writer.writerow(row)
import csv
from mpworks.snl_utils.snl_mongo import SNLMongoAdapter

# Report every SNL that contains at least one zero-occupancy site.
# One CSV row per SNL: id, count of zero-occupancy sites, a representative
# ICSD id (if any), validity flag, and formula.
sma = SNLMongoAdapter.auto_load()

with open('mpworks/check_snl/results/zero_occu_sites.csv', 'wb') as f:
    writer = csv.writer(f)
    writer.writerow([
        'snl_id', 'num_zero_occu_sites', 'icsd_id', 'is_valid', 'formula'
    ])
    # Unwind sites/species, keep only zero-occupancy entries, then group by
    # snl_id so each SNL yields a single row with aggregated metadata.
    for doc in sma.snl.aggregate([
        #{ '$match': { 'about._icsd.icsd_id': { '$exists': True } } },
        {'$unwind': '$sites'},
        {'$unwind': '$sites.species'},
        {'$project': {
            'snl_id': 1, 'sites.species.occu': 1, '_id': 0,
            'about._icsd.icsd_id': 1, 'is_valid': 1,
            'reduced_cell_formula_abc': 1
        }},
        {'$match': {'sites.species.occu': 0.0}},
        {'$group': {
            '_id': '$snl_id',
            'num_zero_occu_sites': {'$sum': 1},
            'icsd_ids': {'$addToSet': '$about._icsd.icsd_id'},
            'is_valid': {'$addToSet': '$is_valid'},
            'formula': {'$addToSet': '$reduced_cell_formula_abc'}
        }},
    ], cursor={}):
        # not every SNL has an ICSD id; fall back to an empty cell
        icsd_id = doc['icsd_ids'][0] if len(doc['icsd_ids']) > 0 else ''
        row = [
            doc['_id'], doc['num_zero_occu_sites'], icsd_id,
            doc['is_valid'][0], doc['formula'][0]
        ]
        # FIX(review): the original built `row` but never emitted it, so the
        # CSV contained only the header; write each aggregated row out.
        writer.writerow(row)
def process_fw(self, dir_name, d): d["task_id_deprecated"] = int( d["task_id"].split('-')[-1]) # useful for WC and AJ # update the run fields to give species group in root, if exists for r in d['run_tags']: if "species_group=" in r: d["species_group"] = int(r.split("=")[-1]) break # custom Materials Project post-processing for FireWorks with zopen(zpath(os.path.join(dir_name, 'FW.json'))) as f: fw_dict = json.load(f) d['fw_id'] = fw_dict['fw_id'] d['snl'] = fw_dict['spec']['mpsnl'] d['snlgroup_id'] = fw_dict['spec']['snlgroup_id'] d['vaspinputset_name'] = fw_dict['spec'].get('vaspinputset_name') d['task_type'] = fw_dict['spec']['task_type'] # Process data for deformed structures if 'deformed' in d['task_type']: d['deformation_matrix'] = fw_dict['spec']['deformation_matrix'] d['original_task_id'] = fw_dict['spec']['original_task_id'] if not self.update_duplicates: if 'optimize structure' in d['task_type'] and 'output' in d: # create a new SNL based on optimized structure new_s = Structure.from_dict(d['output']['crystal']) old_snl = StructureNL.from_dict(d['snl']) history = old_snl.history history.append({ 'name': 'Materials Project structure optimization', 'url': 'http://www.materialsproject.org', 'description': { 'task_type': d['task_type'], 'fw_id': d['fw_id'], 'task_id': d['task_id'] } }) new_snl = StructureNL(new_s, old_snl.authors, old_snl.projects, old_snl.references, old_snl.remarks, old_snl.data, history) # enter new SNL into SNL db # get the SNL mongo adapter sma = SNLMongoAdapter.auto_load() # add snl mpsnl, snlgroup_id, spec_group = sma.add_snl( new_snl, snlgroup_guess=d['snlgroup_id']) d['snl_final'] = mpsnl.as_dict() d['snlgroup_id_final'] = snlgroup_id d['snlgroup_changed'] = (d['snlgroup_id'] != d['snlgroup_id_final']) else: d['snl_final'] = d['snl'] d['snlgroup_id_final'] = d['snlgroup_id'] d['snlgroup_changed'] = False # custom processing for detecting errors new_style = os.path.exists(zpath(os.path.join(dir_name, 'FW.json'))) vasp_signals = {} 
critical_errors = [ "INPUTS_DONT_EXIST", "OUTPUTS_DONT_EXIST", "INCOHERENT_POTCARS", "VASP_HASNT_STARTED", "VASP_HASNT_COMPLETED", "CHARGE_UNCONVERGED", "NETWORK_QUIESCED", "HARD_KILLED", "WALLTIME_EXCEEDED", "ATOMS_TOO_CLOSE", "DISK_SPACE_EXCEEDED", "NO_RELAX2", "POSITIVE_ENERGY" ] last_relax_dir = dir_name if not new_style: # get the last relaxation dir # the order is relax2, current dir, then relax1. This is because # after completing relax1, the job happens in the current dir. # Finally, it gets moved to relax2. # There are some weird cases where both the current dir and relax2 # contain data. The relax2 is good, but the current dir is bad. if is_valid_vasp_dir(os.path.join(dir_name, "relax2")): last_relax_dir = os.path.join(dir_name, "relax2") elif is_valid_vasp_dir(dir_name): pass elif is_valid_vasp_dir(os.path.join(dir_name, "relax1")): last_relax_dir = os.path.join(dir_name, "relax1") vasp_signals['last_relax_dir'] = last_relax_dir ## see what error signals are present print "getting signals for dir :{}".format(last_relax_dir) sl = SignalDetectorList() sl.append(VASPInputsExistSignal()) sl.append(VASPOutputsExistSignal()) sl.append(VASPOutSignal()) sl.append(HitAMemberSignal()) sl.append(SegFaultSignal()) sl.append(VASPStartedCompletedSignal()) if d['state'] == 'successful' and 'optimize structure' in d[ 'task_type']: sl.append(Relax2ExistsSignal()) signals = sl.detect_all(last_relax_dir) signals = signals.union(WallTimeSignal().detect(dir_name)) if not new_style: root_dir = os.path.dirname(dir_name) # one level above dir_name signals = signals.union(WallTimeSignal().detect(root_dir)) signals = signals.union(DiskSpaceExceededSignal().detect(dir_name)) if not new_style: root_dir = os.path.dirname(dir_name) # one level above dir_name signals = signals.union(DiskSpaceExceededSignal().detect(root_dir)) if d.get('output', {}).get('final_energy', None) > 0: signals.add('POSITIVE_ENERGY') signals = list(signals) critical_signals = [val for val in signals if val in 
critical_errors] vasp_signals['signals'] = signals vasp_signals['critical_signals'] = critical_signals vasp_signals['num_signals'] = len(signals) vasp_signals['num_critical'] = len(critical_signals) if len(critical_signals) > 0 and d['state'] == "successful": d["state"] = "error" d['analysis'] = d.get('analysis', {}) d['analysis']['errors_MP'] = vasp_signals
def process_fw(self, dir_name, d): d["task_id_deprecated"] = int(d["task_id"].split('-')[-1]) # useful for WC and AJ # update the run fields to give species group in root, if exists for r in d['run_tags']: if "species_group=" in r: d["species_group"] = int(r.split("=")[-1]) break # custom Materials Project post-processing for FireWorks with zopen(zpath(os.path.join(dir_name, 'FW.json'))) as f: fw_dict = json.load(f) d['fw_id'] = fw_dict['fw_id'] d['snl'] = fw_dict['spec']['mpsnl'] d['snlgroup_id'] = fw_dict['spec']['snlgroup_id'] d['vaspinputset_name'] = fw_dict['spec'].get('vaspinputset_name') d['task_type'] = fw_dict['spec']['task_type'] # Process data for deformed structures if 'deformed' in d['task_type']: d['deformation_matrix'] = fw_dict['spec']['deformation_matrix'] d['original_task_id'] = fw_dict['spec']['original_task_id'] if not self.update_duplicates: if 'optimize structure' in d['task_type'] and 'output' in d: # create a new SNL based on optimized structure new_s = Structure.from_dict(d['output']['crystal']) old_snl = StructureNL.from_dict(d['snl']) history = old_snl.history history.append( {'name': 'Materials Project structure optimization', 'url': 'http://www.materialsproject.org', 'description': {'task_type': d['task_type'], 'fw_id': d['fw_id'], 'task_id': d['task_id']}}) new_snl = StructureNL(new_s, old_snl.authors, old_snl.projects, old_snl.references, old_snl.remarks, old_snl.data, history) # enter new SNL into SNL db # get the SNL mongo adapter sma = SNLMongoAdapter.auto_load() # add snl mpsnl, snlgroup_id, spec_group = sma.add_snl(new_snl, snlgroup_guess=d['snlgroup_id']) d['snl_final'] = mpsnl.as_dict() d['snlgroup_id_final'] = snlgroup_id d['snlgroup_changed'] = (d['snlgroup_id'] != d['snlgroup_id_final']) else: d['snl_final'] = d['snl'] d['snlgroup_id_final'] = d['snlgroup_id'] d['snlgroup_changed'] = False # custom processing for detecting errors new_style = os.path.exists(zpath(os.path.join(dir_name, 'FW.json'))) vasp_signals = {} 
critical_errors = ["INPUTS_DONT_EXIST", "OUTPUTS_DONT_EXIST", "INCOHERENT_POTCARS", "VASP_HASNT_STARTED", "VASP_HASNT_COMPLETED", "CHARGE_UNCONVERGED", "NETWORK_QUIESCED", "HARD_KILLED", "WALLTIME_EXCEEDED", "ATOMS_TOO_CLOSE", "DISK_SPACE_EXCEEDED", "NO_RELAX2", "POSITIVE_ENERGY"] last_relax_dir = dir_name if not new_style: # get the last relaxation dir # the order is relax2, current dir, then relax1. This is because # after completing relax1, the job happens in the current dir. # Finally, it gets moved to relax2. # There are some weird cases where both the current dir and relax2 # contain data. The relax2 is good, but the current dir is bad. if is_valid_vasp_dir(os.path.join(dir_name, "relax2")): last_relax_dir = os.path.join(dir_name, "relax2") elif is_valid_vasp_dir(dir_name): pass elif is_valid_vasp_dir(os.path.join(dir_name, "relax1")): last_relax_dir = os.path.join(dir_name, "relax1") vasp_signals['last_relax_dir'] = last_relax_dir ## see what error signals are present print "getting signals for dir :{}".format(last_relax_dir) sl = SignalDetectorList() sl.append(VASPInputsExistSignal()) sl.append(VASPOutputsExistSignal()) sl.append(VASPOutSignal()) sl.append(HitAMemberSignal()) sl.append(SegFaultSignal()) sl.append(VASPStartedCompletedSignal()) if d['state'] == 'successful' and 'optimize structure' in d['task_type']: sl.append(Relax2ExistsSignal()) signals = sl.detect_all(last_relax_dir) signals = signals.union(WallTimeSignal().detect(dir_name)) if not new_style: root_dir = os.path.dirname(dir_name) # one level above dir_name signals = signals.union(WallTimeSignal().detect(root_dir)) signals = signals.union(DiskSpaceExceededSignal().detect(dir_name)) if not new_style: root_dir = os.path.dirname(dir_name) # one level above dir_name signals = signals.union(DiskSpaceExceededSignal().detect(root_dir)) if d.get('output',{}).get('final_energy', None) > 0: signals.add('POSITIVE_ENERGY') signals = list(signals) critical_signals = [val for val in signals if val in 
critical_errors] vasp_signals['signals'] = signals vasp_signals['critical_signals'] = critical_signals vasp_signals['num_signals'] = len(signals) vasp_signals['num_critical'] = len(critical_signals) if len(critical_signals) > 0 and d['state'] == "successful": d["state"] = "error" d['analysis'] = d.get('analysis', {}) d['analysis']['errors_MP'] = vasp_signals
import time, yaml, sys from fireworks.core.launchpad import LaunchPad from fireworks.core.firework import Firework, Workflow from mpworks.firetasks.controller_tasks import AddEStructureTask from fireworks.utilities.fw_utilities import get_slug from mpworks.snl_utils.snl_mongo import SNLMongoAdapter from pymongo import MongoClient from collections import Counter from datetime import datetime from fnmatch import fnmatch # DONE manually: "mp-987" -> fw_id: 119629 lpdb = LaunchPad.from_file('/global/homes/m/matcomp/mp_prod/config/config_Mendel/my_launchpad.yaml') spec = {'task_type': 'Controller: add Electronic Structure v2', '_priority': 100000} sma = SNLMongoAdapter.from_file('/global/homes/m/matcomp/mp_prod/config/dbs/snl_db.yaml') with open('/global/homes/m/matcomp/mp_prod/materials_db_prod.yaml') as f: creds = yaml.load(f) client = MongoClient(creds['host'], creds['port']) db = client[creds['db']] db.authenticate(creds['username'], creds['password']) materials = db['materials'] tasks = db['tasks'] def append_wf(fw_id, parent_fw_id=None): wf = lpdb.workflows.find_one({'nodes':fw_id}, {'parent_links':1,'links':1,'name':1}) try: if parent_fw_id is None: parent_fw_id = wf['parent_links'][str(fw_id)][-1] # non-defused AddEStructureTask v2 already in children? for child_fw_id in wf['links'][str(parent_fw_id)]:
def fix(): # initialize databases module_dir = os.path.dirname(os.path.abspath(__file__)) snl_f = os.path.join(module_dir, 'snl.yaml') snldb = SNLMongoAdapter.from_file(snl_f) snl = snldb.snl snlgroups = snldb.snlgroups tasks_f = os.path.join(module_dir, 'tasks.yaml') with open(tasks_f) as f2: task_creds = yaml.load(f2) mc = MongoClient(task_creds['host'], task_creds['port']) db = mc[task_creds['database']] db.authenticate(task_creds['admin_user'], task_creds['admin_password']) tasks = db['tasks'] tasks_f = os.path.join(module_dir, 'tasks.yaml') with open(tasks_f) as f2: task_creds = yaml.load(f2) mc = MongoClient(task_creds['host'], task_creds['port']) db = mc[task_creds['database']] db.authenticate(task_creds['admin_user'], task_creds['admin_password']) tasks = db['tasks'] lp_f = os.path.join(module_dir, 'my_launchpad.yaml') lpdb = LaunchPad.from_file(lp_f) fws = lpdb.fireworks launches = lpdb.launches sb_f = os.path.join(module_dir, 'submission.yaml') sbdb = SubmissionMongoAdapter.from_file(sb_f) submissions = sbdb.jobs bad_crystal_ids = [] crystals_file = os.path.join(module_dir, 'bad_crystals.txt') with open(crystals_file) as f: for line in f: bad_crystal_ids.append(int(line.strip())) for c_id in bad_crystal_ids: if c_id == 100892 or c_id == 100202: print 'SKIP' else: # FIX SNL for s in snl.find({'about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'snl_id': 1}): snl.update({'snl_id': s['snl_id']}, {'$pushAll': {"about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) # FIX SNLGROUPS for s in snlgroups.find({'canonical_snl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'snlgroup_id': 1}): snlgroups.update({'snlgroup_id': s['snlgroup_id']}, {'$pushAll': {"canonical_snl.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) # FIX FWs pt 1 for s in fws.find({'spec.mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'fw_id': 1}): fws.update({'fw_id': s['fw_id']}, {'$pushAll': 
{"spec.mpsnl.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) # FIX FWs pt 2 for s in fws.find({'spec.force_mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'fw_id': 1}): fws.update({'fw_id': s['fw_id']}, {'$pushAll': {"spec.force_mpsnl.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) # FIX Launches for s in launches.find({'action.update_spec.mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'launch_id': 1}): launches.update({'launch_id': s['launch_id']}, {'$pushAll': {"action.update_spec.mpsnl.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) # FIX TASKS for s in tasks.find({'snl.about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'task_id': 1}): tasks.update({'task_id': s['task_id']}, {'$pushAll': {"snl.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) tasks.update({'task_id': s['task_id']}, {'$pushAll': {"snl_final.about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) # FIX SUBMISSIONS for s in submissions.find({'about._materialsproject.deprecated.crystal_id_deprecated': c_id}, {'submission_id': 1}): submissions.update({'submission_id': s['submission_id']}, {'$pushAll': {"about.remarks": ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}) print 'FIXED', c_id
import os import plotly.plotly as py from pandas import DataFrame from mpworks.snl_utils.snl_mongo import SNLMongoAdapter sma = SNLMongoAdapter.auto_load() sma2 = SNLMongoAdapter.from_file( os.path.join(os.environ['DB_LOC'], 'materials_db.yaml') ) def _get_snlgroup_id(snl_id): return sma.snlgroups.find_one( {'all_snl_ids': int(snl_id)}, {'snlgroup_id': 1, '_id': 0} )['snlgroup_id'] def _get_mp_id(snlgroup_id): mat = sma2.database.materials.find_one( {'snlgroup_id_final': snlgroup_id}, {'_id': 0, 'task_id': 1} ) if mat is not None: return mat['task_id'] return 'not found' def _get_mp_link(mp_id): if mp_id == 'not found': return mp_id url = 'link:$$https://materialsproject.org/materials/' url += mp_id url += '$$[%s]' % mp_id