示例#1
0
def get_deprecated_snl(snl_id, colls):
    """Return the SNL with ``snl_id`` as an MPStructureNL without ICSD metadata.

    The document is fetched from ``colls.snl``; its ``about._icsd`` entry is
    deleted and a dated remark noting the deletion is appended to
    ``about.remarks``. Only the fetched document is modified here - nothing is
    written back to the collection by this function.
    """
    record = colls.snl.find_one({'snl_id': snl_id})
    about = record['about']
    del about['_icsd']
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    remark = 'Record updated (about._icsd deleted) {}'.format(today)
    about['remarks'].append(remark)
    return MPStructureNL.from_dict(record)
示例#2
0
def submit_all_snl(min=None, max=None):
    """Submit the canonical SNL of every eligible SNL group.

    Eligible groups are ordered, valid, have at most 200 sites, contain no
    element listed in ``NO_POTCARS``, are not part of the
    'CederDahn Challenge' project, do not have a
    'Materials Project structure optimization' history entry and are not
    marked 'DEPRECATED'.

    Args:
        min: lowest ``snlgroup_id`` to include (inclusive), or None.
        max: highest ``snlgroup_id`` to include (inclusive), or None.

    Raises:
        ValueError: if exactly one of ``min`` / ``max`` is given.
    """
    # NOTE: ``min`` / ``max`` shadow the builtins but are part of the public
    # signature, so the names are kept.
    # Compare against None instead of relying on truthiness: a bound of 0 is
    # a legitimate value and must not be mistaken for "not specified".
    has_min = min is not None
    has_max = max is not None
    if has_min != has_max:
        raise ValueError('Must specify both min AND max if you specify one')

    constraints = {
        'is_ordered': True,
        'is_valid': True,
        'nsites': {
            '$lte': 200
        },
        'canonical_snl.about.projects': {
            '$ne': 'CederDahn Challenge'
        }
    }
    constraints['elements'] = {'$nin': NO_POTCARS}
    constraints['canonical_snl.about.history.name'] = {
        "$ne": "Materials Project structure optimization"
    }
    constraints['canonical_snl.about.remarks'] = {"$ne": "DEPRECATED"}

    if has_min:
        constraints['snlgroup_id'] = {'$gte': min, '$lte': max}

    snldb = SNLMongoAdapter.auto_load()
    sma = SubmissionMongoAdapter.auto_load()

    for result in snldb.snlgroups.find(constraints, {
            'canonical_snl': 1,
            'snlgroup_id': 1
    }):
        snl = MPStructureNL.from_dict(result['canonical_snl'])
        parameters = {'snlgroup_id': result['snlgroup_id']}
        sma.submit_snl(snl,
                       'Anubhav Jain <*****@*****.**>',
                       parameters=parameters)
示例#3
0
    def add_snl(self, snl, force_new=False, snlgroup_guess=None):
        """Assign a new snl_id to ``snl``, attach symmetry data and store it.

        The database lock is taken for the duration of the call. A copy of the
        structure with oxidation states removed is analysed with
        ``SymmetryFinder``; missing symmetry values fall back to ``-1`` (space
        group number) or ``'unknown'``. The resulting ``MPStructureNL`` is
        passed to ``self.add_mpsnl``.

        Args:
            snl: the StructureNL to add.
            force_new: forwarded to ``add_mpsnl``.
            snlgroup_guess: forwarded to ``add_mpsnl``.

        Returns:
            tuple: ``(mpsnl, snlgroup_id, spec_group)`` where the last two
            come from the result of ``add_mpsnl``.

        Raises:
            ValueError: if any step fails; the original traceback is printed.
        """
        lock_held = False
        try:
            self.lock_db()
            lock_held = True
            snl_id = self._get_next_snl_id()

            spstruc = snl.structure.copy()
            spstruc.remove_oxidation_states()
            sf = SymmetryFinder(spstruc, SPACEGROUP_TOLERANCE)
            sf.get_spacegroup()
            # ``or`` evaluates each getter once and substitutes the
            # placeholder when the result is falsy.
            sgnum = sf.get_spacegroup_number() or -1
            sgsym = sf.get_spacegroup_symbol() or 'unknown'
            sghall = sf.get_hall() or 'unknown'
            sgxtal = sf.get_crystal_system() or 'unknown'
            sglatt = sf.get_lattice_type() or 'unknown'
            sgpoint = unicode(sf.get_point_group(), errors="ignore")

            mpsnl = MPStructureNL.from_snl(snl, snl_id, sgnum, sgsym, sghall,
                                           sgxtal, sglatt, sgpoint)
            snlgroup, add_new, spec_group = self.add_mpsnl(mpsnl, force_new, snlgroup_guess)
            return mpsnl, snlgroup.snlgroup_id, spec_group
        except Exception:
            traceback.print_exc()
            raise ValueError("Error while adding SNL!")
        finally:
            # Release only a lock this call actually obtained; if lock_db()
            # itself failed there is nothing of ours to release.
            if lock_held:
                self.release_lock()
示例#4
0
文件: core.py 项目: xhqu1981/MPWorks
 def process_item(self, item, index):
     """Flag member SNLs that no longer fit their group's canonical structure.

     For every group id in ``item['snlgroup_ids']`` each non-canonical member
     SNL is loaded and compared with the group's canonical structure through
     ``self._matcher.fit``. Members failing the comparison are collected into
     one entry under the checker's first category; members that raise while
     being loaded are recorded one by one under its last category. The
     per-group result is passed to ``self._increase_counter``.
     """
     nrow, ncol, snlgroups = super(SNLGroupMemberChecker, self).process_item(item, index)
     for group_id in item['snlgroup_ids']:
         category_names = categories[self.checker_name]
         mismatches = {name: [] for name in category_names}
         group = snlgroups[group_id]
         failed_ids = []
         prefix = '%d,%d:' % (group.snlgroup_id, group.canonical_snl.snl_id)
         for member_id in group.all_snl_ids:
             if member_id == group.canonical_snl.snl_id:
                 continue
             try:
                 member_dict = self._snls.collection.find_one({'snl_id': member_id})
                 member = MPStructureNL.from_dict(member_dict)
             except:
                 err_type, err_value = sys.exc_info()[:2]
                 _log.info('%r %r', err_type, err_value)
                 mismatches[category_names[-1]].append('%s%d' % (prefix, member_id))
                 continue
             if not self._matcher.fit(member.structure, group.canonical_structure):
                 failed_ids.append(str(member_id))
                 _log.info('%s %d', prefix, member_id)
         if failed_ids:
             mismatches[category_names[0]].append('%s%s' % (prefix, ','.join(failed_ids)))
             _log.info('(%d) %r', self._counter_total.value, mismatches)
         self._increase_counter(nrow, ncol, mismatches)
示例#5
0
    def add_snl(self, snl, force_new=False, snlgroup_guess=None):
        """Store ``snl`` under a freshly assigned snl_id, annotated with symmetry data.

        Runs while holding the database lock. The structure is copied,
        stripped of oxidation states and analysed with ``SpacegroupAnalyzer``;
        falsy symmetry values are replaced by ``-1`` (space group number) or
        ``'unknown'``. The ``MPStructureNL`` built from these values is handed
        to ``self.add_mpsnl``.

        Args:
            snl: the StructureNL to add.
            force_new: forwarded to ``add_mpsnl``.
            snlgroup_guess: forwarded to ``add_mpsnl``.

        Returns:
            tuple: ``(mpsnl, snlgroup_id, spec_group)``; the last two are
            taken from the result of ``add_mpsnl``.

        Raises:
            ValueError: if any step fails; the original traceback is printed.
        """
        acquired = False
        try:
            self.lock_db()
            acquired = True
            snl_id = self._get_next_snl_id()

            spstruc = snl.structure.copy()
            spstruc.remove_oxidation_states()
            sf = SpacegroupAnalyzer(spstruc, SPACEGROUP_TOLERANCE)
            sf.get_spacegroup()
            # Each getter is evaluated once; ``or`` supplies the placeholder
            # for falsy results.
            sgnum = sf.get_spacegroup_number() or -1
            sgsym = sf.get_spacegroup_symbol() or 'unknown'
            sghall = sf.get_hall() or 'unknown'
            sgxtal = sf.get_crystal_system() or 'unknown'
            sglatt = sf.get_lattice_type() or 'unknown'
            sgpoint = sf.get_point_group()

            mpsnl = MPStructureNL.from_snl(snl, snl_id, sgnum, sgsym, sghall,
                                           sgxtal, sglatt, sgpoint)
            snlgroup, add_new, spec_group = self.add_mpsnl(
                mpsnl, force_new, snlgroup_guess)
            return mpsnl, snlgroup.snlgroup_id, spec_group
        except Exception:
            traceback.print_exc()
            raise ValueError("Error while adding SNL!")
        finally:
            # Never release a lock that lock_db() failed to obtain.
            if acquired:
                self.release_lock()
示例#6
0
 def add_snl(self, snl):
     """Wrap ``snl`` in an MPStructureNL with a new id and symmetry data, then store it.

     Returns a tuple of the created MPStructureNL and the ``snlgroup_id`` of
     the group returned by ``self.add_mpsnl``.
     """
     new_id = self._get_next_snl_id()
     finder = SymmetryFinder(snl.structure, SPACEGROUP_TOLERANCE)
     finder.get_spacegroup()
     sg_number = finder.get_spacegroup_number()
     sg_symbol = finder.get_spacegroup_symbol()
     hall = finder.get_hall()
     crystal_system = finder.get_crystal_system()
     lattice_type = finder.get_lattice_type()
     mpsnl = MPStructureNL.from_snl(snl, new_id, sg_number, sg_symbol, hall,
                                    crystal_system, lattice_type)
     group, _was_added = self.add_mpsnl(mpsnl)
     return mpsnl, group.snlgroup_id
示例#7
0
文件: core.py 项目: xhqu1981/MPWorks
 def process_item(self, item, index):
     """Report SNLs from different SNL groups that carry the same ICSD id.

     Every unordered pair of groups in ``item['snlgroup_ids']`` is compared.
     For each pair of member SNLs (one from each group) whose
     ``about._icsd.icsd_id`` values are equal, an entry is recorded under the
     'same ICSDs' category. The entry holds both group ids, both snl ids, the
     shared ICSD id and whether the two structures match; when they match it
     also states whether each structure matches its own group's canonical
     structure and whether the two canonical structures match each other.
     The per-primary-group result is passed to ``self._increase_counter``.
     """
     nrow, ncol, snlgroups = super(SNLGroupIcsdChecker, self).process_item(item, index)
     group_ids = item['snlgroup_ids']
     for idx, primary_id in enumerate(group_ids[:-1]):
         cat_key = ''
         local_mismatch_dict = dict((k, []) for k in categories[self.checker_name])
         primary_group = snlgroups[primary_id]
         # Materialize the query result. It is iterated once per secondary
         # group below, and a database cursor (presumably a pymongo Cursor)
         # yields nothing on a second pass.
         primary_mpsnl_dicts = list(self._snls.collection.find(
             *self.get_snl_query(primary_group.all_snl_ids)))
         for secondary_id in group_ids[idx+1:]:
             secondary_group = snlgroups[secondary_id]
             # Same reason: this result is re-iterated for every primary SNL.
             secondary_mpsnl_dicts = list(self._snls.collection.find(
                 *self.get_snl_query(secondary_group.all_snl_ids)))
             for primary_mpsnl_dict in primary_mpsnl_dicts:
                 primary_icsd_id = primary_mpsnl_dict['about']['_icsd']['icsd_id']
                 for secondary_mpsnl_dict in secondary_mpsnl_dicts:
                     secondary_icsd_id = secondary_mpsnl_dict['about']['_icsd']['icsd_id']
                     if primary_icsd_id != secondary_icsd_id:
                         continue
                     cat_key = 'same ICSDs'
                     primary_structure = MPStructureNL.from_dict(primary_mpsnl_dict).structure
                     secondary_structure = MPStructureNL.from_dict(secondary_mpsnl_dict).structure
                     match = self._matcher.fit(primary_structure, secondary_structure)
                     if match:
                         primary_match = self._matcher.fit(
                             primary_structure, primary_group.canonical_structure)
                         secondary_match = self._matcher.fit(
                             secondary_structure, secondary_group.canonical_structure)
                         canonical_match = self._matcher.fit(
                             primary_group.canonical_structure,
                             secondary_group.canonical_structure)
                     # The three *_match names only exist when ``match`` is
                     # truthy; the conditional expression keeps them from
                     # being evaluated otherwise.
                     local_mismatch_dict[cat_key].append(
                         '({}, {}): ({}, {}) -> {} ({}{})'.format(
                             primary_id, secondary_id,
                             primary_mpsnl_dict['snl_id'],
                             secondary_mpsnl_dict['snl_id'],
                             primary_icsd_id, match,
                             '/{}/{}/{}'.format(
                                 primary_match, secondary_match, canonical_match
                             ) if match else ''
                         )
                     )
         if cat_key:
             _log.info('(%d) %r', self._counter_total.value, local_mismatch_dict)
         self._increase_counter(nrow, ncol, local_mismatch_dict)
示例#8
0
    def submit_new_workflow(self):
        """Claim one SUBMITTED job and turn it into a FireWorks workflow.

        The job is atomically switched to state WAITING. Its structure is
        rejected (state REJECTED) when it has more than
        ``SubmissionProcessor.MAX_SITES`` sites, is flagged invalid, contains
        an element listed in ``NO_POTCARS`` or is disordered; otherwise a
        workflow is built and added to the launchpad. Any exception raised
        while processing sets the job's state to ERROR and prints the
        traceback instead of propagating.

        Returns:
            The ``submission_id`` of the claimed job, or None when there is
            no job in state SUBMITTED.
        """
        # finds a submitted job, creates a workflow, and submits it to FireWorks
        job = self.jobs.find_and_modify({'state': 'SUBMITTED'},
                                        {'$set': {'state': 'WAITING'}})
        if not job:
            return None

        submission_id = job['submission_id']
        try:
            if 'snl_id' in job:
                snl = MPStructureNL.from_dict(job)
            else:
                snl = StructureNL.from_dict(job)

            # First failed check wins: (state details, console note).
            n_sites = len(snl.structure.sites)
            if n_sites > SubmissionProcessor.MAX_SITES:
                rejection = ('too many sites',
                             'too many sites ({})'.format(n_sites))
            elif not job['is_valid']:
                rejection = ('invalid structure (atoms too close)',
                             'invalid structure')
            elif set(NO_POTCARS) & set(job['elements']):
                rejection = ('invalid structure (no POTCAR)',
                             'invalid element (No POTCAR)')
            elif not job['is_ordered']:
                rejection = ('invalid structure (disordered)',
                             'invalid structure')
            else:
                rejection = None

            if rejection is not None:
                details, note = rejection
                self.sma.update_state(submission_id, 'REJECTED', details, {})
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print('REJECTED WORKFLOW FOR {} - {}'.format(
                    snl.structure.formula, note))
            else:
                mp_data = snl.data.setdefault('_materialsproject', {})
                mp_data['submission_id'] = submission_id

                # create a workflow
                if "Elasticity" in snl.projects:
                    wf = snl_to_wf_elastic(snl, job['parameters'])
                else:
                    wf = snl_to_wf(snl, job['parameters'])
                self.launchpad.add_wf(wf)
                print('ADDED WORKFLOW FOR {}'.format(snl.structure.formula))
        except Exception:
            # Deliberate catch-all for processing errors: mark the job so it
            # does not stay in WAITING forever. KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            self.jobs.find_and_modify({'submission_id': submission_id},
                                      {'$set': {'state': 'ERROR'}})
            traceback.print_exc()

        return submission_id
示例#9
0
def submit_all_snl(snldb, sma, snlgroup_constraint=None):
    """Submit the canonical SNL of each eligible group found in ``snldb``.

    Groups must be ordered, valid, have at most 200 sites, contain no element
    from ``NO_POTCARS``, not belong to the 'CederDahn Challenge' project, not
    have a 'Materials Project structure optimization' history entry and not
    be marked 'DEPRECATED'. A truthy ``snlgroup_constraint`` is used as an
    additional query condition on ``snlgroup_id``. Each hit is submitted
    through ``sma.submit_snl`` with its ``snlgroup_id`` as parameter.
    """
    constraints = {
        'is_ordered': True,
        'is_valid': True,
        'nsites': {'$lte': 200},
        'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'},
        'elements': {'$nin': NO_POTCARS},
        'canonical_snl.about.history.name': {
            "$ne": "Materials Project structure optimization"},
        'canonical_snl.about.remarks': {"$ne": "DEPRECATED"},
    }
    if snlgroup_constraint:
        constraints['snlgroup_id'] = snlgroup_constraint

    projection = {'canonical_snl': 1, 'snlgroup_id': 1}
    for group in snldb.snlgroups.find(constraints, projection):
        canonical = MPStructureNL.from_dict(group['canonical_snl'])
        sma.submit_snl(canonical,
                       'Anubhav Jain <*****@*****.**>',
                       parameters={'snlgroup_id': group['snlgroup_id']})
示例#10
0
    def submit_new_workflow(self):
        """Pick up one SUBMITTED job and submit a workflow for it to FireWorks.

        The job is atomically moved to state WAITING. It is rejected (state
        REJECTED) if its structure exceeds ``SubmissionProcessor.MAX_SITES``
        sites, is flagged invalid, contains an element from ``NO_POTCARS`` or
        is disordered. Otherwise a workflow is created - via
        ``snl_to_wf_phonon`` when "Elasticity" is among the SNL's projects,
        via ``snl_to_wf`` otherwise - and added to the launchpad. Exceptions
        raised during processing set the job to ERROR and are printed rather
        than propagated.

        Returns:
            The claimed job's ``submission_id``, or None if no job is in
            state SUBMITTED.
        """
        # finds a submitted job, creates a workflow, and submits it to FireWorks
        job = self.jobs.find_and_modify({'state': 'SUBMITTED'}, {'$set': {'state': 'WAITING'}})
        if not job:
            return None

        submission_id = job['submission_id']
        try:
            snl_cls = MPStructureNL if 'snl_id' in job else StructureNL
            snl = snl_cls.from_dict(job)

            # Checks run in order; the first failure decides the rejection
            # reason as (state details, console note).
            n_sites = len(snl.structure.sites)
            rejection = None
            if n_sites > SubmissionProcessor.MAX_SITES:
                rejection = ('too many sites', 'too many sites ({})'.format(n_sites))
            elif not job['is_valid']:
                rejection = ('invalid structure (atoms too close)', 'invalid structure')
            elif set(NO_POTCARS) & set(job['elements']):
                rejection = ('invalid structure (no POTCAR)', 'invalid element (No POTCAR)')
            elif not job['is_ordered']:
                rejection = ('invalid structure (disordered)', 'invalid structure')

            if rejection is not None:
                details, note = rejection
                self.sma.update_state(submission_id, 'REJECTED', details, {})
                # print(...) with one argument is valid on Python 2 and 3 alike.
                print('REJECTED WORKFLOW FOR {} - {}'.format(snl.structure.formula, note))
            else:
                mp_data = snl.data.setdefault('_materialsproject', {})
                mp_data['submission_id'] = submission_id

                # create a workflow
                if "Elasticity" in snl.projects:
                    from mpworks.workflows.snl_to_wf_phonon import snl_to_wf_phonon
                    wf = snl_to_wf_phonon(snl, job['parameters'])
                else:
                    wf = snl_to_wf(snl, job['parameters'])
                self.launchpad.add_wf(wf)
                print('ADDED WORKFLOW FOR {}'.format(snl.structure.formula))
        except Exception:
            # Catch-all on purpose so a failing job is flagged instead of
            # staying in WAITING; KeyboardInterrupt/SystemExit now propagate.
            self.jobs.find_and_modify({'submission_id': submission_id},
                                      {'$set': {'state': 'ERROR'}})
            traceback.print_exc()

        return submission_id
示例#11
0
    def add_snl(self, snl, force_new=False, snlgroup_guess=None):
        """Assign a new snl_id to ``snl``, attach symmetry data and store it.

        The structure is analysed with ``SymmetryFinder``; falsy symmetry
        values are replaced by ``-1`` (space group number) or ``'unknown'``.
        The resulting ``MPStructureNL`` is passed to ``self.add_mpsnl``.

        Args:
            snl: the StructureNL to add.
            force_new: forwarded to ``add_mpsnl``.
            snlgroup_guess: forwarded to ``add_mpsnl``.

        Returns:
            tuple: the created MPStructureNL and the ``snlgroup_id`` of the
            group returned by ``add_mpsnl``.
        """
        snl_id = self._get_next_snl_id()
        sf = SymmetryFinder(snl.structure, SPACEGROUP_TOLERANCE)
        sf.get_spacegroup()
        # ``or`` evaluates each getter once (previously every getter ran
        # twice) and substitutes the placeholder for falsy results.
        sgnum = sf.get_spacegroup_number() or -1
        sgsym = sf.get_spacegroup_symbol() or 'unknown'
        sghall = sf.get_hall() or 'unknown'
        sgxtal = sf.get_crystal_system() or 'unknown'
        sglatt = sf.get_lattice_type() or 'unknown'
        # ``unicode`` is the Python 2 builtin; undecodable bytes are dropped.
        sgpoint = unicode(sf.get_point_group(), errors="ignore")

        mpsnl = MPStructureNL.from_snl(snl, snl_id, sgnum, sgsym, sghall,
                                       sgxtal, sglatt, sgpoint)
        snlgroup, add_new = self.add_mpsnl(mpsnl, force_new, snlgroup_guess)
        return mpsnl, snlgroup.snlgroup_id
示例#12
0
def submit_all_snl(min=None, max=None):
    """Submit the canonical SNL of every eligible SNL group.

    Eligible groups are ordered, valid, have at most 200 sites, contain no
    element from ``NO_POTCARS``, are not part of the 'CederDahn Challenge'
    project, have no 'Materials Project structure optimization' history
    entry and are not marked 'DEPRECATED'.

    Args:
        min: lowest ``snlgroup_id`` to include (inclusive), or None.
        max: highest ``snlgroup_id`` to include (inclusive), or None.

    Raises:
        ValueError: if exactly one of ``min`` / ``max`` is given.
    """
    # NOTE: the parameter names shadow builtins but belong to the public
    # signature. Bounds are tested against None, not for truthiness, so that
    # 0 is accepted as a real bound; validation happens before any DB access.
    if (min is None) != (max is None):
        raise ValueError('Must specify both min AND max if you specify one')

    constraints = {'is_ordered': True, 'is_valid': True, 'nsites': {'$lte': 200}, 'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'}}
    constraints['elements'] = {'$nin': NO_POTCARS}
    constraints['canonical_snl.about.history.name'] = {"$ne": "Materials Project structure optimization"}
    constraints['canonical_snl.about.remarks'] = {"$ne": "DEPRECATED"}

    if min is not None:
        constraints['snlgroup_id'] = {'$gte': min, '$lte': max}

    snldb = SNLMongoAdapter.auto_load()
    sma = SubmissionMongoAdapter.auto_load()

    for result in snldb.snlgroups.find(constraints, {'canonical_snl': 1, 'snlgroup_id': 1}):
        snl = MPStructureNL.from_dict(result['canonical_snl'])
        parameters = {'snlgroup_id': result['snlgroup_id']}
        sma.submit_snl(snl, 'Anubhav Jain <*****@*****.**>', parameters=parameters)
示例#13
0
    def add_snl(self, snl, force_new=False, snlgroup_guess=None):
        """Store ``snl`` under a newly assigned snl_id together with its symmetry data.

        ``SymmetryFinder`` is run on the structure. A falsy space group number
        becomes ``-1``; a falsy symbol, Hall symbol, crystal system or lattice
        type becomes ``'unknown'``. The ``MPStructureNL`` built from these
        values is handed to ``self.add_mpsnl``.

        Args:
            snl: the StructureNL to add.
            force_new: forwarded to ``add_mpsnl``.
            snlgroup_guess: forwarded to ``add_mpsnl``.

        Returns:
            tuple: ``(mpsnl, snlgroup_id)``.
        """
        snl_id = self._get_next_snl_id()
        sf = SymmetryFinder(snl.structure, SPACEGROUP_TOLERANCE)
        sf.get_spacegroup()
        # One call per getter instead of two; ``or`` provides the fallback.
        sgnum = sf.get_spacegroup_number() or -1
        sgsym = sf.get_spacegroup_symbol() or 'unknown'
        sghall = sf.get_hall() or 'unknown'
        sgxtal = sf.get_crystal_system() or 'unknown'
        sglatt = sf.get_lattice_type() or 'unknown'
        # Python 2 ``unicode`` builtin; bytes that cannot be decoded are ignored.
        sgpoint = unicode(sf.get_point_group(), errors="ignore")

        mpsnl = MPStructureNL.from_snl(snl, snl_id, sgnum, sgsym, sghall,
                                       sgxtal, sglatt, sgpoint)
        snlgroup, add_new = self.add_mpsnl(mpsnl, force_new, snlgroup_guess)
        return mpsnl, snlgroup.snlgroup_id
示例#14
0
def find_alternate_canonical():
    """Replace deprecated canonical SNLs with a non-deprecated group member.

    The SNL database is opened from ``snl.yaml`` next to this module. For
    every group whose canonical SNL is marked 'DEPRECATED', the first member
    SNL not marked 'DEPRECATED' (if any) is made the new canonical SNL and
    the group id is printed. 'DONE' is printed at the end.
    """
    # see if we can replace a deprecated canonical SNL with a non-deprecated one
    here = os.path.dirname(os.path.abspath(__file__))
    snldb = SNLMongoAdapter.from_file(os.path.join(here, 'snl.yaml'))
    snl_coll = snldb.snl
    group_coll = snldb.snlgroups

    deprecated_groups = group_coll.find(
        {"canonical_snl.about.remarks": "DEPRECATED"},
        {"snlgroup_id": 1, "all_snl_ids": 1})
    for group in deprecated_groups:
        candidates = snl_coll.find({
            "snl_id": {"$in": group['all_snl_ids']},
            "about.remarks": {"$ne": "DEPRECATED"}})
        # Only the first usable member is promoted.
        for candidate in candidates:
            replacement = MPStructureNL.from_dict(candidate)
            snldb.switch_canonical_snl(group['snlgroup_id'], replacement)
            print(group['snlgroup_id'])
            break

    print('DONE')
示例#15
0
文件: core.py 项目: xhqu1981/MPWorks
 def process_item(self, item, index):
     """Recompute the space group of one SNL and record a disagreement.

     ``item`` is used as the ``snl_id`` to look up. The space group number is
     recomputed with ``SpacegroupAnalyzer`` (symprec=0.1) on the structure
     with oxidation states removed and compared with the stored ``sg_num``.
     A mismatch is filed under this checker's category 0, or category 1 when
     the recomputed number is 0. Any exception while loading or analysing
     the SNL is logged and filed under category 2. The result is passed to
     ``self._increase_counter``.
     """
     nrow, ncol, snlgroups = super(SNLSpaceGroupChecker, self).process_item(item, index)
     checker_categories = categories[self.checker_name]
     local_mismatch_dict = dict((k, []) for k in checker_categories)
     category = ''
     try:
         mpsnl_dict = self._snls.collection.find_one({'snl_id': item})
         mpsnl = MPStructureNL.from_dict(mpsnl_dict)
         mpsnl.structure.remove_oxidation_states()
         sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
         sg_num = sf.get_spacegroup_number()
         if sg_num != mpsnl.sg_num:
             category = checker_categories[int(sg_num == 0)]
     except Exception as exc:
         _log.info('%r %r', type(exc), exc)
         # Use this checker's own category list, like the rest of the
         # function; the previous ``categories[0][2]`` indexed the container
         # by 0 instead of by checker name.
         category = checker_categories[2]
     if category:
         local_mismatch_dict[category].append(str(item))
         _log.info('(%d) %r', self._counter_total.value, local_mismatch_dict)
     self._increase_counter(nrow, ncol, local_mismatch_dict)
示例#16
0
def check_snl_spacegroups(args):
    """Re-check stored space groups of SNLs and stream the outcome for plotting.

    SNLs with ``args.start < snl_id <= min(args.end, num_snls)`` are read from
    ``sma.snl``. For each one the space group number is recomputed with
    ``SpacegroupAnalyzer`` (symprec=0.1) and compared with the stored
    ``sg_num``. Agreeing SNLs advance a running count that is written to the
    second of the two streams; all others are written to the first stream
    with an explanatory text and the colors collected so far.
    """
    # NOTE(review): presumably integer operands under Python 2, so "/" floors - confirm
    range_index = args.start / num_ids_per_stream
    base = range_index * 2
    idxs = [base, base + 1]
    streams = [py.Stream(stream_ids[i]) for i in idxs]
    for stream in streams:
        stream.open()
    bad_stream, good_stream = streams
    upper = min(args.end, num_snls)
    cursor = sma.snl.find({"snl_id": {"$gt": args.start, "$lte": upper}})
    num_good_ids = 0
    colors = []
    for mpsnl_dict in cursor:
        start_time = time.clock()
        failed = False
        try:
            mpsnl = MPStructureNL.from_dict(mpsnl_dict)
            sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
        except:
            exc_type, exc_value = sys.exc_info()[:2]
            failed = True
        if not failed and sf.get_spacegroup_number() == mpsnl.sg_num:
            # Bar (good)
            num_good_ids += 1
            good_stream.write({'x': [num_good_ids], 'y': [range_index]})
            continue
        # Scatter (bad)
        if failed:
            # pybtex errors get their own category, everything else is "other"
            category = 2 if fnmatch(str(exc_type), '*pybtex*') else 3
            text = ' '.join([str(exc_type), str(exc_value)])
        else:
            # 1 when no space group could be determined (number 0), else 0
            category = int(sf.get_spacegroup_number() == 0)
            text = '%s: %d' % (mpsnl.snlgroup_key,
                               sf.get_spacegroup_number())
        colors.append(category_colors[category])
        bad_stream.write({
            'x': mpsnl_dict['snl_id'] % num_ids_per_stream,
            'y': range_index,
            'text': text,
            'marker': Marker(color=colors),
        })
    for stream in streams:
        stream.close()
示例#17
0
    def resubmit(self, submission_id, snl_db=None):
        """Reset a submission to state SUBMITTED so it is processed again.

        If an SNL was already created for this submission, its dict form and
        the id of the SNL group containing it are merged into the job's
        existing ``parameters``. ``snl_db`` defaults to
        ``SNLMongoAdapter.auto_load()`` when falsy.
        """
        # see if an SNL object has already been created
        if not snl_db:
            snl_db = SNLMongoAdapter.auto_load()

        mpsnl, snlgroup_id = None, None
        existing = snl_db.snl.find_one(
            {"about._materialsproject.submission_id": submission_id})
        if existing:
            mpsnl = MPStructureNL.from_dict(existing)
            group = snl_db.snlgroups.find_one(
                {"all_snl_ids": existing['snl_id']}, {"snlgroup_id": 1})
            snlgroup_id = group['snlgroup_id']

        # Now reset the current submission parameters
        updates = {'state': 'SUBMITTED', 'state_details': {}, 'task_dict': {}}

        if mpsnl:
            job = self.jobs.find_one({'submission_id': submission_id},
                                     {'parameters': 1})
            updates['parameters'] = job['parameters']
            updates['parameters'].update(
                {"mpsnl": mpsnl.as_dict(), "snlgroup_id": snlgroup_id})

        self.jobs.find_and_modify({'submission_id': submission_id},
                                  {'$set': updates})
示例#18
0
    def resubmit(self, submission_id, snl_db=None):
        """Put a submission back into state SUBMITTED.

        State details and task dict are cleared. When an SNL already exists
        for the submission, the job's stored ``parameters`` are extended with
        that SNL (``mpsnl.to_dict``) and the id of the group that lists it.
        A falsy ``snl_db`` is replaced by ``SNLMongoAdapter.auto_load()``.
        """
        # see if an SNL object has already been created
        if not snl_db:
            snl_db = SNLMongoAdapter.auto_load()

        query = {"about._materialsproject.submission_id": submission_id}
        found = snl_db.snl.find_one(query)
        mpsnl = None
        snlgroup_id = None
        if found:
            mpsnl = MPStructureNL.from_dict(found)
            owner = snl_db.snlgroups.find_one({"all_snl_ids": found['snl_id']},
                                              {"snlgroup_id": 1})
            snlgroup_id = owner['snlgroup_id']

        # Now reset the current submission parameters
        updates = {'state': 'SUBMITTED', 'state_details': {}, 'task_dict': {}}

        if mpsnl:
            stored = self.jobs.find_one({'submission_id': submission_id},
                                        {'parameters': 1})
            updates['parameters'] = stored['parameters']
            # NOTE(review): ``to_dict`` is accessed without being called -
            # presumably a property in the targeted pymatgen version; verify.
            extra = {"mpsnl": mpsnl.to_dict, "snlgroup_id": snlgroup_id}
            updates['parameters'].update(extra)

        self.jobs.find_and_modify({'submission_id': submission_id},
                                  {'$set': updates})
示例#19
0
def check_snl_spacegroups(args):
    """Compare stored and recomputed space groups of SNLs and stream the result.

    Covers SNLs with ``args.start < snl_id <= end`` where ``end`` is
    ``args.end`` capped at ``num_snls``. An SNL is "good" when it loads
    without error and the number from ``SpacegroupAnalyzer`` (symprec=0.1)
    equals its stored ``sg_num``. Good SNLs are written to stream 1 as a
    running count; bad ones go to stream 0 with a text and the colors
    collected so far.
    """
    # NOTE(review): "/" presumably floors here (Python 2 ints) - confirm
    range_index = args.start / num_ids_per_stream
    first_idx = range_index*2
    idxs = [first_idx, first_idx+1]
    s = [py.Stream(stream_ids[i]) for i in idxs]
    for stream in s:
        stream.open()
    end = min(args.end, num_snls)
    id_range = {"$gt": args.start, "$lte": end}
    num_good_ids = 0
    colors = []
    for mpsnl_dict in sma.snl.find({"snl_id": id_range}):
        start_time = time.clock()
        exc_raised = False
        try:
            mpsnl = MPStructureNL.from_dict(mpsnl_dict)
            sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
        except:
            exc_type, exc_value = sys.exc_info()[:2]
            exc_raised = True
        is_good = (not exc_raised and sf.get_spacegroup_number() == mpsnl.sg_num)
        if is_good:
            # Bar (good)
            num_good_ids += 1
            data = {'x': [num_good_ids], 'y': [range_index]}
        else:
            # Scatter (bad)
            if exc_raised:
                # category 2 for pybtex errors, 3 for any other exception
                category = 2 if fnmatch(str(exc_type), '*pybtex*') else 3
                text = ' '.join([str(exc_type), str(exc_value)])
            else:
                # category 1 when the recomputed number is 0, else 0
                category = int(sf.get_spacegroup_number() == 0)
                text = '%s: %d' % (mpsnl.snlgroup_key, sf.get_spacegroup_number())
            colors.append(category_colors[category])
            data = {
                'x': mpsnl_dict['snl_id']%num_ids_per_stream,
                'y': range_index,
                'text': text,
                'marker': Marker(color=colors),
            }
        # is_good doubles as the stream index: False -> 0 (bad), True -> 1 (good)
        s[is_good].write(data)
    for stream in s:
        stream.close()
示例#20
0
def get_deprecated_snl(snl_id, colls):
    """Fetch an SNL, drop its ``about._icsd`` entry and return it as MPStructureNL.

    A remark with today's date documenting the deletion is appended to
    ``about.remarks`` of the fetched document. The collection itself is not
    updated by this function.
    """
    snl_doc = colls.snl.find_one({'snl_id': snl_id})
    del snl_doc['about']['_icsd']
    stamp = datetime.datetime.now().strftime('%Y-%m-%d')
    snl_doc['about']['remarks'].append(
        'Record updated (about._icsd deleted) {}'.format(stamp))
    return MPStructureNL.from_dict(snl_doc)
示例#21
0
def check_snls_in_snlgroups(args):
    """Check whether the SNLs of each SNL group still match its canonical SNL.

    Groups with ``args.start < snlgroup_id <= min(args.end, num_snlgroups)``
    are read from ``sma.snlgroups``. A group is "good" when it has at most
    one member or every non-canonical member matches the canonical structure
    according to ``matcher.fit``; good groups advance a running count written
    to the second stream. A group that cannot be loaded, or whose first
    failing member does not match or raises, is written to the first stream
    with an explanatory text and the colors collected so far.
    """
    # NOTE(review): "/" presumably floors here (Python 2 ints) - confirm
    range_index = args.start / num_ids_per_stream
    first_idx = 2*(num_snl_streams+range_index)
    idxs = [first_idx, first_idx+1]
    streams = [py.Stream(stream_ids[i]) for i in idxs]
    for stream in streams:
        stream.open()
    bad_stream, good_stream = streams
    end = min(args.end, num_snlgroups)
    id_range = {"$gt": args.start, "$lte": end}
    colors = []
    num_good_ids = 0
    for snlgrp_dict in sma.snlgroups.find({"snlgroup_id": id_range}):
        start_time = time.clock()
        try:
            snlgrp = SNLGroup.from_dict(snlgrp_dict)
        except:
            exc_type, exc_value = sys.exc_info()[:2]
            text = ' '.join([str(exc_type), str(exc_value)])
            colors.append(category_colors[-1])  # Others
            bad_stream.write({
                'x': snlgrp_dict['snlgroup_id']%num_ids_per_stream,
                'y': range_index,
                'text': text,
                'marker': Marker(color=colors),
            })
            sleep(start_time)
            continue
        if len(snlgrp.all_snl_ids) <= 1:
            # Nothing to compare against: counts as good.
            num_good_ids += 1
            good_stream.write({'x': [num_good_ids], 'y': [range_index]})
            sleep(start_time)
            continue
        exc_raised = False
        for snl_id in snlgrp.all_snl_ids:
            if snl_id == snlgrp.canonical_snl.snl_id:
                continue
            mpsnl_dict = sma.snl.find_one({"snl_id": snl_id})
            try:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                is_match = matcher.fit(mpsnl.structure, snlgrp.canonical_structure)
            except:
                exc_type, exc_value = sys.exc_info()[:2]
                exc_raised = True
            if not exc_raised and is_match:
                continue
            # Scatter (bad): report the first failing member and stop.
            if exc_raised:
                category = 2 if fnmatch(str(exc_type), '*pybtex*') else 3
                text = ' '.join([str(exc_type), str(exc_value)])
            else:
                category = 0
                text = '%d != can:%d' % (mpsnl_dict['snl_id'], snlgrp.canonical_snl.snl_id)
            colors.append(category_colors[category])
            bad_stream.write({
                'x': snlgrp_dict['snlgroup_id']%num_ids_per_stream,
                'y': range_index,
                'text': text,
                'marker': Marker(color=colors),
            })
            sleep(start_time)
            break
        else:
            # Bar (good): the loop finished without hitting a bad member.
            num_good_ids += 1
            good_stream.write({'x': [num_good_ids], 'y': [range_index]})
            sleep(start_time)
    for stream in streams:
        stream.close()
示例#22
0
def analyze(args):
    """analyze data at any point for a copy of the streaming figure

    Downloads the plotly figure `args.fig_id` and post-processes its traces.

    If `args.t` is set, a figure-specific analysis runs (any other figure id
    is silently ignored):
      * fig_id 42: compare pairs of SNL groups and write two CSV files
        (pairs found / not found in the materials database).
      * fig_id 16: write the bad SNLs to CSV and plot old vs. recomputed
        spacegroup numbers.
      * fig_id 43: re-check group members against their canonical structure
        with a second matcher, write the mismatches to CSV and plot them.

    If `args.t` is not set, the Scatter traces of the streaming figure are
    decoded into bad SNLs and bad SNL groups, which are written to CSV.

    Relies on names defined outside this function (e.g. `py`, `creds`, `sma`,
    `matcher`, `categories`, `category_colors`, `num_snl_streams`). All output
    files are written relative to the current working directory under
    mpworks/check_snl/results/.
    """
    # NOTE: make copy online first with suffix _%Y-%m-%d and note figure id
    fig = py.get_figure(creds['username'], args.fig_id)
    if args.t:
        if args.fig_id == 42:
            # Flatten the '<br>'-separated labels of trace 2 and drop empty
            # entries; make_tuple parses each label (the loop below unpacks
            # every parsed label as a (primary_id, secondary_id) pair).
            label_entries = filter(None, '<br>'.join(fig['data'][2]['text']).split('<br>'))
            # NOTE: `pairs` is iterated twice (here via chain and in the CSV
            # loop below), which relies on Python 2's map returning a list.
            pairs = map(make_tuple, label_entries)
            grps = set(chain.from_iterable(pairs))
            # snlgroup_id -> canonical snlgroup_key for all referenced groups,
            # excluding groups of the 'CederDahn Challenge' project.
            snlgrp_cursor = sma.snlgroups.aggregate([
                { '$match': {
                    'snlgroup_id': { '$in': list(grps) },
                    'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'}
                } },
                { '$project': { 'snlgroup_id': 1, 'canonical_snl.snlgroup_key': 1, '_id': 0 } }
            ], cursor={})
            snlgroup_keys = {}
            for d in snlgrp_cursor:
                snlgroup_keys[d['snlgroup_id']] = d['canonical_snl']['snlgroup_key']
            # Debug output for one hard-coded group id; raises KeyError if
            # group 40890 is not among the query results.
            print snlgroup_keys[40890]
            # Second adapter, configured from $DB_LOC/materials_db.yaml, used
            # to query the `materials` collection.
            sma2 = SNLMongoAdapter.from_file(
                os.path.join(os.environ['DB_LOC'], 'materials_db.yaml')
            )
            materials_cursor = sma2.database.materials.aggregate([
                { '$match': {
                    'snlgroup_id_final': { '$in': list(grps) },
                    'snl_final.about.projects': {'$ne': 'CederDahn Challenge'}
                } },
                { '$project': {
                    'snlgroup_id_final': 1, '_id': 0, 'task_id': 1,
                    'final_energy_per_atom': 1,
                    'band_gap.search_gap.band_gap': 1,
                    'volume': 1, 'nsites': 1
                }}
            ], cursor={})
            # snlgroup_id -> computed properties; if several materials share a
            # snlgroup_id_final, the last one returned by the cursor wins.
            snlgroup_data = {}
            for material in materials_cursor:
                snlgroup_id = material['snlgroup_id_final']
                final_energy_per_atom = material['final_energy_per_atom']
                band_gap = material['band_gap']['search_gap']['band_gap']
                volume_per_atom = material['volume'] / material['nsites']
                snlgroup_data[snlgroup_id] = {
                    'final_energy_per_atom': final_energy_per_atom,
                    'band_gap': band_gap, 'task_id': material['task_id'],
                    'volume_per_atom': volume_per_atom
                }
            # Debug output, same caveat as above (KeyError if 40890 is absent).
            print snlgroup_data[40890]
            filestem = 'mpworks/check_snl/results/bad_snlgroups_2_'
            # writer1: pairs with both groups in the materials db (deltas set);
            # writer2: all remaining pairs.
            with open(filestem+'in_matdb.csv', 'wb') as f, \
                    open(filestem+'notin_matdb.csv', 'wb') as g:
                writer1, writer2 = csv.writer(f), csv.writer(g)
                header = [
                    'category', 'composition',
                    'snlgroup_id 1', 'sg_num 1', 'task_id 1',
                    'snlgroup_id 2', 'sg_num 2', 'task_id 2',
                    'delta_energy', 'delta_bandgap', 'delta_volume_per_atom',
                    'rms_dist', 'scenario'
                ]
                writer1.writerow(header)
                writer2.writerow(header)
                for primary_id, secondary_id in pairs:
                    # Skip pairs for which either group was filtered out of
                    # (or not found by) the snlgroups query above.
                    if primary_id not in snlgroup_keys or \
                       secondary_id not in snlgroup_keys: continue
                    # The snlgroup_key is split on '--' into composition and
                    # spacegroup number (both kept as strings).
                    composition, primary_sg_num = snlgroup_keys[primary_id].split('--')
                    secondary_sg_num = snlgroup_keys[secondary_id].split('--')[1]
                    category = 'same SGs' if primary_sg_num == secondary_sg_num else 'diff. SGs'
                    # Deltas stay empty strings unless both groups have
                    # materials data; otherwise they are formatted absolute
                    # differences with 3 significant digits.
                    if primary_id not in snlgroup_data or secondary_id not in snlgroup_data:
                        delta_energy, delta_bandgap, delta_volume_per_atom = '', '', ''
                    else:
                        delta_energy = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['final_energy_per_atom'] - \
                            snlgroup_data[secondary_id]['final_energy_per_atom']
                        ))
                        delta_bandgap = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['band_gap'] - \
                            snlgroup_data[secondary_id]['band_gap']
                        ))
                        delta_volume_per_atom = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['volume_per_atom'] - \
                            snlgroup_data[secondary_id]['volume_per_atom']
                        ))
                    scenario, rms_dist_str = '', ''
                    # Only for pairs with different spacegroups and available
                    # deltas: classify by energy / band-gap thresholds and
                    # compute the rms distance of the canonical structures.
                    if category == 'diff. SGs' and delta_energy and delta_bandgap:
                        scenario = 'different' if (
                            float(delta_energy) > 0.01 or float(delta_bandgap) > 0.1
                        ) else 'similar'
                        snlgrp1_dict = sma.snlgroups.find_one({ "snlgroup_id": primary_id })
                        snlgrp2_dict = sma.snlgroups.find_one({ "snlgroup_id": secondary_id })
                        snlgrp1 = SNLGroup.from_dict(snlgrp1_dict)
                        snlgrp2 = SNLGroup.from_dict(snlgrp2_dict)
                        primary_structure = snlgrp1.canonical_structure
                        secondary_structure = snlgrp2.canonical_structure
                        rms_dist = matcher.get_rms_dist(primary_structure, secondary_structure)
                        # rms_dist is formatted as a pair of two values when
                        # it is not None; the column stays empty otherwise.
                        if rms_dist is not None:
                            rms_dist_str = "({0:.3g},{1:.3g})".format(*rms_dist)
                            print rms_dist_str
                    row = [
                        category, composition,
                        primary_id, primary_sg_num,
                        snlgroup_data[primary_id]['task_id'] \
                        if primary_id in snlgroup_data else '',
                        secondary_id, secondary_sg_num,
                        snlgroup_data[secondary_id]['task_id'] \
                        if secondary_id in snlgroup_data else '',
                        delta_energy, delta_bandgap, delta_volume_per_atom,
                        rms_dist_str, scenario
                    ]
                    if delta_energy and delta_bandgap: writer1.writerow(row)
                    else: writer2.writerow(row)
        elif args.fig_id == 16:
            out_fig = Figure()
            badsnls_trace = Scatter(x=[], y=[], text=[], mode='markers', name='SG Changes')
            # Diagonal reference line from (0,0) to (230,230).
            bisectrix = Scatter(x=[0,230], y=[0,230], mode='lines', name='bisectrix')
            print 'pulling bad snls from plotly ...'
            # snl_id -> category; trace 2 holds the category in 'y' and the
            # '<br>'-separated snl ids of that category in 'text'.
            bad_snls = OrderedDict()
            for category, text in zip(fig['data'][2]['y'], fig['data'][2]['text']):
                for snl_id in map(int, text.split('<br>')):
                    bad_snls[snl_id] = category
            with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
                print 'pulling bad snls from database ...'
                mpsnl_cursor = sma.snl.find({
                    'snl_id': { '$in': bad_snls.keys() },
                    'about.projects': {'$ne': 'CederDahn Challenge'}
                })
                writer = csv.writer(f)
                writer.writerow([
                    'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks', 'projects', 'authors'
                ])
                print 'writing bad snls to file ...'
                for mpsnl_dict in mpsnl_cursor:
                    mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                    row = [ mpsnl.snl_id, bad_snls[mpsnl.snl_id], mpsnl.snlgroup_key ]
                    row += _get_snl_extra_info(mpsnl)
                    writer.writerow(row)
                    # Spacegroup number (as a string) taken from the part of
                    # the snlgroup_key after '--'.
                    sg_num = mpsnl.snlgroup_key.split('--')[1]
                    # For 'SG change' SNLs, and 'SG default' SNLs whose key
                    # does not carry '-1', recompute the spacegroup (oxidation
                    # states removed, symprec=0.1) and record old vs. new.
                    if (bad_snls[mpsnl.snl_id] == 'SG default' and sg_num != '-1') or \
                       bad_snls[mpsnl.snl_id] == 'SG change':
                        mpsnl.structure.remove_oxidation_states()
                        sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
                        badsnls_trace['x'].append(mpsnl.sg_num)
                        badsnls_trace['y'].append(sf.get_spacegroup_number())
                        badsnls_trace['text'].append(mpsnl.snl_id)
                        if bad_snls[mpsnl.snl_id] == 'SG default':
                            print sg_num, sf.get_spacegroup_number()
                # The figure is uploaded while the CSV file is still open.
                print 'plotting out-fig ...'
                out_fig['data'] = Data([bisectrix, badsnls_trace])
                out_fig['layout'] = Layout(
                    showlegend=False, hovermode='closest',
                    title='Spacegroup Assignment Changes',
                    xaxis=XAxis(showgrid=False, title='old SG number', range=[0,230]),
                    yaxis=YAxis(showgrid=False, title='new SG number', range=[0,230]),
                )
                # Plot name carries today's date as suffix.
                filename = 'spacegroup_changes_'
                filename += datetime.datetime.now().strftime('%Y-%m-%d') 
                py.plot(out_fig, filename=filename, auto_open=False)
        elif args.fig_id == 43: # SNLGroupMemberChecker
            # Dedicated matcher for this re-check (element-only comparison,
            # supercells attempted, no primitive-cell reduction by the matcher
            # itself; primitive structures are passed in explicitly below).
            matcher2 = StructureMatcher(
                ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=False, scale=True,
                attempt_supercell=True, comparator=ElementComparator()
            )
            print 'pulling data from plotly ...'
            trace = Scatter(x=[], y=[], text=[], mode='markers', name='mismatches')
            bad_snls = OrderedDict() # snlgroup_id : [ mismatching snl_ids ]
            for category, text in zip(fig['data'][2]['y'], fig['data'][2]['text']):
                if category != 'mismatch': continue
                # Each '<br>'-separated entry is parsed as
                # '<snlgroup_id>[,...]:<snl_id>,<snl_id>,...'.
                for entry in text.split('<br>'):
                    fields = entry.split(':')
                    snlgroup_id = int(fields[0].split(',')[0])
                    print snlgroup_id
                    snlgrp_dict = sma.snlgroups.find_one({ 'snlgroup_id': snlgroup_id })
                    snlgrp = SNLGroup.from_dict(snlgrp_dict)
                    s1 = snlgrp.canonical_structure.get_primitive_structure()
                    bad_snls[snlgroup_id] = []
                    # snl_id stays a string here; it is converted to int only
                    # for the database lookup.
                    for i, snl_id in enumerate(fields[1].split(',')):
                        mpsnl_dict = sma.snl.find_one({ 'snl_id': int(snl_id) })
                        if 'CederDahn Challenge' in mpsnl_dict['about']['projects']:
                            print 'skip CederDahn: %s' % snl_id
                            continue
                        mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                        s2 = mpsnl.structure.get_primitive_structure()
                        is_match = matcher2.fit(s1, s2)
                        if is_match: continue
                        # Still a mismatch: record it; the y value is the
                        # 1-based position of the SNL within the entry.
                        bad_snls[snlgroup_id].append(snl_id)
                        trace['x'].append(snlgroup_id)
                        trace['y'].append(i+1)
                        trace['text'].append(snl_id)
                    # Drop groups for which no mismatch remained.
                    if len(bad_snls[snlgroup_id]) < 1:
                        bad_snls.pop(snlgroup_id, None)
            with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f:
                print 'pulling bad snlgroups from database ...'
                snlgroup_cursor = sma.snlgroups.find({
                    'snlgroup_id': { '$in': bad_snls.keys() },
                })
                writer = csv.writer(f)
                writer.writerow(['snlgroup_id', 'snlgroup_key', 'mismatching snl_ids'])
                print 'writing bad snlgroups to file ...'
                for snlgroup_dict in snlgroup_cursor:
                    snlgroup = SNLGroup.from_dict(snlgroup_dict)
                    # Mismatching snl ids (strings) are space-separated in a
                    # single CSV column.
                    row = [
                        snlgroup.snlgroup_id, snlgroup.canonical_snl.snlgroup_key,
                        ' '.join(bad_snls[snlgroup.snlgroup_id])
                    ]
                    writer.writerow(row)
            print 'plotting out-fig ...'
            out_fig = Figure()
            out_fig['data'] = Data([trace])
            out_fig['layout'] = Layout(
                showlegend=False, hovermode='closest',
                title='Member Mismatches of SNLGroup Canonicals',
                xaxis=XAxis(showgrid=False, title='snlgroup_id', showexponent='none'),
                yaxis=YAxis(showgrid=False, title='# mismatching SNLs'),
            )
            # Plot name carries today's date as suffix.
            filename = 'groupmember_mismatches_'
            filename += datetime.datetime.now().strftime('%Y-%m-%d') 
            py.plot(out_fig, filename=filename, auto_open=False)
    else:
        errors = Counter() # marker color -> number of occurrences
        bad_snls = OrderedDict() # snl_id : category
        bad_snlgroups = OrderedDict() # snlgroup_id : first mismatching snl_id
        for i,d in enumerate(fig['data']):
            # Only Scatter traces that carry x, y and text are decoded.
            if not isinstance(d, Scatter): continue
            if not 'x' in d or not 'y' in d or not 'text' in d: continue
            # The id offset of a trace is parsed from its name: the part
            # before ' - ' without its last character, times 1000
            # (presumably names look like '<N>k - <M>k' -- TODO confirm).
            start_id = int(d['name'].split(' - ')[0][:-1])*1000
            marker_colors = d['marker']['color']
            if i < 2*num_snl_streams: # spacegroups
                errors += Counter(marker_colors)
                # Map every marker color back to its category name.
                for idx,color in enumerate(marker_colors):
                    snl_id = start_id + d['x'][idx]
                    color_index = category_colors.index(color)
                    category = categories[color_index]
                    bad_snls[snl_id] = category
            else: # groupmembers
                for idx,color in enumerate(marker_colors):
                    # Only markers in the first category color are used; their
                    # text is split on ' != ' and the left part is the
                    # mismatching snl id (the right part is not used).
                    if color != category_colors[0]: continue
                    snlgroup_id = start_id + d['x'][idx]
                    mismatch_snl_id, canonical_snl_id = d['text'][idx].split(' != ')
                    bad_snlgroups[snlgroup_id] = int(mismatch_snl_id)
        print errors
        # Update the last trace's x values with the per-color error counts;
        # the figure is only modified locally since the plot call below is
        # commented out.
        fig_data = fig['data'][-1]
        fig_data['x'] = [ errors[color] for color in fig_data['marker']['color'] ]
        filename = _get_filename()
        print filename
        #py.plot(fig, filename=filename)
        with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
            mpsnl_cursor = sma.snl.find({ 'snl_id': { '$in': bad_snls.keys() } })
            writer = csv.writer(f)
            writer.writerow([
                'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks', 'projects', 'authors'
            ])
            for mpsnl_dict in mpsnl_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                row = [ mpsnl.snl_id, bad_snls[mpsnl.snl_id], mpsnl.snlgroup_key ]
                row += _get_snl_extra_info(mpsnl)
                writer.writerow(row)
        with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f:
            snlgrp_cursor = sma.snlgroups.find({ 'snlgroup_id': { '$in': bad_snlgroups.keys() } })
            first_mismatch_snls_cursor = sma.snl.find({ 'snl_id': { '$in': bad_snlgroups.values() } })
            # snl_id -> extra info of every first-mismatching SNL, collected
            # up front so the group loop below needs no further queries.
            first_mismatch_snl_info = OrderedDict()
            for mpsnl_dict in first_mismatch_snls_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                first_mismatch_snl_info[mpsnl.snl_id] = _get_snl_extra_info(mpsnl)
            writer = csv.writer(f)
            writer.writerow([
                'snlgroup_id', 'snlgroup_key',
                'canonical_snl_id', 'first_mismatching_snl_id',
                 'nsites', 'remarks', 'projects', 'authors'
            ])
            for snlgrp_dict in snlgrp_cursor:
                snlgrp = SNLGroup.from_dict(snlgrp_dict)
                first_mismatch_snl_id = bad_snlgroups[snlgrp.snlgroup_id]
                row = [
                    snlgrp.snlgroup_id, snlgrp.canonical_snl.snlgroup_key,
                    snlgrp.canonical_snl.snl_id, first_mismatch_snl_id
                ]
                # Merge the extra info of canonical and mismatching SNL column
                # by column: identical values are written once, differing
                # values as 'canonical & mismatch' (the join assumes
                # _get_snl_extra_info returns strings -- TODO confirm).
                row += [
                    ' & '.join(pair) if pair[0] != pair[1] else pair[0]
                    for pair in zip(
                        _get_snl_extra_info(snlgrp.canonical_snl),
                        first_mismatch_snl_info[int(first_mismatch_snl_id)]
                    )
                ]
                writer.writerow(row)
示例#23
0
    module_dir = os.path.dirname(os.path.abspath(__file__))
    snl_f = os.path.join(module_dir, 'snl.yaml')
    snldb = SNLMongoAdapter.from_file(snl_f)

    all_snl_ids = []  # snl ids that have a group
    all_missing_ids = []  # snl ids missing a group
    idx = 0
    print 'GETTING GROUPS'
    for x in snldb.snlgroups.find({}, {"all_snl_ids": 1}):
        all_snl_ids.extend(x['all_snl_ids'])

    print 'CHECKING SNL'
    for x in snldb.snl.find({}, {'snl_id': 1}, timeout=False):
        print x['snl_id']
        if x['snl_id'] not in all_snl_ids:
            print x['snl_id'], '*********'
            all_missing_ids.append(x['snl_id'])

    print 'FIXING / ADDING GROUPS'
    print all_missing_ids

    for snl_id in all_missing_ids:
        try:
            mpsnl = MPStructureNL.from_dict(snldb.snl.find_one({"snl_id": snl_id}))
            snldb.build_groups(mpsnl)
            print 'SUCCESSFUL', snl_id
        except:
            print 'ERROR with snl_id', snl_id
            traceback.print_exc()

示例#24
0
    module_dir = os.path.dirname(os.path.abspath(__file__))
    snl_f = os.path.join(module_dir, 'snl.yaml')
    snldb = SNLMongoAdapter.from_file(snl_f)

    all_snl_ids = []  # snl ids that have a group
    all_missing_ids = []  # snl ids missing a group
    idx = 0
    print 'GETTING GROUPS'
    for x in snldb.snlgroups.find({}, {"all_snl_ids": 1}):
        all_snl_ids.extend(x['all_snl_ids'])

    print 'CHECKING SNL'
    for x in snldb.snl.find({}, {'snl_id': 1}, timeout=False):
        print x['snl_id']
        if x['snl_id'] not in all_snl_ids:
            print x['snl_id'], '*********'
            all_missing_ids.append(x['snl_id'])

    print 'FIXING / ADDING GROUPS'
    print all_missing_ids

    for snl_id in all_missing_ids:
        try:
            mpsnl = MPStructureNL.from_dict(
                snldb.snl.find_one({"snl_id": snl_id}))
            snldb.build_groups(mpsnl)
            print 'SUCCESSFUL', snl_id
        except:
            print 'ERROR with snl_id', snl_id
            traceback.print_exc()
示例#25
0
def check_snls_in_snlgroups(args):
    """check whether SNLs in each SNLGroup still match resp. canonical SNL

    Iterates over all SNL groups with args.start < snlgroup_id <= args.end
    (the upper bound is capped at num_snlgroups) and re-fits every
    non-canonical member against the group's canonical structure.

    Results go to two plotly streams selected by the id range:
      * s[0] (scatter, "bad"): one point for a group that cannot be loaded,
        or for the first member of a group that raises or does not match.
      * s[1] (bar, "good"): running count of groups without problems.

    The streams are always closed again, also when the run is aborted by an
    error or Ctrl-C.
    """
    # Floor division: the result is used as an index offset below.
    range_index = args.start // num_ids_per_stream
    first_idx = 2 * (num_snl_streams + range_index)
    idxs = [first_idx, first_idx + 1]
    s = [py.Stream(stream_ids[i]) for i in idxs]
    opened = []  # only streams that were opened successfully get closed
    try:
        for stream in s:
            stream.open()
            opened.append(stream)
        end = min(args.end, num_snlgroups)
        id_range = {"$gt": args.start, "$lte": end}
        snlgrp_cursor = sma.snlgroups.find({"snlgroup_id": id_range})
        # All marker colors written so far; the full list is re-sent with
        # every scatter point.
        colors = []
        num_good_ids = 0
        for snlgrp_dict in snlgrp_cursor:
            start_time = time.clock()
            try:
                snlgrp = SNLGroup.from_dict(snlgrp_dict)
            except Exception:
                # Group itself cannot be deserialized -> category "Others".
                exc_type, exc_value = sys.exc_info()[:2]
                text = ' '.join([str(exc_type), str(exc_value)])
                colors.append(category_colors[-1])  # Others
                data = dict(x=snlgrp_dict['snlgroup_id'] % num_ids_per_stream,
                            y=range_index,
                            text=text,
                            marker=Marker(color=colors))
                s[0].write(data)
                sleep(start_time)
                continue
            if len(snlgrp.all_snl_ids) <= 1:
                # Nothing to compare against the canonical SNL.
                num_good_ids += 1
                data = dict(x=[num_good_ids], y=[range_index])
                s[1].write(data)
                sleep(start_time)
                continue
            all_snls_good = True
            for snl_id in snlgrp.all_snl_ids:
                if snl_id == snlgrp.canonical_snl.snl_id:
                    continue
                mpsnl_dict = sma.snl.find_one({"snl_id": snl_id})
                try:
                    mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                    is_match = matcher.fit(mpsnl.structure,
                                           snlgrp.canonical_structure)
                except Exception:
                    # pybtex errors get their own category (2), any other
                    # exception falls into category 3.
                    exc_type, exc_value = sys.exc_info()[:2]
                    category = 2 if fnmatch(str(exc_type), '*pybtex*') else 3
                    text = ' '.join([str(exc_type), str(exc_value)])
                else:
                    if is_match:
                        continue
                    category = 0
                    text = '%d != can:%d' % (mpsnl_dict['snl_id'],
                                             snlgrp.canonical_snl.snl_id)
                # Scatter (bad): report the first failing member only.
                colors.append(category_colors[category])
                data = dict(x=snlgrp_dict['snlgroup_id'] % num_ids_per_stream,
                            y=range_index,
                            text=text,
                            marker=Marker(color=colors))
                s[0].write(data)
                all_snls_good = False
                sleep(start_time)
                break
            if all_snls_good:  # Bar (good)
                num_good_ids += 1
                data = dict(x=[num_good_ids], y=[range_index])
                s[1].write(data)
                sleep(start_time)
    finally:
        for stream in opened:
            stream.close()
示例#26
0
def analyze(args):
    """analyze data at any point for a copy of the streaming figure"""
    # NOTE: make copy online first with suffix _%Y-%m-%d and note figure id
    fig = py.get_figure(creds['username'], args.fig_id)
    if args.t:
        if args.fig_id == 42:
            label_entries = filter(
                None, '<br>'.join(fig['data'][2]['text']).split('<br>'))
            pairs = map(make_tuple, label_entries)
            grps = set(chain.from_iterable(pairs))
            snlgrp_cursor = sma.snlgroups.aggregate([{
                '$match': {
                    'snlgroup_id': {
                        '$in': list(grps)
                    },
                    'canonical_snl.about.projects': {
                        '$ne': 'CederDahn Challenge'
                    }
                }
            }, {
                '$project': {
                    'snlgroup_id': 1,
                    'canonical_snl.snlgroup_key': 1,
                    '_id': 0
                }
            }],
                                                    cursor={})
            snlgroup_keys = {}
            for d in snlgrp_cursor:
                snlgroup_keys[
                    d['snlgroup_id']] = d['canonical_snl']['snlgroup_key']
            print snlgroup_keys[40890]
            sma2 = SNLMongoAdapter.from_file(
                os.path.join(os.environ['DB_LOC'], 'materials_db.yaml'))
            materials_cursor = sma2.database.materials.aggregate([{
                '$match': {
                    'snlgroup_id_final': {
                        '$in': list(grps)
                    },
                    'snl_final.about.projects': {
                        '$ne': 'CederDahn Challenge'
                    }
                }
            }, {
                '$project': {
                    'snlgroup_id_final': 1,
                    '_id': 0,
                    'task_id': 1,
                    'final_energy_per_atom': 1,
                    'band_gap.search_gap.band_gap': 1,
                    'volume': 1,
                    'nsites': 1
                }
            }],
                                                                 cursor={})
            snlgroup_data = {}
            for material in materials_cursor:
                snlgroup_id = material['snlgroup_id_final']
                final_energy_per_atom = material['final_energy_per_atom']
                band_gap = material['band_gap']['search_gap']['band_gap']
                volume_per_atom = material['volume'] / material['nsites']
                snlgroup_data[snlgroup_id] = {
                    'final_energy_per_atom': final_energy_per_atom,
                    'band_gap': band_gap,
                    'task_id': material['task_id'],
                    'volume_per_atom': volume_per_atom
                }
            print snlgroup_data[40890]
            filestem = 'mpworks/check_snl/results/bad_snlgroups_2_'
            with open(filestem+'in_matdb.csv', 'wb') as f, \
                    open(filestem+'notin_matdb.csv', 'wb') as g:
                writer1, writer2 = csv.writer(f), csv.writer(g)
                header = [
                    'category', 'composition', 'snlgroup_id 1', 'sg_num 1',
                    'task_id 1', 'snlgroup_id 2', 'sg_num 2', 'task_id 2',
                    'delta_energy', 'delta_bandgap', 'delta_volume_per_atom',
                    'rms_dist', 'scenario'
                ]
                writer1.writerow(header)
                writer2.writerow(header)
                for primary_id, secondary_id in pairs:
                    if primary_id not in snlgroup_keys or \
                       secondary_id not in snlgroup_keys:
                        continue
                    composition, primary_sg_num = snlgroup_keys[
                        primary_id].split('--')
                    secondary_sg_num = snlgroup_keys[secondary_id].split(
                        '--')[1]
                    category = 'same SGs' if primary_sg_num == secondary_sg_num else 'diff. SGs'
                    if primary_id not in snlgroup_data or secondary_id not in snlgroup_data:
                        delta_energy, delta_bandgap, delta_volume_per_atom = '', '', ''
                    else:
                        delta_energy = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['final_energy_per_atom'] - \
                            snlgroup_data[secondary_id]['final_energy_per_atom']
                        ))
                        delta_bandgap = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['band_gap'] - \
                            snlgroup_data[secondary_id]['band_gap']
                        ))
                        delta_volume_per_atom = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['volume_per_atom'] - \
                            snlgroup_data[secondary_id]['volume_per_atom']
                        ))
                    scenario, rms_dist_str = '', ''
                    if category == 'diff. SGs' and delta_energy and delta_bandgap:
                        scenario = 'different' if (
                            float(delta_energy) > 0.01
                            or float(delta_bandgap) > 0.1) else 'similar'
                        snlgrp1_dict = sma.snlgroups.find_one(
                            {"snlgroup_id": primary_id})
                        snlgrp2_dict = sma.snlgroups.find_one(
                            {"snlgroup_id": secondary_id})
                        snlgrp1 = SNLGroup.from_dict(snlgrp1_dict)
                        snlgrp2 = SNLGroup.from_dict(snlgrp2_dict)
                        primary_structure = snlgrp1.canonical_structure
                        secondary_structure = snlgrp2.canonical_structure
                        rms_dist = matcher.get_rms_dist(
                            primary_structure, secondary_structure)
                        if rms_dist is not None:
                            rms_dist_str = "({0:.3g},{1:.3g})".format(
                                *rms_dist)
                            print rms_dist_str
                    row = [
                        category, composition,
                        primary_id, primary_sg_num,
                        snlgroup_data[primary_id]['task_id'] \
                        if primary_id in snlgroup_data else '',
                        secondary_id, secondary_sg_num,
                        snlgroup_data[secondary_id]['task_id'] \
                        if secondary_id in snlgroup_data else '',
                        delta_energy, delta_bandgap, delta_volume_per_atom,
                        rms_dist_str, scenario
                    ]
                    if delta_energy and delta_bandgap: writer1.writerow(row)
                    else: writer2.writerow(row)
        elif args.fig_id == 16:
            out_fig = Figure()
            badsnls_trace = Scatter(x=[],
                                    y=[],
                                    text=[],
                                    mode='markers',
                                    name='SG Changes')
            bisectrix = Scatter(x=[0, 230],
                                y=[0, 230],
                                mode='lines',
                                name='bisectrix')
            print 'pulling bad snls from plotly ...'
            bad_snls = OrderedDict()
            for category, text in zip(fig['data'][2]['y'],
                                      fig['data'][2]['text']):
                for snl_id in map(int, text.split('<br>')):
                    bad_snls[snl_id] = category
            with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
                print 'pulling bad snls from database ...'
                mpsnl_cursor = sma.snl.find({
                    'snl_id': {
                        '$in': bad_snls.keys()
                    },
                    'about.projects': {
                        '$ne': 'CederDahn Challenge'
                    }
                })
                writer = csv.writer(f)
                writer.writerow([
                    'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks',
                    'projects', 'authors'
                ])
                print 'writing bad snls to file ...'
                for mpsnl_dict in mpsnl_cursor:
                    mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                    row = [
                        mpsnl.snl_id, bad_snls[mpsnl.snl_id],
                        mpsnl.snlgroup_key
                    ]
                    row += _get_snl_extra_info(mpsnl)
                    writer.writerow(row)
                    sg_num = mpsnl.snlgroup_key.split('--')[1]
                    if (bad_snls[mpsnl.snl_id] == 'SG default' and sg_num != '-1') or \
                       bad_snls[mpsnl.snl_id] == 'SG change':
                        mpsnl.structure.remove_oxidation_states()
                        sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
                        badsnls_trace['x'].append(mpsnl.sg_num)
                        badsnls_trace['y'].append(sf.get_spacegroup_number())
                        badsnls_trace['text'].append(mpsnl.snl_id)
                        if bad_snls[mpsnl.snl_id] == 'SG default':
                            print sg_num, sf.get_spacegroup_number()
                print 'plotting out-fig ...'
                out_fig['data'] = Data([bisectrix, badsnls_trace])
                out_fig['layout'] = Layout(
                    showlegend=False,
                    hovermode='closest',
                    title='Spacegroup Assignment Changes',
                    xaxis=XAxis(showgrid=False,
                                title='old SG number',
                                range=[0, 230]),
                    yaxis=YAxis(showgrid=False,
                                title='new SG number',
                                range=[0, 230]),
                )
                filename = 'spacegroup_changes_'
                filename += datetime.datetime.now().strftime('%Y-%m-%d')
                py.plot(out_fig, filename=filename, auto_open=False)
        elif args.fig_id == 43:  # SNLGroupMemberChecker
            # Re-check every SNL group member that the source figure flags as
            # a 'mismatch' against its group's canonical structure, write the
            # groups that still mismatch to a CSV file and plot them.
            # The matcher is configured with the tolerances below and uses
            # ElementComparator as its comparator.
            matcher2 = StructureMatcher(ltol=0.2,
                                        stol=0.3,
                                        angle_tol=5,
                                        primitive_cell=False,
                                        scale=True,
                                        attempt_supercell=True,
                                        comparator=ElementComparator())
            print 'pulling data from plotly ...'
            # Output trace: one marker per SNL that still fails the match.
            trace = Scatter(x=[],
                            y=[],
                            text=[],
                            mode='markers',
                            name='mismatches')
            bad_snls = OrderedDict()  # snlgroup_id : [ mismatching snl_ids ]
            # Only the third trace of the input figure (fig['data'][2]) is
            # read; its y values act as category labels and its text values
            # carry the ids.
            for category, text in zip(fig['data'][2]['y'],
                                      fig['data'][2]['text']):
                if category != 'mismatch': continue
                # As parsed below, each '<br>'-separated entry has the form
                # '<snlgroup_id>[,...]:<snl_id>,<snl_id>,...'.
                for entry in text.split('<br>'):
                    fields = entry.split(':')
                    snlgroup_id = int(fields[0].split(',')[0])
                    print snlgroup_id
                    # NOTE(review): presumably a pymongo collection, in which
                    # case find_one returns None when nothing matches; that
                    # case is not handled here -- confirm ids always exist.
                    snlgrp_dict = sma.snlgroups.find_one(
                        {'snlgroup_id': snlgroup_id})
                    snlgrp = SNLGroup.from_dict(snlgrp_dict)
                    # Reference structure: primitive cell of the canonical SNL.
                    s1 = snlgrp.canonical_structure.get_primitive_structure()
                    bad_snls[snlgroup_id] = []
                    for i, snl_id in enumerate(fields[1].split(',')):
                        mpsnl_dict = sma.snl.find_one({'snl_id': int(snl_id)})
                        # SNLs belonging to the 'CederDahn Challenge' project
                        # are excluded from the check.
                        if 'CederDahn Challenge' in mpsnl_dict['about'][
                                'projects']:
                            print 'skip CederDahn: %s' % snl_id
                            continue
                        mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                        s2 = mpsnl.structure.get_primitive_structure()
                        is_match = matcher2.fit(s1, s2)
                        if is_match: continue
                        # snl_id is kept as a string here; the CSV writer
                        # below joins these values with ' '.join().
                        bad_snls[snlgroup_id].append(snl_id)
                        trace['x'].append(snlgroup_id)
                        # NOTE(review): y is the 1-based position of the SNL in
                        # the entry's id list (skipped CederDahn ids still
                        # advance i), while the y-axis below is titled
                        # '# mismatching SNLs' -- confirm this is intended.
                        trace['y'].append(i + 1)
                        trace['text'].append(snl_id)
                    # Drop groups for which every member matched (or was
                    # skipped), so only real mismatches are reported.
                    if len(bad_snls[snlgroup_id]) < 1:
                        bad_snls.pop(snlgroup_id, None)
            # 'wb' is the file mode expected by the Python 2 csv module.
            with open('mpworks/check_snl/results/bad_snlgroups.csv',
                      'wb') as f:
                print 'pulling bad snlgroups from database ...'
                # Fetch all still-mismatching groups in a single query.
                snlgroup_cursor = sma.snlgroups.find({
                    'snlgroup_id': {
                        '$in': bad_snls.keys()
                    },
                })
                writer = csv.writer(f)
                writer.writerow(
                    ['snlgroup_id', 'snlgroup_key', 'mismatching snl_ids'])
                print 'writing bad snlgroups to file ...'
                for snlgroup_dict in snlgroup_cursor:
                    snlgroup = SNLGroup.from_dict(snlgroup_dict)
                    # One row per group; mismatching ids are space-separated
                    # within the third column.
                    row = [
                        snlgroup.snlgroup_id,
                        snlgroup.canonical_snl.snlgroup_key,
                        ' '.join(bad_snls[snlgroup.snlgroup_id])
                    ]
                    writer.writerow(row)
            print 'plotting out-fig ...'
            out_fig = Figure()
            out_fig['data'] = Data([trace])
            out_fig['layout'] = Layout(
                showlegend=False,
                hovermode='closest',
                title='Member Mismatches of SNLGroup Canonicals',
                xaxis=XAxis(showgrid=False,
                            title='snlgroup_id',
                            showexponent='none'),
                yaxis=YAxis(showgrid=False, title='# mismatching SNLs'),
            )
            # The plot name is suffixed with today's date (YYYY-MM-DD).
            filename = 'groupmember_mismatches_'
            filename += datetime.datetime.now().strftime('%Y-%m-%d')
            py.plot(out_fig, filename=filename, auto_open=False)
    else:
        # Default analysis: walk all Scatter traces of the input figure,
        # tally marker colors for the spacegroup traces, collect the flagged
        # SNLs / SNL groups and dump both sets to CSV files.
        errors = Counter()  # marker color -> number of occurrences
        bad_snls = OrderedDict()  # snl_id : category
        bad_snlgroups = OrderedDict()  # snlgroup_id : mismatching snl_id (int)
        for i, d in enumerate(fig['data']):
            # Ignore traces that are not scatter plots or lack x/y/text data.
            if not isinstance(d, Scatter): continue
            if not 'x' in d or not 'y' in d or not 'text' in d: continue
            # The trace name is parsed as '<N><one char> - ...': the part
            # before ' - ' minus its last character is read as an integer and
            # scaled by 1000 to get the id offset of this trace.
            # NOTE(review): presumably names look like '<N>k - <M>k' -- confirm.
            start_id = int(d['name'].split(' - ')[0][:-1]) * 1000
            marker_colors = d['marker']['color']
            if i < 2 * num_snl_streams:  # spacegroups
                errors += Counter(marker_colors)
                for idx, color in enumerate(marker_colors):
                    # x holds the id relative to the trace's start_id.
                    snl_id = start_id + d['x'][idx]
                    # Map the marker color back to its category name;
                    # category_colors and categories are indexed in parallel.
                    color_index = category_colors.index(color)
                    category = categories[color_index]
                    bad_snls[snl_id] = category
            else:  # groupmembers
                for idx, color in enumerate(marker_colors):
                    # Only points drawn in the first category color are kept.
                    if color != category_colors[0]: continue
                    snlgroup_id = start_id + d['x'][idx]
                    # Point text has the form '<mismatch id> != <canonical id>';
                    # only the mismatching id is used below.
                    mismatch_snl_id, canonical_snl_id = d['text'][idx].split(
                        ' != ')
                    bad_snlgroups[snlgroup_id] = int(mismatch_snl_id)
        print errors
        # Update the x values of the last trace with the per-color totals
        # (a Counter yields 0 for colors that never occurred).
        fig_data = fig['data'][-1]
        fig_data['x'] = [
            errors[color] for color in fig_data['marker']['color']
        ]
        filename = _get_filename()
        print filename
        # Uploading the updated figure is currently disabled.
        #py.plot(fig, filename=filename)
        # 'wb' is the file mode expected by the Python 2 csv module.
        with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
            mpsnl_cursor = sma.snl.find({'snl_id': {'$in': bad_snls.keys()}})
            writer = csv.writer(f)
            writer.writerow([
                'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks',
                'projects', 'authors'
            ])
            for mpsnl_dict in mpsnl_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                row = [
                    mpsnl.snl_id, bad_snls[mpsnl.snl_id], mpsnl.snlgroup_key
                ]
                # presumably _get_snl_extra_info returns the nsites, remarks,
                # projects and authors columns of the header -- verify.
                row += _get_snl_extra_info(mpsnl)
                writer.writerow(row)
        # NOTE(review): the fig_id == 43 branch writes to this same path with
        # a different column layout; whichever runs last overwrites the file.
        with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f:
            snlgrp_cursor = sma.snlgroups.find(
                {'snlgroup_id': {
                    '$in': bad_snlgroups.keys()
                }})
            first_mismatch_snls_cursor = sma.snl.find(
                {'snl_id': {
                    '$in': bad_snlgroups.values()
                }})
            # Cache the extra-info columns of every mismatching SNL by snl_id
            # so the row loop below needs no further per-SNL queries.
            first_mismatch_snl_info = OrderedDict()
            for mpsnl_dict in first_mismatch_snls_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                first_mismatch_snl_info[mpsnl.snl_id] = _get_snl_extra_info(
                    mpsnl)
            writer = csv.writer(f)
            writer.writerow([
                'snlgroup_id', 'snlgroup_key', 'canonical_snl_id',
                'first_mismatching_snl_id', 'nsites', 'remarks', 'projects',
                'authors'
            ])
            for snlgrp_dict in snlgrp_cursor:
                snlgrp = SNLGroup.from_dict(snlgrp_dict)
                first_mismatch_snl_id = bad_snlgroups[snlgrp.snlgroup_id]
                row = [
                    snlgrp.snlgroup_id, snlgrp.canonical_snl.snlgroup_key,
                    snlgrp.canonical_snl.snl_id, first_mismatch_snl_id
                ]
                # Merge the extra-info columns of the canonical SNL and the
                # mismatching SNL: identical values are written once,
                # differing values are written as 'canonical & mismatch'.
                # The join requires both values to be strings.
                # NOTE(review): the lookup raises KeyError if the mismatching
                # SNL was not returned by the query above -- confirm it
                # always exists.
                row += [
                    ' & '.join(pair) if pair[0] != pair[1] else pair[0]
                    for pair in zip(
                        _get_snl_extra_info(snlgrp.canonical_snl),
                        first_mismatch_snl_info[int(first_mismatch_snl_id)])
                ]
                writer.writerow(row)