Пример #1
0
    def add_structure(self, source, name=None, identifier=None, fmt=None):
        """Add a structure to the MPFile and return the identifier it is filed under.

        Args:
            source: a pymatgen ``Structure``, its dict representation, the
                path of an existing structure file, or a string containing
                the structure serialized in format `fmt`.
            name (str): optional key for the structure within the structures
                sub-section; must be a string without dots. The formula
                derived from the structure is used when omitted.
            identifier (str): optional identifier (root-level key) to file
                the structure under. If MP returns matches and `identifier`
                is given, it has to be one of them.
            fmt (str): format passed to ``Structure.from_str``; required when
                `source` is a string that is not an existing path.

        Returns:
            The identifier used as root-level key for the structure.

        Raises:
            ValueError: if `source` is of an unsupported type, `fmt` is
                missing for a string source, `name` is invalid, no API key is
                set, or `identifier` is not among the matched MP ids.
        """
        from pymatgen import Structure, MPRester
        is_string = isinstance(source, six.string_types)
        # Only strings and path-like objects may be probed on the file
        # system. Passing anything else to os.path.exists would raise a
        # TypeError (or treat integers as file descriptors) instead of
        # reaching the ValueError below.
        is_path_like = is_string or hasattr(source, '__fspath__')
        if isinstance(source, Structure):
            structure = source
        elif isinstance(source, dict):
            structure = Structure.from_dict(source)
        elif is_path_like and os.path.exists(source):
            structure = Structure.from_file(source, sort=True)
        elif is_string:
            if fmt is None:
                raise ValueError('Need fmt to get structure from string!')
            structure = Structure.from_str(source, fmt, sort=True)
        else:
            raise ValueError(source, 'not supported!')

        if name is not None:
            if not isinstance(name, six.string_types):
                raise ValueError('structure name needs to be a string')
            elif '.' in name:
                raise ValueError('structure name cannot contain dots (.)')

        mpr = MPRester()
        if not mpr.api_key:
            raise ValueError(
                'API key not set. Run `pmg config --add PMG_MAPI_KEY <USER_API_KEY>`.'
            )
        matched_mpids = mpr.find_structure(structure)
        formula = get_composition_from_string(structure.composition.formula)
        if not matched_mpids:
            # no match in MP: fall back to the formula or trust the caller
            if identifier is None:
                identifier = formula
                print(
                    'Structure not found in MP! Please submit via MPComplete to '
                    'obtain mp-id or manually choose an anchor mp-id! Continuing '
                    'with {} as identifier!'.format(identifier))
            else:
                print('Structure not found in MP! Forcing {} as identifier!'.
                      format(identifier))
        elif identifier is None:
            identifier = matched_mpids[0]
            if len(matched_mpids) > 1:
                print('Multiple matching structures found in MP. Using',
                      identifier)
        elif identifier not in matched_mpids:
            raise ValueError(
                'Structure does not match {} but instead {}'.format(
                    identifier, matched_mpids))

        existing = self.document.get(identifier, {}).get(
            mp_level01_titles[3], {})
        idx = len(existing)
        base_key = formula if name is None else name
        sub_key = base_key
        # Keep suffixing until the key is really unused. A single suffix can
        # still hit an existing entry, and rec_update would then merge two
        # different structures into one.
        while sub_key in existing:
            sub_key = '{}_{}'.format(base_key, idx)
            idx += 1
        self.document.rec_update(
            nest_dict(structure.as_dict(),
                      [identifier, mp_level01_titles[3], sub_key]))
        return identifier
Пример #2
0
 def concat(self, mpfile):
     """Concatenate a single-section MPFile with this MPFile.

     The section of `mpfile` is moved (popped) into this MPFile's document
     under the key returned by ``self.get_unique_mp_cat_id``. If that section
     contains general data and this MPFile has no general section yet, the
     general data is copied to the root level of this document first.

     Note that `mpfile` is modified: its section (and the general data
     within it) is popped from its document.

     Args:
         mpfile: MPFile whose document holds exactly one section

     Raises:
         ValueError: if `mpfile` has no ``document`` attribute or its
             document does not hold exactly one section.
     """
     # keep the try minimal: only the attribute access can signal a non-MPFile
     try:
         n_sections = len(mpfile.document)
     except AttributeError:
         raise ValueError('Provide a MPFile to concatenate')
     # an empty document is rejected as well instead of failing on the
     # key lookup below
     if n_sections != 1:
         raise ValueError(
             'concatenation only possible with single section files')
     # list(...) keeps this working where keys() returns a view (Python 3)
     mp_cat_id = list(mpfile.document.keys())[0]
     general_title = mp_level01_titles[0]
     if general_title in mpfile.document[mp_cat_id]:
         general_data = mpfile.document[mp_cat_id].pop(general_title)
         # an already present general section wins over the incoming one
         if general_title not in self.document:
             self.document.rec_update(
                 nest_dict(general_data, [general_title]))
     self.document.rec_update(
         nest_dict(mpfile.document.pop(mp_cat_id),
                   [self.get_unique_mp_cat_id(mp_cat_id)]))
Пример #3
0
    def add_data_table(self, identifier, dataframe, name):
        """Store a data table under `name` in the root-level section `identifier`.

        Args:
            identifier (str): MP category ID (`mp_cat_id`)
            dataframe (pandas.DataFrame): tabular data as Pandas DataFrame
            name (str): key of the table within the section (currently
                required, see TODO below)
        """
        # TODO: optional table name, required if multiple tables per root-level section
        table_as_dict = pandas_to_dict(dataframe)
        key_path = [identifier, name]
        nested_table = nest_dict(table_as_dict, key_path)
        self.document.rec_update(nested_table)
Пример #4
0
 def parse(self, file_string):
     """Recursively parse sections according to number of separators.

     The text is split on ``self.separator_regex()``. If separators are
     found, every title/body pair is processed by descending one level
     (``increase_level``), parsing the body recursively and ascending again
     (``reduce_level``). Otherwise the text is treated as a leaf section: it
     is read via ``self.read_csv`` and merged into ``self.document`` under
     the path given by ``self.section_titles``.

     Args:
         file_string (str): text of the (sub-)section to parse
     """
     # split into section title line (even) and section body (odd entries)
     sections = re.split(self.separator_regex(), file_string)
     if len(sections) > 1:
         # check for preceding bare section_body (without section title), and parse
         if sections[0] != '': self.parse(sections[0])
         # drop preceding bare section_body
         sections = sections[1:] # https://docs.python.org/2/library/re.html#re.split
         # bodies sit at odd indices; the title of body i is at index 2*i
         for section_index,section_body in enumerate(sections[1::2]):
             clean_title = self.clean_title(sections[2*section_index])
             # uniquify level-0 titles if necessary
             if self.level == min_indent_level and clean_title in self.document:
                 clean_title += '--%d' % self.level0_counter
                 self.level0_counter += 1
             # descend, parse the body at the deeper level, then ascend again
             self.increase_level(clean_title)
             self.parse(section_body)
             self.reduce_level()
     else:
         # separator level not found, convert section body to pandas object,
         # filed under the innermost title currently being parsed
         section_title = self.section_titles[-1]
         is_data_section, pd_obj = self.read_csv(section_title, file_string)
         logging.info(pd_obj)
         # TODO: include validation
         # use first csv table for default plot, first column as x-column
         # (only if the top-level section has no mp_level01_titles[2] entry yet)
         if is_data_section and mp_level01_titles[2] not in \
            self.document[self.section_titles[0]]:
             self.document.rec_update(nest_dict(
                 {'x': pd_obj.columns[0], 'table': section_title},
                 [self.section_titles[0], mp_level01_titles[2], 'default']
             ))
         # add data section title to nest 'bare' data under data section
         # => artificially increase and decrease level (see below)
         is_bare_data = (is_data_section and self.is_bare_section(section_title))
         if is_bare_data: self.increase_level(mp_level01_titles[1])
         # update nested dict/document based on section level
         self.document.rec_update(nest_dict(
             self.to_dict(pd_obj), self.section_titles
         ))
         # undo the artificial level increase from above
         if is_bare_data: self.reduce_level()
Пример #5
0
 def insert_general_section(self, general_mpfile):
     """Insert general section from `general_mpfile` into this MPFile.

     Every key of the general section is added to the first root-level
     section of this document: keys already present there are merged via
     ``rec_update``, all others are inserted in front of the sub-key that
     was first in that section when the method was called.

     Args:
         general_mpfile: MPFile holding the general section under
             ``mp_level01_titles[0]``; nothing happens if it is None.
     """
     if general_mpfile is None:
         return
     general_title = mp_level01_titles[0]
     general_data = general_mpfile.document[general_title]
     # list(...) keeps the lookups working where keys() returns a
     # non-subscriptable view (Python 3)
     root_key = list(self.document.keys())[0]
     first_subkey = list(self.document[root_key].keys())[0]
     for key, value in general_data.items():
         if key in self.document[root_key]:
             self.document.rec_update(nest_dict(value, [root_key, key]))
         else:
             self.document[root_key].insert_before(first_subkey,
                                                   (key, value))
Пример #6
0
 def concat(self, mpfile, uniquify=True):
     """Concatenate a single-section MPFile with this MPFile.

     The section of `mpfile` is moved (popped) into this MPFile's document.
     If that section contains general data and this MPFile has no general
     section yet, the general data is copied to the root level first.

     Note that `mpfile` is modified: its section (and the general data
     within it) is popped from its document.

     Args:
         mpfile: MPFile whose document holds exactly one section
         uniquify (bool): if True, append ``--<index>`` to the section key
             until it does not clash with a key already in this document;
             if False, the section is merged into an existing key.

     Raises:
         ValueError: if `mpfile` has no ``document`` attribute or its
             document does not hold exactly one section.
     """
     # keep the try minimal: only the attribute access can signal a non-MPFile
     try:
         n_sections = len(mpfile.document)
     except AttributeError:
         raise ValueError('Provide a MPFile to concatenate')
     # an empty document is rejected as well instead of failing on the
     # key lookup below
     if n_sections != 1:
         raise ValueError(
             'concatenation only possible with single section files')
     # list(...) keeps this working where keys() returns a view (Python 3)
     mp_cat_id = list(mpfile.document.keys())[0]
     general_title = mp_level01_titles[0]
     if general_title in mpfile.document[mp_cat_id]:
         general_data = mpfile.document[mp_cat_id].pop(general_title)
         # an already present general section wins over the incoming one
         if general_title not in self.document:
             self.document.rec_update(
                 nest_dict(general_data, [general_title]))
     mp_cat_id_idx, mp_cat_id_uniq = 0, mp_cat_id
     if uniquify:
         # direct membership test on the document instead of its key list
         while mp_cat_id_uniq in self.document:
             mp_cat_id_uniq = mp_cat_id + '--{}'.format(mp_cat_id_idx)
             mp_cat_id_idx += 1
     self.document.rec_update(
         nest_dict(mpfile.document.pop(mp_cat_id), [mp_cat_id_uniq]))
Пример #7
0
    def add_data_table(self, identifier, dataframe, name, plot_options=None):
        """Store a data table in the root-level section `identifier`.

        The table name is prefixed with ``mp_level01_titles[1] + '_'`` unless
        it already starts with it, and each of its characters is mapped
        through `replacements`. Default plot options for the table are
        inserted afterwards.

        Args:
            identifier (str): MP category ID (`mp_cat_id`)
            dataframe (pandas.DataFrame): tabular data as Pandas DataFrame
            name (str): table name (currently required, see TODO below)
            plot_options (dict): options for according plotly graph
        """
        # TODO: optional table name, required if multiple tables per root-level section
        prefix = mp_level01_titles[1] + '_'
        prefixed = name if name.startswith(prefix) else prefix + name
        # character-wise substitution; unmapped characters are kept as they are
        name = ''.join(replacements.get(char, char) for char in prefixed)
        table_as_dict = Table(dataframe).to_dict()
        self.document.rec_update(nest_dict(table_as_dict, [identifier, name]))
        section = self.document[identifier]
        section.insert_default_plot_options(
            dataframe, name, update_plot_options=plot_options)
Пример #8
0
 def insert_general_section(self, general_mpfile):
     """Insert general section from `general_mpfile` into this MPFile.

     Every key of the general section is added to the first root-level
     section of this document: keys already present there are merged via
     ``rec_update``, all others are inserted in front of the first entry
     of that section whose value is a dict.

     Args:
         general_mpfile: MPFile holding the general section under
             ``mp_level01_titles[0]``; nothing happens if it is None.
     """
     if general_mpfile is None:
         return
     general_title = mp_level01_titles[0]
     general_data = general_mpfile.document[general_title]
     # list(...) keeps the lookup working where keys() returns a
     # non-subscriptable view (Python 3)
     root_key = list(self.document.keys())[0]
     # need to reverse-loop to keep the order of the general_mpfile
     # (materialized first so reversed() gets a real sequence)
     for key, value in reversed(list(general_data.items())):
         if key in self.document[root_key]:
             self.document.rec_update(nest_dict(value, [root_key, key]))
         else:
             # this approach is due to the order sensitivity of key-value pairs
             # before or after a `>>>..` row in the custom format (legacy)
             # => ignoring it here would generate the wrong MPFile in get_string
             # Iterate over a snapshot: items() works on Python 2 and 3 and
             # the section is modified by insert_before inside the loop.
             # NOTE(review): if no value in the section is a dict, the
             # key is not inserted at all -- confirm this is intended.
             for k, v in list(self.document[root_key].items()):
                 if isinstance(v, dict):
                     self.document[root_key].insert_before(k, (key, value))
                     break
Пример #9
0
 def split(self):
     """Yield the sections of this MPFile one by one as single-section MPFiles.

     A leading general section (``mp_level01_titles[0]``) is popped first and
     inserted into every yielded MPFile. A ``--<suffix>`` appended to a
     section key is stripped before the section is yielded. Sections are
     popped from this MPFile while iterating; iteration ends on the first
     KeyError raised inside the loop body.

     As this is a generator, the check for an empty document only runs once
     iteration starts.

     Raises:
         ValueError: if no section is left after removing the general one.
     """
     shared = None
     if mp_level01_titles[0] in self.document.keys():
         shared = self.pop_first_section()
     if not self.document:
         raise ValueError('No contributions in MPFile! Either the file is'
                          ' empty or only contains shared (meta-)data not'
                          ' correlated to core identifier.')
     while True:
         try:
             single = self.pop_first_section()
             suffixed_id = single.ids[0]
             # drop the '--<suffix>' part from the section key
             plain_id = suffixed_id.split('--')[0]
             section = single.document.pop(suffixed_id)
             single.document.rec_update(nest_dict(section, [plain_id]))
             if shared is not None:
                 single.insert_general_section(shared)
             yield single
         except KeyError:
             return
Пример #10
0
    def add_structure(self, source, name=None, identifier=None, fmt=None):
        """Add a structure to the MPFile and return the identifier it is filed under.

        Args:
            source: a pymatgen ``Structure``, its dict representation, the
                path of an existing structure file, or a string containing
                the structure serialized in format `fmt`.
            name (str): optional key for the structure within the structures
                sub-section; defaults to ``s<index>`` with the index being
                the number of structures already stored for the identifier.
            identifier (str): optional mp-id to file the structure under; it
                has to be among the ids MP returns for the structure.
            fmt (str): format passed to ``Structure.from_str``; required when
                `source` is a string that is not an existing path.

        Returns:
            The identifier used as root-level key for the structure.

        Raises:
            ValueError: if `source` is of an unsupported type, `fmt` is
                missing for a string source, no API key is set, the
                structure is not found in MP, or `identifier` is not among
                the matched MP ids.
        """
        is_string = isinstance(source, six.string_types)
        # Only strings and path-like objects may be probed on the file
        # system. Passing anything else to os.path.exists would raise a
        # TypeError (or treat integers as file descriptors) instead of
        # reaching the ValueError below.
        is_path_like = is_string or hasattr(source, '__fspath__')
        if isinstance(source, Structure):
            structure = source
        elif isinstance(source, dict):
            structure = Structure.from_dict(source)
        elif is_path_like and os.path.exists(source):
            structure = Structure.from_file(source)
        elif is_string:
            if fmt is None:
                raise ValueError('Need fmt to get structure from string!')
            structure = Structure.from_str(source, fmt)
        else:
            raise ValueError(source, 'not supported!')

        mpr = MPRester()
        if not mpr.api_key:
            raise ValueError(
                'API key not set. Run `pmg config --add PMG_MAPI_KEY <USER_API_KEY>`.'
            )
        matched_mpids = mpr.find_structure(structure)
        if not matched_mpids:
            raise ValueError(
                'Structure not found in MP. Please submit via MPComplete to obtain mp-id!'
            )
        elif identifier is None:
            identifier = matched_mpids[0]
            if len(matched_mpids) > 1:
                print('Multiple matching structures found in MP. Using',
                      identifier)
        elif identifier not in matched_mpids:
            raise ValueError(
                'Structure does not match {} but instead {}'.format(
                    identifier, matched_mpids))

        idx = len(
            self.document.get(identifier, {}).get(mp_level01_titles[3], {}))
        sub_key = 's{}'.format(idx) if name is None else name
        self.document.rec_update(
            nest_dict(structure.as_dict(),
                      [identifier, mp_level01_titles[3], sub_key]))
        # the identifier may have been chosen here, so hand it back
        return identifier
0
 def add_hierarchical_data(self, dct, identifier=mp_level01_titles[0]):
     """Merge hierarchical data into the document under `identifier`.

     Args:
         dct (dict): nested data; it is wrapped in a ``RecursiveDict``
         identifier (str): root-level key to nest the data under; defaults
             to ``mp_level01_titles[0]``

     Raises:
         StopIteration: if the number of ids in this MPFile has reached
             ``self.max_contribs``. NOTE(review): raised inside a generator
             this exception ends the iteration (or becomes a RuntimeError
             under PEP 479), so callers should not invoke this from
             generator bodies without catching it.
     """
     limit_reached = len(self.ids) >= self.max_contribs
     if limit_reached:
         raise StopIteration(
             'Reached max. number of contributions in MPFile')
     wrapped = RecursiveDict(dct)
     nested = nest_dict(wrapped, [identifier])
     self.document.rec_update(nested)
Пример #12
0
 def add_hierarchical_data(self, identifier, dct):
     """Merge the hierarchical data `dct` into the document under `identifier`.

     Args:
         identifier (str): root-level key to nest the data under
         dct (dict): nested data to merge via ``rec_update``
     """
     key_path = [identifier]
     nested = nest_dict(dct, key_path)
     self.document.rec_update(nested)