Exemplo n.º 1
0
 def get_string(self, df_head_only=False):
     """Serialize the document into its text representation.

     Walks ``self.document.iterate()`` and emits lines per entry: tables
     are written as CSV, structures as CIF text, quantities through
     their ``pretty_string()``, and all remaining entries as scope lines
     (``{a.b}``) and key-value pairs.

     Args:
         df_head_only (bool): if True, only ``head()`` of each table is
             written, followed by a ``...`` marker line.

     Returns:
         str: the emitted lines joined by newlines, with a trailing
         newline.
     """
     from pymatgen import Structure
     lines, scope = [], []
     for key, value in self.document.iterate():
         if isinstance(value, Table):
             # rewrite the braces of the previously emitted line into
             # table markup: `{...}` -> `[+...]`
             if lines:
                 lines[-1] = lines[-1].replace('{', '[+').replace('}', ']')
             # write a header row only if at least one column label is a string
             header = any(isinstance(col, str) for col in value)
             if isinstance(value.index, MultiIndex):
                 # work on a copy: serializing must not modify the table
                 # stored in the document
                 value = value.reset_index()
             if df_head_only:
                 value = value.head()
             # [:-1] drops the last character of the CSV text
             csv_string = value.to_csv(
                 index=False, header=header, float_format='%g', encoding='utf-8'
             )[:-1]
             lines += csv_string.split('\n')
             if df_head_only:
                 lines.append('...')
         elif isinstance(value, Structure):
             from pymatgen.io.cif import CifWriter
             cif = CifWriter(value, symprec=symprec).__str__()
             lines.append(make_pair(
                 ''.join([replacements.get(c, c) for c in key]), cif+':end'
             ))
         elif Quantity is not None and isinstance(value, Quantity):
             lines.append(make_pair(
                 value.display_symbols[0], value.pretty_string()
             ))
         else:
             level, key = key
             # truncate scope
             level_reduction = level < len(scope)
             if level_reduction:
                 del scope[level:]
             # append scope
             if value is None:
                 scope.append(''.join([
                     replacements.get(c, c) for c in key
                 ]))
             # correct scope to omit internal 'general' section
             # (guarded: the scope may still be empty at this point)
             scope_corr = scope
             if scope and scope[0] == mp_level01_titles[0]:
                 scope_corr = scope[1:]
             # insert scope line
             if (value is None and scope_corr) or \
                (value is not None and level_reduction):
                 lines.append('\n{' + '.'.join(scope_corr) + '}')
             # insert key-value line
             if value is not None:
                 val = str(value)
                 # values starting with 'http' stay on one line, all
                 # other text is wrapped
                 value_lines = [val] if val.startswith('http') \
                         else textwrap.wrap(val)
                 if len(value_lines) > 1:
                     # multi-line values start on a fresh line and are
                     # terminated by `:end`
                     value_lines = [''] + value_lines + [':end']
                 lines.append(make_pair(
                     ''.join([replacements.get(c, c) for c in key]),
                     '\n'.join(value_lines)
                 ))
     return '\n'.join(lines) + '\n'
Exemplo n.º 2
0
 def insert_default_plot_options(self, pd_obj, k, update_plot_options=None):
     """Register a default plot entry for the table named `k`.

     The entry is called ``default_<table name>`` and uses the first
     column of `pd_obj` as its ``x`` column. It is stored under the
     ``mp_level01_titles[2]`` key of this dictionary.

     Args:
         pd_obj: table object; only ``pd_obj.columns[0]`` is read.
         k (str): table name, passed through `replacements` per character.
         update_plot_options (dict): extra options merged into the
             default entry; skipped if None.
     """
     table_name = ''.join(replacements.get(char, char) for char in k)
     plot_name = 'default_{}'.format(table_name)
     plots_title = mp_level01_titles[2]

     # default options: first column as x-column
     options = _OrderedDict([('x', pd_obj.columns[0]), ('table', table_name)])
     if update_plot_options is not None:
         options.update(update_plot_options)
     plots_section = _OrderedDict([(plot_name, options)])

     if plots_title not in self:
         # no plots section yet: use the new one as is
         self[plots_title] = plots_section
     else:
         # merge into the existing plots section
         self.rec_update(_OrderedDict([(plots_title, plots_section)]))
Exemplo n.º 3
0
    def add_data_table(self, identifier, dataframe, name, plot_options=None):
        """Add a data table to a root-level section of the document.

        The table is merged into ``self.document`` under
        ``[identifier, name]`` and a default plot entry is inserted for
        it in the same section.

        Args:
            identifier (str): MP category ID (`mp_cat_id`)
            dataframe (pandas.DataFrame): tabular data as Pandas DataFrame
            name (str): table name (passed through `replacements`)
            plot_options (dict): options for according plotly graph
        """
        # TODO: optional table name, required if multiple tables per root-level section
        table_name = ''.join(replacements.get(char, char) for char in name)
        table_dict = Table(dataframe).to_dict()
        nested_table = nest_dict(table_dict, [identifier, table_name])
        self.document.rec_update(nested_table)
        section = self.document[identifier]
        section.insert_default_plot_options(
            dataframe, table_name, update_plot_options=plot_options
        )
Exemplo n.º 4
0
 def rec_update(self, other=None, overwrite=False, replace_newlines=True):
     """Recursively merge `other` into this dictionary.

     Based on https://gist.github.com/Xjs/114831

     String keys are passed through `replacements` character by
     character before being looked up or stored. If a key exists on both
     sides with dict values, the existing value is converted to a
     RecursiveDict and merged recursively; otherwise the value is stored
     if the key is new or `overwrite` is set.

     Args:
         other (dict): mapping to merge in. If None, the dictionary is
             merged with itself and `overwrite` is forced to True (mode
             to force RecursiveDicts to be used).
         overwrite (bool): if False, existing unnested keys are kept.
         replace_newlines (bool): if True, newline characters in string
             values stored at this level are replaced by a space.
     """
     if other is None:  # mode to force RecursiveDicts to be used
         other = self
         overwrite = True
     # iterate over a snapshot: `self` is modified in the loop and may be
     # the very same object as `other`
     for key, value in list(other.items()):
         if isinstance(key, six.string_types):
             key = ''.join([replacements.get(c, c) for c in key])
         if key in self and \
            isinstance(self[key], dict) and \
            isinstance(value, dict):
             # ensure RecursiveDict and update key (w/o underscores)
             self[key] = RecursiveDict(self[key])
             # newlines are kept for values directly inside the
             # `mp_level01_titles[3]` section. This is decided per key
             # in a separate variable so it cannot leak into the
             # handling of the sibling keys that follow.
             nested_replace_newlines = bool(key != mp_level01_titles[3])
             self[key].rec_update(other=value,
                                  overwrite=overwrite,
                                  replace_newlines=nested_replace_newlines)
         elif overwrite or key not in self:
             if isinstance(value, six.string_types) and replace_newlines:
                 self[key] = value.replace('\n', ' ')
             else:
                 self[key] = value
Exemplo n.º 5
0
 def get_string(self):
     """Serialize the document into its text representation.

     Walks ``self.document.iterate()`` and emits lines per entry: tables
     are written as CSV, structures as CIF text, and all remaining
     entries as scope lines and key-value pairs. Scopes whose key starts
     with ``mp_level01_titles[1] + '_'`` are opened as ``[+a.b]``, all
     other scopes as ``{a.b}``.

     Note: relies on the ``unicode`` builtin and on ``decode`` being
     available on the CSV text and on non-unicode keys.

     Returns:
         the emitted lines joined by newlines, with a trailing newline.
     """
     from pymatgen import Structure
     lines, scope = [], []
     table_start = mp_level01_titles[1] + '_'
     for key, value in self.document.iterate():
         if isinstance(value, Table):
             # write a header row only if at least one column label is a string
             header = any(
                 isinstance(col, (unicode, str)) for col in value
             )
             if isinstance(value.index, MultiIndex):
                 # work on a copy: serializing must not modify the table
                 # stored in the document
                 value = value.reset_index()
             # [:-1] drops the last character of the CSV text
             csv_string = value.to_csv(index=False,
                                       header=header,
                                       float_format='%g',
                                       encoding='utf-8')[:-1]
             lines += csv_string.decode('utf-8').split('\n')
         elif isinstance(value, Structure):
             from pymatgen.io.cif import CifWriter
             cif = CifWriter(value, symprec=symprec).__str__()
             lines.append(
                 make_pair(''.join([replacements.get(c, c) for c in key]),
                           cif + ':end'))
         else:
             level, key = key
             key = key if isinstance(key, unicode) else key.decode('utf-8')
             # truncate scope
             level_reduction = level < len(scope)
             if level_reduction:
                 del scope[level:]
             # Delimiters are chosen anew for every entry. Plain section
             # delimiters are the default, so a scope line re-emitted
             # after a level reduction never reuses the table delimiters
             # of a scope that was opened earlier.
             start, end = '\n{', '}'
             # append scope and set delimiters
             if value is None:
                 if key.startswith(table_start):
                     # account for 'data_' prefix
                     key = key[len(table_start):]
                     start, end = '\n[+', ']'
                 scope.append(''.join([replacements.get(c, c)
                                       for c in key]))
             # correct scope to omit internal 'general' section
             # (guarded: the scope may still be empty at this point)
             scope_corr = scope
             if scope and scope[0] == mp_level01_titles[0]:
                 scope_corr = scope[1:]
             # insert scope line
             if (value is None and scope_corr) or \
                (value is not None and level_reduction):
                 lines.append(start + '.'.join(scope_corr) + end)
             # insert key-value line
             if value is not None:
                 val = unicode(value) if not isinstance(value,
                                                        str) else value
                 # values starting with 'http' stay on one line, all
                 # other text is wrapped
                 value_lines = [val] if val.startswith('http') \
                         else textwrap.wrap(val)
                 if len(value_lines) > 1:
                     # multi-line values start on a fresh line and are
                     # terminated by `:end`
                     value_lines = [''] + value_lines + [':end']
                 lines.append(
                     make_pair(
                         ''.join([replacements.get(c, c) for c in key]),
                         '\n'.join(value_lines)))
     return '\n'.join(lines) + '\n'
Exemplo n.º 6
0
    def from_string(data):
        """Build an MPFile from its ArchieML text representation.

        The text is parsed with ``archieml.loads`` and post-processed per
        root-level key: the key is normalized via `normalize_root_level`,
        shared (meta-)data is nested under ``mp_level01_titles[0]``,
        table sections are parsed with `read_csv`, strings containing a
        space are converted with ``Quantity.from_key_value`` (if
        `Quantity` is available), and CIF strings under
        ``mp_level01_titles[3]`` are converted into structures.

        Args:
            data: file contents, handed to ``archieml.loads``.

        Returns:
            the result of ``MPFile.from_dict`` on the processed contents.
        """
        # use archieml-python parse to import data
        rdct = RecursiveDict(archieml.loads(data))
        rdct.rec_update()

        general_title = mp_level01_titles[0]
        structures_title = mp_level01_titles[3]

        # post-process internal representation of file contents
        # (snapshot of the keys: entries are popped and re-inserted below)
        for key in list(rdct.keys()):
            is_general, root_key = normalize_root_level(key)

            # make part of shared (meta-)data, i.e. nest under `general` at
            # the beginning of the MPFile
            if is_general and general_title not in rdct:
                rdct[general_title] = RecursiveDict()
                rdct.move_to_end(general_title, last=False)

            # normalize identifier key (pop & insert)
            # using rec_update since we're looping over all entries
            # also: support data in bare tables (marked-up only by
            #       root-level identifier) by nesting under 'data'
            value = rdct.pop(key)
            keys = [general_title] if is_general else []
            keys.append(root_key)
            if isinstance(value, list):
                keys.append('table')
            rdct.rec_update(nest_dict(value, keys))

            # reference to section to iterate or parse as CIF
            section = rdct[general_title][root_key] \
                    if is_general else rdct[root_key]

            # iterate to find CSV sections to parse
            # also parse propnet quantities
            if isinstance(section, dict):
                scope = []
                for k, v in section.iterate():
                    # separate name, so the root-level `key` is not shadowed
                    level, sub_key = k
                    sub_key = ''.join(
                        [replacements.get(c, c) for c in sub_key]
                    )
                    if level < len(scope):
                        del scope[level:]
                    if v is None:
                        scope.append(sub_key)
                    elif isinstance(v, list) and v and isinstance(v[0], dict):
                        # one row per list entry, each preceded by a
                        # newline (the text starts with an empty line)
                        table = '\n'.join(
                            [''] + [row_dct['value'] for row_dct in v]
                        )
                        pd_obj = read_csv(table)
                        d = nest_dict(pd_obj.to_dict(), scope + [sub_key])
                        section.rec_update(d, overwrite=True)
                        if not is_general and level == 0:
                            section.insert_default_plot_options(
                                pd_obj, sub_key
                            )
                    elif Quantity is not None and \
                            isinstance(v, six.string_types) and ' ' in v:
                        quantity = Quantity.from_key_value(sub_key, v)
                        # TODO quantity.symbol.name
                        d = nest_dict(quantity.as_dict(), scope + [sub_key])
                        section.rec_update(d, overwrite=True)

            # convert CIF strings into pymatgen structures
            if structures_title in section:
                from pymatgen.io.cif import CifParser
                structures = section[structures_title]
                # snapshot of the names: every entry is popped and
                # re-inserted, which must not happen while iterating
                # over the live keys
                for name in list(structures.keys()):
                    cif = structures.pop(name)
                    parser = CifParser.from_string(cif)
                    structure = parser.get_structures(primitive=False)[0]
                    structures.rec_update(nest_dict(
                        structure.as_dict(), [name]
                    ))

        return MPFile.from_dict(rdct)