Example #1
import copy
import os

from tqdm import tqdm

# msp, get_translate, normalize_root_level, nest_dict, treat_xmcd and
# xas_process are project-local helpers assumed to be importable from the
# surrounding package.

def run(mpfile, nmax=None):
    """Read XMCD scan data and add processed data tables to the MPFile."""
    # print(json.dumps(mpfile.document, indent=4))
    datasource = mpfile.document['general'].pop('Datasource')
    subdir = os.path.abspath(os.path.join(
        datasource['work_dir'], datasource['directory']
    ))

    # TODO Potentially we have to insert a preprocessing step, probably in msp
    scandata_f = msp.read_scans(subdir, datacounter="Counter 1")
    scan_groups = scandata_f.groupby(datasource['group_by'].split())
    process_template = mpfile.document['general'].pop('process_template')
    translate = get_translate(datasource['work_dir'])
    # dict key views cannot be sorted in place in Python 3
    keys = sorted(scan_groups.groups.keys())

    for i, g in enumerate(tqdm(keys, leave=True)):
        # TODO: Group information is saved into the output. Rethink?
        comp, sx, sy = translate(g)
        composition = normalize_root_level(comp)[1]
        process_template_copy = copy.deepcopy(process_template)
        process_template_copy['position'] = {'x': sx, 'y': sy}
        mpfile.document.rec_update(nest_dict(
            process_template_copy, [composition, 'process_chain']
        ))
        sg = scan_groups.get_group(g)
        for process_chain_name in process_template.keys():
            scan_params = mpfile.document[composition]['process_chain'][process_chain_name]
            xmcd_frame = treat_xmcd(sg, scan_params, xas_process.process_dict)
            mpfile.add_data_table(
                composition, xmcd_frame[['Energy', 'XAS', 'XMCD']],
                '_'.join(['data', process_chain_name])
            )
        if nmax is not None and i > nmax:
            break
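
A minimal usage sketch for this entry point, assuming an mpcontribs-style MPFile class with a from_file constructor; the import path and input file name below are assumptions for illustration, not taken from the example itself:

from mpcontribs.io.archieml.mpfile import MPFile  # assumed module path

mpfile = MPFile.from_file('xmcd_input.txt')  # hypothetical input file
run(mpfile, nmax=5)  # note: with the `i > nmax` check, a couple more than nmax groups are processed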
Example #3
import archieml
import six

# RecursiveDict, normalize_root_level, nest_dict, read_csv, mp_level01_titles
# and MPFile are project-local names assumed to be importable from the
# surrounding package.

def from_string(data):
    # use archieml-python parse to import data
    rdct = RecursiveDict(archieml.loads(data))
    rdct.rec_update()
    # post-process internal representation of file contents;
    # copy the keys since entries are popped and re-inserted while looping
    for key in list(rdct.keys()):
        is_general, root_key = normalize_root_level(key)
        if is_general:
            # make part of shared (meta-)data, i.e. nest under `general` at
            # the beginning of the MPFile
            if mp_level01_titles[0] not in rdct:
                rdct.insert_before(list(rdct.keys())[0],
                                   (mp_level01_titles[0], RecursiveDict()))
            rdct.rec_update(
                nest_dict(rdct.pop(key), [mp_level01_titles[0], root_key]))
        else:
            # normalize identifier key (pop & insert)
            # using rec_update since we're looping over all entries
            # also: support data in bare tables (marked-up only by
            #       root-level identifier) by nesting under 'data'
            value = rdct.pop(key)
            keys = [root_key]
            if isinstance(value, list):
                keys.append('table')
            rdct.rec_update(nest_dict(value, keys))
            # Note: CSV section is marked with 'data ' prefix during iterate()
            for k, v in rdct[root_key].iterate():
                if isinstance(k, six.string_types) and \
                        k.startswith(mp_level01_titles[1]):
                    # k = table name (incl. data prefix)
                    # v = csv string from ArchieML free-form arrays
                    table_name = k[len(mp_level01_titles[1] + '_'):]
                    pd_obj = read_csv(v)
                    rdct[root_key].pop(table_name)
                    rdct[root_key].rec_update(
                        nest_dict(pd_obj.to_dict(), [k]))
                    rdct[root_key].insert_default_plot_options(pd_obj, k)
            # convert CIF strings into pymatgen structures
            if mp_level01_titles[3] in rdct[root_key]:
                from pymatgen.io.cif import CifParser
                # copy the keys: entries are popped inside the loop
                for name in list(rdct[root_key][mp_level01_titles[3]].keys()):
                    cif = rdct[root_key][mp_level01_titles[3]].pop(name)
                    parser = CifParser.from_string(cif)
                    structure = parser.get_structures(primitive=False)[0]
                    rdct[root_key][mp_level01_titles[3]].rec_update(
                        nest_dict(structure.as_dict(), [name]))
    return MPFile.from_dict(rdct)
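
A hedged usage sketch for this parser: the ArchieML snippet below is illustrative only, and assumes that 'LiCoO2' passes normalize_root_level as a composition-style identifier:

archieml_str = '''
{general}
title: XMCD demo

{LiCoO2}
comment: entries here are nested under the root-level identifier
'''
mpf = from_string(archieml_str)
print(list(mpf.document.keys()))  # expect roughly ['general', 'LiCoO2']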
Example #4
import six
from archieml import loads

# RecursiveDict, normalize_root_level, nest_dict, read_csv, replacements,
# mp_level01_titles and MPFile are project-local names assumed to be
# importable from the surrounding package; Quantity is an optional propnet
# import that may be None.

def from_string(data):
    # use archieml-python parse to import data
    rdct = RecursiveDict(loads(data))
    rdct.rec_update()

    # post-process internal representation of file contents;
    # copy the keys since entries are popped and re-inserted while looping
    for key in list(rdct.keys()):
        is_general, root_key = normalize_root_level(key)

        if is_general:
            # make part of shared (meta-)data, i.e. nest under `general` at
            # the beginning of the MPFile
            if mp_level01_titles[0] not in rdct:
                rdct[mp_level01_titles[0]] = RecursiveDict()
                rdct.move_to_end(mp_level01_titles[0], last=False)

        # normalize identifier key (pop & insert)
        # using rec_update since we're looping over all entries
        # also: support data in bare tables (marked-up only by
        #       root-level identifier) by nesting under 'data'
        value = rdct.pop(key)
        keys = [mp_level01_titles[0]] if is_general else []
        keys.append(root_key)
        if isinstance(value, list):
            keys.append("table")
        rdct.rec_update(nest_dict(value, keys))

        # reference to section to iterate or parse as CIF
        section = (rdct[mp_level01_titles[0]][root_key]
                   if is_general else rdct[root_key])

        # iterate to find CSV sections to parse
        # also parse propnet quantities
        if isinstance(section, dict):
            scope = []
            for k, v in section.iterate():
                level, key = k  # `key` deliberately shadows the outer loop variable here
                key = "".join([replacements.get(c, c) for c in key])
                level_reduction = bool(level < len(scope))
                if level_reduction:
                    del scope[level:]
                if v is None:
                    scope.append(key)
                elif isinstance(v, list) and isinstance(v[0], dict):
                    table = ""
                    for row_dct in v:
                        table = "\n".join([table, row_dct["value"]])
                    pd_obj = read_csv(table)
                    d = nest_dict(pd_obj.to_dict(), scope + [key])
                    section.rec_update(d, overwrite=True)
                    if not is_general and level == 0:
                        section.insert_default_plot_options(pd_obj, key)
                elif (Quantity is not None
                      and isinstance(v, six.string_types) and " " in v):
                    quantity = Quantity.from_key_value(key, v)
                    # TODO quantity.symbol.name
                    d = nest_dict(quantity.as_dict(), scope + [key])
                    section.rec_update(d, overwrite=True)

        # convert CIF strings into pymatgen structures
        if mp_level01_titles[3] in section:
            from pymatgen.io.cif import CifParser

            # copy the keys: entries are popped inside the loop
            for name in list(section[mp_level01_titles[3]].keys()):
                cif = section[mp_level01_titles[3]].pop(name)
                parser = CifParser.from_string(cif)
                structure = parser.get_structures(primitive=False)[0]
                section[mp_level01_titles[3]].rec_update(
                    nest_dict(structure.as_dict(), [name]))

    return MPFile.from_dict(rdct)
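
Both from_string variants re-nest popped values via nest_dict. A minimal sketch of its assumed behavior, under the assumption that it simply wraps a value under a chain of keys (the real project helper presumably returns a RecursiveDict rather than a plain dict):

def nest_dict_sketch(dct, keys):
    # wrap dct under the chain of keys, outermost key first
    nested = dct
    for key in reversed(keys):
        nested = {key: nested}
    return nested

assert nest_dict_sketch({'a': 1}, ['x', 'y']) == {'x': {'y': {'a': 1}}}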