示例#1
0
    def get_contributions(self):
        """Collect the contributions of this project into a single table.

        Every document returned by ``self.query_contributions`` is parsed with
        ``MPFile.from_contribution``; its first identifier, the URL from
        ``self.get_cid_url`` and the selected ``composition``/``XAS``/``XMCD``
        values of its hierarchical ``data`` section form one row.

        Returns:
            The result of ``Table.from_items`` with one row per contribution
            (indexed by identifier) and the columns ``formula``, ``cid`` and
            one ``<section>##<field>`` column per selected value.

        Raises:
            Exception: if the query yields no documents.
        """
        docs = self.query_contributions(
            projection={'_id': 1, 'mp_cat_id': 1, 'content': 1}
        )
        if not docs:
            raise Exception('No contributions found for ALS Beamline Explorer!')

        # sections of the contribution data and the fields shown for each;
        # ('position', ['x', 'y']) is currently disabled
        keys = RecursiveDict([
            ('composition', ['Co', 'Cu', 'Ce']),
            ('XAS', ['min', 'max']),
            ('XMCD', ['min', 'max'])
        ])
        # flatten once into (section, field) pairs; the same order is used for
        # the column names and for the values of every row
        pairs = [
            (section, field)
            for section, fields in keys.items()
            for field in fields
        ]

        columns = ['formula', 'cid']
        columns.extend('##'.join(pair) for pair in pairs)

        data = []
        for doc in docs:
            mpfile = MPFile.from_contribution(doc)
            identifier = mpfile.ids[0]
            contrib = mpfile.hdata[identifier]['data']
            row = [identifier, self.get_cid_url(doc)]
            for section, field in pairs:
                row.append(contrib[section][field])
            data.append((identifier, row))

        return Table.from_items(data, orient='index', columns=columns)
    def get_contributions(self):
        """Return a table with one row per contribution of this project.

        The contributions are fetched through ``self.query_contributions``
        (projection on ``_id``, ``identifier`` and ``content``) and converted
        with ``MPFile.from_contribution``. A row holds the first identifier of
        the MPFile, the URL given by ``self.get_cid_url`` and the selected
        ``composition``, ``XAS`` and ``XMCD`` entries of the ``data`` section.

        Returns:
            Whatever ``Table.from_items`` builds from the rows, using the
            identifier as index and ``formula``, ``cid`` plus the
            ``<section>##<field>`` names as columns.

        Raises:
            Exception: if no contribution documents are found.
        """
        wanted_fields = {'_id': 1, 'identifier': 1, 'content': 1}
        docs = self.query_contributions(projection=wanted_fields)
        if not docs:
            raise Exception('No contributions found for ALS Beamline Explorer!')

        # section -> fields to display; ('position', ['x', 'y']) is currently
        # disabled
        keys = RecursiveDict([
            ('composition', ['Co', 'Cu', 'Ce']),
            ('XAS', ['min', 'max']),
            ('XMCD', ['min', 'max'])
        ])

        columns = ['formula', 'cid']
        for section, fields in keys.items():
            for field in fields:
                columns.append('##'.join([section, field]))

        data = []
        for doc in docs:
            mpfile = MPFile.from_contribution(doc)
            identifier = mpfile.ids[0]
            contrib = mpfile.hdata[identifier]['data']
            cid_url = self.get_cid_url(doc)
            values = [
                contrib[section][field]
                for section, fields in keys.items()
                for field in fields
            ]
            data.append((identifier, [identifier, cid_url] + values))

        return Table.from_items(data, orient='index', columns=columns)
示例#3
0
    def to_backgrid_dict(self):
        """Backgrid-conform dict from DataFrame

        Only the first 260 rows are exported when the table is longer.

        Returns:
            dict with two entries:

            * ``rows``: the table as a list of record dicts, with some cells
              rewritten (see below).
            * ``columns``: one descriptor per column with the keys ``name``,
              ``cell`` (``'number'``, ``'string'`` or ``'uri'``), ``nesting``
              and ``editable``; columns whose name contains ``##`` also get a
              ``label``, and every column after the 12th gets
              ``renderable = 0``.

        Cells of non-numeric columns are rewritten as follows:

        * a cell without a space that matches ``mp_id_pattern`` becomes a
          ``https://materialsproject.org/materials/<cell>`` link as long as
          all previous cells of the column were empty or matched, too;
        * otherwise a cell without a space is parsed as a composition and
          replaced by its unicode form; if that fails and the cell is not a
          URL with scheme, netloc and path, scanning of the column stops;
        * a ``"<number> <unit>"`` cell is moved to the key
          ``"<column> [<unit>]"`` (using the first unit seen in the column)
          and reduced to the number when its unit equals that first unit.

        Note:
            For a table with a ``MultiIndex`` the index is reset in place.
            When the table has at most 260 rows this modifies the table the
            method is called on.
        """
        # shorten global import times by importing the heavy dependencies here
        import numpy as np
        from mpcontribs.io.core.utils import get_composition_from_string
        from pandas import MultiIndex
        import pymatgen.util as pmg_util
        from pymatgen.core.composition import CompositionError

        table = dict()
        nrows_max = 260
        nrows = self.shape[0]
        df = Table(self.head(n=nrows_max)) if nrows > nrows_max else self
        numeric_columns = df.select_dtypes(
            include=[np.number]).columns.tolist()

        if isinstance(df.index, MultiIndex):
            df.reset_index(inplace=True)

        table['columns'] = []
        table['rows'] = super(Table, df).to_dict(orient='records')

        for col_index, col in enumerate(list(df.columns)):
            cell_type = 'number'

            # avoid looping rows to minimize use of `df.iat` (time-consuming in 3d)
            if not col.startswith('level_') and col not in numeric_columns:
                # `col` may be renamed to "<col> [<unit>]" below; `old_col`
                # keeps the key under which the cells are stored in the rows
                is_url_column, prev_unit, old_col = True, None, col

                for row_index in range(df.shape[0]):
                    cell = str(df.iat[row_index, col_index])
                    cell_split = cell.split(' ', 1)

                    if not cell or len(
                            cell_split) == 1:  # empty cell or no space
                        is_url_column = bool(
                            is_url_column
                            and (not cell or mp_id_pattern.match(cell)))
                        if is_url_column:
                            if cell:
                                value = 'https://materialsproject.org/materials/{}'.format(
                                    cell)
                                table['rows'][row_index][col] = value
                        elif cell:
                            try:
                                composition = get_composition_from_string(cell)
                                composition = pmg_util.string.unicodeify(
                                    composition)
                                table['rows'][row_index][col] = composition
                            except (CompositionError, ValueError,
                                    OverflowError):
                                # not a composition -> accept the cell only if
                                # it is a complete URL, else stop scanning
                                try:
                                    # https://stackoverflow.com/a/38020041
                                    result = urlparse(cell)
                                except ValueError:
                                    # urlparse rejects malformed URLs with a
                                    # ValueError; other errors now propagate
                                    break
                                if not all([
                                        result.scheme, result.netloc,
                                        result.path
                                ]):
                                    break
                                is_url_column = True

                    else:
                        value, unit = cell_split  # TODO convert cell_split[0] to float?
                        is_url_column = False
                        try:
                            # unit is only a unit if value is number
                            float(value)
                        except ValueError:
                            continue
                        table['rows'][row_index].pop(old_col)
                        if prev_unit is None:
                            # first unit seen becomes part of the column name
                            prev_unit = unit
                            col = '{} [{}]'.format(col, unit)
                        # keep the unit in the cell only if it deviates from
                        # the unit in the column name
                        table['rows'][row_index][
                            col] = cell if prev_unit != unit else value

                cell_type = 'uri' if is_url_column else 'string'

            col_split = col.split('##')
            nesting = [col_split[0]] if len(col_split) > 1 else []
            table['columns'].append({
                'name': col,
                'cell': cell_type,
                'nesting': nesting,
                'editable': 0
            })
            if len(col_split) > 1:
                table['columns'][-1].update(
                    {'label': '##'.join(col_split[1:])})
            if len(table['columns']) > 12:
                table['columns'][-1]['renderable'] = 0

        # collect nested columns named "<parent>##<name> <unit>" per parent; a
        # parent is dropped as soon as one of its columns lacks the unit part
        header = RecursiveDict()
        for idx, col in enumerate(table['columns']):
            if 'label' in col:
                # split on the first separator only, consistent with how
                # `nesting` and `label` are derived above; names with several
                # '##' no longer fail to unpack
                k, sk = col['name'].split('##', 1)
                sk_split = sk.split()
                if len(sk_split) == 2:
                    d = {'name': sk_split[0], 'unit': sk_split[1], 'idx': idx}
                    if k not in header:
                        header[k] = [d]
                    else:
                        header[k].append(d)
                elif k in header:
                    header.pop(k)

        # if all collected columns of a parent share one unit, move the unit
        # from the column labels into the parent (nesting) header
        for k, skl in header.items():
            units = [sk['unit'] for sk in skl]
            if units.count(units[0]) == len(units):
                for sk in skl:
                    table['columns'][sk['idx']]['label'] = sk['name']
                    table['columns'][sk['idx']]['nesting'][0] = '{} {}'.format(
                        k, sk['unit'])

        return table
def run(mpfile, **kwargs):
    """Add the thin-film PV screening data to `mpfile` and update its tables.

    Reads ``SUMMARY.json``, ``ABSORPTION-CLIPPED.json``, ``DOS.json`` and
    ``FORMATTED-FORMULAE.json`` from a hard-coded local directory. For every
    identifier in the summary it

    * adds the formula and the configured quantities as hierarchical ``data``
      to `mpfile`,
    * looks up the ``_id`` of the matching contribution in
      ``db.contributions`` (project ``screening_inorganic_pv``),
    * adds the absorption data as table ``absorption`` to `mpfile` and
      writes it to the matching document in ``db.tables`` via ``$set``.

    Args:
        mpfile: object providing ``add_hierarchical_data``, ``add_data_table``
            and ``document``.
        **kwargs: accepted but not used.

    Note:
        ``db`` must be available at module level. The lookup of the
        contribution fails with a ``TypeError`` if ``find_one`` returns
        ``None``, i.e. if no contribution matches the query.
    """

    indir = "/Users/patrick/Downloads/ThinFilmPV"

    def load_json(filename):
        # the context manager closes the file handle right after parsing
        with open(os.path.join(indir, filename), "r") as handle:
            return json.load(handle)

    summary_data = load_json("SUMMARY.json")
    absorption_data = load_json("ABSORPTION-CLIPPED.json")
    dos_data = load_json("DOS.json")  # loaded, but not used below
    formulae_data = load_json("FORMATTED-FORMULAE.json")
    # summary key -> [display name, unit]; a dot in the display name nests the
    # value one level deeper (e.g. "ΔE.corrected" -> rd["ΔE"]["corrected"])
    config = RecursiveDict([
        ("SLME_500_nm", ["SLME|500nm", "%"]),
        ("SLME_1000_nm", ["SLME|1000nm", "%"]),
        ("E_g", ["ΔE.corrected", "eV"]),
        ("E_g_d", ["ΔE.direct", "eV"]),
        ("E_g_da", ["ΔE.dipole-allowed", "eV"]),
        ("m_e", ["mᵉ", "mₑ"]),
        ("m_h", ["mʰ", "mₑ"]),
    ])

    print(len(summary_data.keys()))
    for mp_id, d in summary_data.items():
        print(mp_id)
        # strip the HTML subscript tags from the formatted formula
        formula = formulae_data[mp_id].replace("<sub>",
                                               "").replace("</sub>", "")
        query = {"identifier": mp_id, "project": "screening_inorganic_pv"}
        # r = db.contributions.update_one(query, {'$set': {'content.data.formula': formula}})
        # print(r.modified_count)
        # continue

        rd = RecursiveDict({"formula": formula})
        for k, v in config.items():
            value = clean_value(d[k], v[1], max_dgts=4)
            if "." not in v[0]:
                rd[v[0]] = value
            else:
                keys = v[0].split(".")
                if keys[0] not in rd:
                    rd[keys[0]] = RecursiveDict({keys[1]: value})
                else:
                    rd[keys[0]][keys[1]] = value

        mpfile.add_hierarchical_data({"data": rd}, identifier=mp_id)

        # `doc` is only consumed by the upsert that is commented out below
        doc = query.copy()
        doc["content.data"] = mpfile.document[mp_id]["data"]
        doc["collaborators"] = [{
            "name": "Patrick Huck",
            "email": "*****@*****.**"
        }]
        # r = db.contributions.update_one(query, {'$set': doc}, upsert=True)
        # cid = r.upserted_id
        cid = db.contributions.find_one(query, {"_id": 1})["_id"]

        df = DataFrame(data=absorption_data[mp_id])
        df.columns = ["hν [eV]", "α [cm⁻¹]"]
        mpfile.add_data_table(mp_id, df, "absorption")
        table = mpfile.document[mp_id]["absorption"]
        # drop the "@module"/"@class" entries before writing to the database
        table.pop("@module")
        table.pop("@class")
        table["identifier"] = mp_id
        table["project"] = "screening_inorganic_pv"
        table["name"] = "absorption"
        table["cid"] = cid
        # r = db.tables.insert_one(table)
        # tids = [r.inserted_id]
        r = db.tables.update_one(
            {
                "identifier": mp_id,
                "project": "screening_inorganic_pv",
                "name": "absorption",
                "cid": cid,
            },
            {"$set": table},
        )
        print(len(table["data"]), r.modified_count)