Пример #1
0
 def get_cid_url(self, doc):
     """Build the contribution detail page URL for a MongoDB doc.

     The doc's `mp_cat_id` decides whether the page lives under the
     `materials` or the `compositions` explorer route.
     """
     from mpcontribs.config import mp_id_pattern
     if mp_id_pattern.match(doc['mp_cat_id']):
         collection = 'materials'
     else:
         collection = 'compositions'
     # drop the last path segment of the preamble to get the site root
     base_url = self.preamble.rsplit('/', 1)[0]
     url_parts = (base_url, 'explorer', collection, doc['_id'])
     return '/'.join(url_parts)
Пример #2
0
 def get_cid_url(self, doc):
     """Build the contribution detail page URL for a MongoDB doc.

     The doc's `identifier` decides whether the page lives under the
     `materials` or the `compositions` explorer route.
     """
     from mpcontribs.config import mp_id_pattern
     if mp_id_pattern.match(doc['identifier']):
         collection = 'materials'
     else:
         collection = 'compositions'
     # drop the last path segment of the preamble to get the site root
     base_url = self.preamble.rsplit('/', 1)[0]
     url_parts = (base_url, 'explorer', collection, doc['_id'])
     return '/'.join(url_parts)
Пример #3
0
def normalize_root_level(title):
    """Turn a root-level title into a conventional identifier.

    Titles that parse as a composition, or that match the MP id pattern
    (case-insensitively), are identifiers; anything else is treated as a
    section of shared (meta-)data.

    Returns: (is_general, title)
    """
    from pymatgen.core.composition import CompositionError
    try:
        return False, get_composition_from_string(title)
    except (CompositionError, KeyError, TypeError, ValueError):
        # not a composition -> fall back to the (lower-cased) MP id check
        lowered = title.lower()
        is_mp_id = mp_id_pattern.match(lowered)
        return (False, lowered) if is_mp_id else (True, title)
Пример #4
0
def normalize_root_level(title):
    """convert root-level title into conventional identifier; non-identifiers
    become part of shared (meta-)data.

    Args:
        title: root-level section title.

    Returns:
        (is_general, title): `(False, composition)` if the title parses as
        a composition, `(False, title.lower())` if the lower-cased title
        matches `mp_id_pattern`, and `(True, title)` otherwise.
    """
    try:
        composition = get_composition_from_string(title)
        return False, composition
    except Exception:
        # Any parsing failure means "not a composition". `Exception` (rather
        # than a bare `except`) keeps KeyboardInterrupt/SystemExit propagating.
        if mp_id_pattern.match(title.lower()):
            return False, title.lower()
        return True, title
Пример #5
0
def normalize_root_level(title):
    """convert root-level title into conventional identifier; non-identifiers
    become part of shared (meta-)data.

    Args:
        title: root-level section title.

    Returns:
        (is_general, title): `(False, formula)` if `Composition(title)` can
        be built (formula is the first element of
        `get_integer_formula_and_factor()`), `(False, title.lower())` if the
        lower-cased title matches `mp_id_pattern`, and `(True, title)`
        otherwise.
    """
    try:
        composition = Composition(title).get_integer_formula_and_factor()[0]
        return False, composition
    except Exception:
        # Any parsing failure means "not a composition". `Exception` (rather
        # than a bare `except`) keeps KeyboardInterrupt/SystemExit propagating.
        if mp_id_pattern.match(title.lower()):
            return False, title.lower()
        return True, title
Пример #6
0
def normalize_root_level(title):
    """convert root-level title into conventional identifier; non-identifiers
    become part of shared (meta-)data.

    Args:
        title: root-level section title.

    Returns:
        (is_general, title): `(False, formula)` if `Composition(title)` can
        be built (formula is the first element of
        `get_integer_formula_and_factor()`), `(False, title.lower())` if the
        lower-cased title matches `mp_id_pattern`, and `(True, title)`
        otherwise.
    """
    try:
        composition = Composition(title).get_integer_formula_and_factor()[0]
        return False, composition
    except Exception:
        # Any parsing failure means "not a composition". `Exception` (rather
        # than a bare `except`) keeps KeyboardInterrupt/SystemExit propagating.
        if mp_id_pattern.match(title.lower()):
            return False, title.lower()
        return True, title
Пример #7
0
def normalize_root_level(title):
    """Turn a root-level title into a conventional identifier.

    Titles that parse as a composition, or that match the MP id pattern
    (case-insensitively), are identifiers; anything else is treated as a
    section of shared (meta-)data.

    Returns: (is_general, title)
    """
    from pymatgen.core.composition import CompositionError
    try:
        return False, get_composition_from_string(title)
    except (CompositionError, KeyError, TypeError, ValueError):
        # not a composition -> fall back to the (lower-cased) MP id check
        lowered = title.lower()
        is_mp_id = mp_id_pattern.match(lowered)
        return (False, lowered) if is_mp_id else (True, title)
Пример #8
0
 def delete(self, project, cids):
     """Remove the build entries of the given contributions.

     Args:
         project: name of the top-level document field under which the
             contributions' entries are stored (keyed by contribution id).
         cids: contribution ids to look up in `self.contributions`.

     For every contribution found, the `<project>.<cid>` entry is unset in
     the materials or compositions collection (chosen by matching the
     contribution's `mp_cat_id` against `mp_id_pattern`). Afterwards the
     `project` field itself is unset on every document where it is left
     empty.
     """
     for contrib in self.contributions.find({'_id': {'$in': cids}}):
         mp_cat_id, cid = contrib['mp_cat_id'], contrib['_id']
         is_mp_id = mp_id_pattern.match(mp_cat_id)
         coll = self.materials if is_mp_id else self.compositions
         key = '.'.join([project, str(cid)])
         coll.update({}, {'$unset': {key: 1}}, multi=True)
     # remove `project` field when no contributions remaining
     for coll in [self.materials, self.compositions]:
         for doc in coll.find({project: {'$exists': 1}}):
             # Only the project's own entry decides: checking every value of
             # the document would drop a non-empty project as soon as any
             # other field happened to be falsy.
             if not doc.get(project):
                 coll.update({'_id': doc['_id']}, {'$unset': {project: 1}})
Пример #9
0
def run(mpfile, dup_check_test_site=True):
    """Fill `mpfile` from the Google spreadsheet referenced in its metadata.

    The `google_sheet` URL is popped from the first root-level section of
    `mpfile.document`, exported as xlsx and read with one DataFrame per
    sheet. The first word of a sheet name is the composition; a second word
    matching `mp_id_pattern` overrides the identifier derived from it. Sheets
    with `CIF` in their name are added as structures, all others as
    hierarchical data plus a `dH_dS` table.

    Args:
        mpfile: MPFile-like object to populate (modified in place).
        dup_check_test_site: only referenced by the disabled duplicate check
            below; currently has no effect.
    """

    from pymatgen import MPRester
    existing_identifiers = {}
    # NOTE: duplicate check disabled, so `existing_identifiers` stays empty
    # and no contribution is ever marked as an update.
    #for b in [False, True]:
    #    with DlrVietenRester(test_site=b) as mpr:
    #        for doc in mpr.query_contributions():
    #            existing_identifiers[doc['mp_cat_id']] = doc['_id']
    #    if not dup_check_test_site:
    #        break

    google_sheet = mpfile.document[mp_level01_titles[0]].pop('google_sheet')
    google_sheet += '/export?format=xlsx'
    # NOTE(review): newer pandas releases spell this keyword `sheet_name`;
    # confirm against the pinned pandas version before changing it.
    df_dct = pd.read_excel(google_sheet, sheetname=None)

    # NOTE(review): `mpr` is not used below; kept because constructing the
    # rester may be relied upon for its side effects - verify and remove.
    mpr = MPRester()
    update = 0
    for sheet, df in df_dct.items():
        print(sheet)

        sheet_split = sheet.split()
        composition = sheet_split[0]
        identifier = get_composition_from_string(composition)
        if len(sheet_split) > 1 and mp_id_pattern.match(sheet_split[1]):
            identifier = sheet_split[1]
        print('identifier = {}'.format(identifier))

        if 'CIF' in sheet_split:
            print('adding CIF ...')
            # keep only the first header; the sheet is dumped back to text
            df.columns = [df.columns[0]] + [''] * (df.shape[1] - 1)
            cif = df.to_csv(na_rep='',
                            index=False,
                            sep='\t',
                            quoting=csv.QUOTE_NONE)
            mpfile.add_structure(cif, identifier=identifier, fmt='cif')

        else:
            print('adding data ...')
            mpfile.add_hierarchical_data({'composition': composition},
                                         identifier=identifier)
            mpfile.add_data_table(identifier, df, name='dH_dS')

        if identifier in existing_identifiers:
            cid = existing_identifiers[identifier]
            mpfile.insert_id(identifier, cid)
            update += 1

    # print() with a single pre-formatted string yields the same output on
    # Python 2 and 3 (the former print statements were Python-2-only syntax)
    print('{} contributions to submit.'.format(len(mpfile.ids)))
    if update > 0:
        print('{} contributions to update.'.format(update))
Пример #10
0
    def to_backgrid_dict(self):
        """Backgrid-conform dict from DataFrame

        Returns:
            dict with the keys
            - `columns`: one definition per column (`name`, `cell`,
              `nesting`, `editable`, optionally `label` and `renderable`),
            - `rows`: the records of at most the first `nrows_max` rows.

        Non-numeric columns whose name does not start with `level_` are
        scanned cell by cell: MP ids become materialsproject.org URLs,
        compositions are unicodeified, and "<number> <unit>" cells have
        their unit moved into the column name. Column names containing
        `##` are split into a nesting group and a label.
        """
        # keep module import time low by importing heavy dependencies here
        import numpy as np
        from mpcontribs.io.core.utils import get_composition_from_string
        from pandas import MultiIndex
        import pymatgen.util as pmg_util
        from pymatgen.core.composition import CompositionError

        table = dict()
        nrows_max = 260
        nrows = self.shape[0]
        df = Table(self.head(n=nrows_max)) if nrows > nrows_max else self
        numeric_columns = df.select_dtypes(
            include=[np.number]).columns.tolist()

        if isinstance(df.index, MultiIndex):
            df.reset_index(inplace=True)

        table['columns'] = []
        table['rows'] = super(Table, df).to_dict(orient='records')

        for col_index, col in enumerate(list(df.columns)):
            cell_type = 'number'

            # avoid looping rows to minimize use of `df.iat` (time-consuming in 3d)
            if not col.startswith('level_') and col not in numeric_columns:
                is_url_column, prev_unit, old_col = True, None, col

                for row_index in range(df.shape[0]):
                    cell = str(df.iat[row_index, col_index])
                    cell_split = cell.split(' ', 1)

                    if not cell or len(cell_split) == 1:
                        # empty cell or no space: the column stays a URL
                        # column only while every cell so far is empty or
                        # an MP id
                        is_url_column = bool(
                            is_url_column
                            and (not cell or mp_id_pattern.match(cell)))
                        if is_url_column:
                            if cell:
                                value = 'https://materialsproject.org/materials/{}'.format(
                                    cell)
                                table['rows'][row_index][col] = value
                        elif cell:
                            try:
                                composition = get_composition_from_string(cell)
                                composition = pmg_util.string.unicodeify(
                                    composition)
                                table['rows'][row_index][col] = composition
                            except (CompositionError, ValueError,
                                    OverflowError):
                                try:
                                    # https://stackoverflow.com/a/38020041
                                    result = urlparse(cell)
                                    if not all([
                                            result.scheme, result.netloc,
                                            result.path
                                    ]):
                                        break  # plain string: stop scanning
                                    is_url_column = True
                                except ValueError:
                                    # urlparse reports malformed input with
                                    # ValueError; treat it as a plain string
                                    break

                    else:
                        value, unit = cell_split  # TODO convert cell_split[0] to float?
                        is_url_column = False
                        try:
                            # unit is only a unit if value is number
                            float(value)
                        except ValueError:
                            continue
                        table['rows'][row_index].pop(old_col)
                        if prev_unit is None:
                            # first unit seen is moved into the column name
                            prev_unit = unit
                            col = '{} [{}]'.format(col, unit)
                        # cells with a deviating unit keep their full text
                        table['rows'][row_index][
                            col] = cell if prev_unit != unit else value

                cell_type = 'uri' if is_url_column else 'string'

            col_split = col.split('##')
            nesting = [col_split[0]] if len(col_split) > 1 else []
            table['columns'].append({
                'name': col,
                'cell': cell_type,
                'nesting': nesting,
                'editable': 0
            })
            if len(col_split) > 1:
                table['columns'][-1].update(
                    {'label': '##'.join(col_split[1:])})
            if len(table['columns']) > 12:
                # only the first 12 columns are rendered
                table['columns'][-1]['renderable'] = 0

        # collect nested columns of the form "<group>##<name> <unit>"
        header = RecursiveDict()
        for idx, col in enumerate(table['columns']):
            if 'label' in col:
                # split once: the label itself may contain further '##'
                k, sk = col['name'].split('##', 1)
                sk_split = sk.split()
                if len(sk_split) == 2:
                    d = {'name': sk_split[0], 'unit': sk_split[1], 'idx': idx}
                    if k not in header:
                        header[k] = [d]
                    else:
                        header[k].append(d)
                elif k in header:
                    header.pop(k)

        # if all sub-columns of a group share one unit, move the unit from
        # the sub-column labels into the group (nesting) title
        for k, skl in header.items():
            units = [sk['unit'] for sk in skl]
            if units.count(units[0]) == len(units):
                for sk in skl:
                    table['columns'][sk['idx']]['label'] = sk['name']
                    table['columns'][sk['idx']]['nesting'][0] = '{} {}'.format(
                        k, sk['unit'])

        return table
Пример #11
0
def get_card(request, cid, db_type=None, mdb=None):
    """
    @api {post} /card/:cid?API_KEY=:api_key Contribution Card/Preview
    @apiVersion 0.2.0
    @apiName PostGetCard
    @apiGroup Contribution

    @apiDescription Either returns a string containing html for hierarchical
    data, or if existent, a list of URLs for static versions of embedded graphs.

    @apiParam {String} api_key User's unique API_KEY
    @apiParam {json} provenance_keys List of provenance keys

    @apiSuccess {String} created_at Response timestamp
    @apiSuccess {Bool} valid_response Response is valid
    @apiSuccess {String} response Response preview of h- or t-data/graphs ("card")

    @apiSuccessExample Success-Response:
        HTTP/1.1 200 OK
        {
            "created_at": "2017-08-09T19:59:59.936618",
            "valid_response": true,
            "response": ["<graph-url>"]
        }
    """
    from mpcontribs.io.core.components import Tree, Plots, render_plot
    from mpcontribs.io.core.utils import nested_dict_iter
    from mpcontribs.io.core.recdict import RecursiveDict, render_dict
    from django.template import Template, Context
    from django.core.urlresolvers import reverse
    from mpcontribs.config import mp_id_pattern
    prov_keys = loads(request.POST.get('provenance_keys', '["title"]'))
    contrib = mdb.contrib_ad.query_contributions(
        {'_id': ObjectId(cid)},
        projection={'_id': 0, 'mp_cat_id': 1, 'content': 1, 'collaborators': 1}
    )[0]
    mpid = contrib['mp_cat_id']
    hdata = Tree(contrib['content'])
    plots = Plots(contrib['content'])
    title = hdata.get('title', 'No title available.')
    # first sentence is always shown, the remainder sits behind "More"
    descriptions = hdata.get('description', 'No description available.').strip().split('.', 1)
    description = '{}.'.format(descriptions[0])
    if len(descriptions) > 1 and descriptions[1]:
        description += '''<a href="#"
        class="read_more">More &raquo;</a><span class="more_text"
        hidden>{}</span>'''.format(descriptions[1])
    # first author is shown, the others go into the "et al." tooltip
    authors = hdata.get('authors', 'No authors available.').split(',', 1)
    provenance = '<h5>{}'.format(authors[0])
    if len(authors) > 1:
        # the element is opened as <button>, so it must be closed as one
        provenance += '''<button class="btn-sm btn-link" type=button
        data-toggle="tooltip" data-placement="bottom"
        data-container="body" title="{}" style="padding: 0px 0px 0px 3px;"
        >et al.</button>'''.format(authors[1].strip())
    provenance += '</h5>'
    # drop empty entries: a missing/empty dois field must not produce a
    # dangling link to https://doi.org/
    dois = [
        x for x in hdata.get('dois', hdata.get('urls', '')).split(' ') if x
    ]
    doi_urls = [
        x if x.startswith('http') else 'https://doi.org/{}'.format(x)
        for x in dois
    ]
    # href is quoted because DOIs may contain characters such as < and >
    provenance += ''.join(['''<a href="{}"
        class="btn btn-link" role=button style="padding: 0"
        target="_blank"><i class="fa fa-book fa-border fa-lg"></i></a>'''.format(x)
        for x in doi_urls
    ])
    # NOTE: rendering of static graph images is disabled; the card is always
    # the HTML preview built below.
    #if plots:
    #    card = []
    #    for name, plot in plots.items():
    #        filename = '{}_{}.png'.format(mpid, name)
    #        cwd = os.path.dirname(__file__)
    #        filepath = os.path.abspath(os.path.join(
    #            cwd, '..', '..', 'webtzite', 'static', 'img', filename
    #        ))
    #        if not os.path.exists(filepath):
    #            render_plot(plot, filename=filepath)
    #        index = request.build_absolute_uri(reverse('webtzite_index')[:-1])
    #        imgdir = '/'.join([index.rsplit('/', 1)[0], 'static', 'img'])
    #        fileurl = '/'.join([imgdir, filename])
    #        card.append(fileurl)
    #else:
    data = RecursiveDict()
    for idx, (k,v) in enumerate(hdata.get('data', {}).items()):
        data[k] = v
        if idx >= 6:
            break # humans can grasp 7 items quickly
    data = render_dict(data, webapp=True)
    is_mp_id = mp_id_pattern.match(mpid)
    collection = 'materials' if is_mp_id else 'compositions'
    more = reverse('mpcontribs_explorer_contribution', args=[collection, cid])
    # doubled braces are literal braces for str.format
    card = '''
    <div class="panel panel-default">
        <div class="panel-heading">
            <h4 class="panel-title">
                {}
                <a class="btn-sm btn-default pull-right" role="button"
                   style=" margin-top:-6px;"
                   href="{}" target="_blank">More Info</a>
            </h4>
        </div>
        <div class="panel-body" style="padding-left: 0px">
            <div class="col-md-8" style="padding-top: 0px">
                <blockquote class="blockquote" style="font-size: 13px;">{}</blockquote>
            </div>
            <div class="col-md-4 well" style="padding: 0px 0px 5px 5px;">{}</div>
            <div class="col-md-12" style="padding-right: 0px;">{}</div>
        </div>
    </div>
    <script>
    requirejs(['main'], function() {{
        require(['jquery'], function() {{
            $(function(){{
                $("a.read_more").click(function(event){{
                    event.preventDefault();
                    $(this).parents(".blockquote").find(".more_text").show();
                    $(this).parents(".blockquote").find(".read_more").hide();
                }});
            }});
        }});
    }});
    </script>
    '''.format(
            title, more, description, provenance, data
    )
    return {"valid_response": True, "response": card}
Пример #12
0
    def to_backgrid_dict(self):
        """Backgrid-conform dict from DataFrame

        Returns:
            dict with the keys
            - `columns`: one definition per column (`name`, `cell`,
              `nesting`, `editable`, optionally `label` and `renderable`),
            - `rows`: the records of at most the first `nrows_max` rows.

        Columns whose name neither starts with `level_` nor ends with `]`
        are scanned cell by cell: MP ids become materialsproject.org URLs
        and compositions are unicodeified. Column names containing `.` are
        split into a nesting group and a label.
        """
        # keep module import time low by importing heavy dependencies here
        import numpy as np
        from mpcontribs.io.core.utils import get_composition_from_string
        from pandas import MultiIndex
        import pymatgen.util as pmg_util
        from pymatgen.core.composition import CompositionError

        table = dict()
        nrows_max = 260
        nrows = self.shape[0]
        df = Table(self.head(n=nrows_max)) if nrows > nrows_max else self

        if isinstance(df.index, MultiIndex):
            df.reset_index(inplace=True)

        table['columns'] = []
        table['rows'] = super(Table, df).to_dict(orient='records')

        for col_index, col in enumerate(list(df.columns)):
            cell_type = 'number'

            # avoid looping rows to minimize use of `df.iat` (time-consuming in 3d)
            if not col.startswith('level_') and col[-1] != ']':
                is_url_column = True

                for row_index in range(df.shape[0]):
                    cell = str(df.iat[row_index, col_index])
                    # the column stays a URL column only while every cell so
                    # far is empty or an MP id
                    is_url_column = bool(
                        is_url_column
                        and (not cell or mp_id_pattern.match(cell)))
                    if is_url_column:
                        if cell:
                            value = 'https://materialsproject.org/materials/{}'.format(
                                cell)
                            table['rows'][row_index][col] = value
                    elif cell:
                        try:
                            composition = get_composition_from_string(cell)
                            composition = pmg_util.string.unicodeify(
                                composition)
                            table['rows'][row_index][col] = composition
                        except (CompositionError, ValueError, OverflowError):
                            try:
                                # https://stackoverflow.com/a/38020041
                                result = urlparse(cell)
                                if not all([
                                        result.scheme, result.netloc,
                                        result.path
                                ]):
                                    break  # plain string: stop scanning
                                is_url_column = True
                            except ValueError:
                                # urlparse reports malformed input with
                                # ValueError; treat it as a plain string
                                break

                cell_type = 'uri' if is_url_column else 'string'

            col_split = col.split('.')
            nesting = [col_split[0]] if len(col_split) > 1 else []
            table['columns'].append({
                'name': col,
                'cell': cell_type,
                'nesting': nesting,
                'editable': 0
            })
            if len(col_split) > 1:
                table['columns'][-1].update({'label': '.'.join(col_split[1:])})
            if len(table['columns']) > 12:
                # only the first 12 columns are rendered
                table['columns'][-1]['renderable'] = 0

        return table
Пример #13
0
    def build(self, contributor_email, cid, api_key=None, endpoint=None):
        """update materials/compositions collections with contributed data

        Builds and executes a notebook for contribution `cid` that shows its
        hierarchical, tabular, graphical and structural data.

        Args:
            contributor_email: author string of the person triggering the
                build; must be listed in the contribution's collaborators.
            cid: id of the contribution to build.
            api_key: value exported as PMG_MAPI_KEY (only used when
                `self.db` is not a dict).
            endpoint: value exported as PMG_MAPI_ENDPOINT (only used when
                `self.db` is not a dict).

        Returns:
            `[identifier, project, short cid, exported notebook]` when
            `self.db` is a dict; otherwise the relative URL
            `<materials|compositions>/<cid>` after upserting the build
            document into the matching collection.

        Raises:
            Exception: if the contribution cannot be found.
            ValueError: if `contributor_email` is not a collaborator.
        """
        cid_short, cid_str = get_short_object_id(cid), str(cid)
        contrib = self.find_contribution(cid)
        if not contrib:
            raise Exception('Contribution {} not found!'.format(cid))
        if contributor_email not in contrib['collaborators']:
            raise ValueError(
                "Build stopped: building contribution {} not "
                "allowed due to insufficient permissions of {}! Ask "
                "someone of {} to make you a collaborator on {}.".format(
                    cid_short, contributor_email, contrib['collaborators'],
                    cid_short))
        from pymatgen.util.provenance import Author
        mpfile = MPFileCore.from_contribution(contrib)
        mp_cat_id = mpfile.ids[0]
        is_mp_id = mp_id_pattern.match(mp_cat_id)
        self.curr_coll = self.materials if is_mp_id else self.compositions
        author = Author.parse_author(contributor_email)
        # default project name is the author name without dots;
        # str.replace behaves the same on Python 2 and 3, unlike the
        # Python-2-only str.translate(None, '.')
        project = str(author.name).replace('.', '') \
                if 'project' not in contrib else contrib['project']

        nb = nbf.new_notebook()
        if isinstance(self.db, dict):
            # dict-backed DB: embed the contribution itself in the notebook
            contrib.pop('_id')
            if 'cid' in contrib['content']:
                contrib['content'].pop('cid')
            nb['cells'].append(
                nbf.new_code_cell(
                    "from mpcontribs.io.core.mpfile import MPFileCore\n"
                    "from mpcontribs.io.core.recdict import RecursiveDict\n"
                    "mpfile = MPFileCore.from_contribution({})\n"
                    "identifier = '{}'".format(contrib, mp_cat_id)))
        else:
            # otherwise the notebook retrieves the contribution via the
            # rester, configured through the environment
            nb['cells'].append(
                nbf.new_code_cell(
                    "from mpcontribs.rest.rester import MPContribsRester"))
            os.environ['PMG_MAPI_KEY'] = api_key
            os.environ['PMG_MAPI_ENDPOINT'] = endpoint
            nb['cells'].append(
                nbf.new_code_cell(
                    "with MPContribsRester() as mpr:\n"
                    "    mpfile = mpr.find_contribution('{}')\n"
                    "    identifier = mpfile.ids[0]".format(cid)))
        nb['cells'].append(
            nbf.new_markdown_cell("## Contribution #{} for {}".format(
                cid_short, mp_cat_id)))
        nb['cells'].append(nbf.new_markdown_cell("### Hierarchical Data"))
        nb['cells'].append(nbf.new_code_cell("mpfile.hdata[identifier]"))

        # one heading per non-empty data kind, then one titled code cell per
        # entry of that kind
        sections = [
            ('Tabular Data', 'tdata'),
            ('Graphical Data', 'gdata'),
            ('Structural Data', 'sdata'),
        ]
        for heading, attr in sections:
            entries = getattr(mpfile, attr)[mp_cat_id]
            if entries:
                nb['cells'].append(
                    nbf.new_markdown_cell("### {}".format(heading)))
            for entry_name, _ in entries.items():
                nb['cells'].append(
                    nbf.new_markdown_cell("#### {}".format(entry_name)))
                nb['cells'].append(
                    nbf.new_code_cell(
                        "mpfile.{}[identifier]['{}']".format(
                            attr, entry_name)))

        self.ep.preprocess(nb, {'metadata': {'path': self.nbdir}})

        if isinstance(self.db, dict):
            return [mp_cat_id, project, cid_short, export_notebook(nb, cid)]
        else:
            build_doc = RecursiveDict()
            build_doc['mp_cat_id'] = mp_cat_id
            build_doc['project'] = project
            build_doc['nb'] = nb
            self.curr_coll.update({'_id': cid}, {'$set': build_doc},
                                  upsert=True)
            return '{}/{}'.format(  # return URL for contribution page
                ('materials' if is_mp_id else 'compositions'), cid_str)
Пример #14
0
def get_backgrid_table(df):
    """Backgrid-conform dict from DataFrame

    Args:
        df: table to convert; only its first `nrows_max` rows are used.

    Returns:
        dict with the keys
        - `columns`: one definition per column (`name`, `cell`, `nesting`,
          `editable`, optionally `label` and `renderable`),
        - `rows`: the records of the (possibly truncated) table.

    Non-numeric columns whose name does not start with `level_` are scanned
    cell by cell: MP ids become materialsproject.org URLs, valid URLs mark
    the column as `uri`, and "<value> <unit>" cells have their unit moved
    into the column name. Column names containing `##` are split into a
    nesting group and a label.
    """
    # shorten global import times by importing django here
    import numpy as np
    from django.core.validators import URLValidator
    from django.core.exceptions import ValidationError
    from pandas import MultiIndex

    val = URLValidator()
    table = dict()
    nrows_max = 200
    if df.shape[0] > nrows_max:
        df = Table(df.head(n=nrows_max))
    # row count AFTER truncation: looping over the original length would
    # index past the end of the shortened frame
    nrows = df.shape[0]
    numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()

    if isinstance(df.index, MultiIndex):
        df.reset_index(inplace=True)

    table['columns'] = []
    table['rows'] = super(Table, df).to_dict(orient='records')

    for col_index, col in enumerate(list(df.columns)):
        cell_type = 'number'

        # avoid looping rows to minimize use of `df.iat` (time-consuming in 3d)
        if not col.startswith('level_') and col not in numeric_columns:
            is_url_column, prev_unit, old_col = True, None, col

            for row_index in xrange(nrows):
                cell = unicode(df.iat[row_index, col_index])
                cell_split = cell.split(' ', 1)
                if not cell or len(cell_split) == 1:  # empty cell or no space
                    if is_url_column:
                        is_url_column = bool(not cell
                                             or mp_id_pattern.match(cell))
                        if is_url_column:
                            if cell:
                                value = 'https://materialsproject.org/materials/{}'.format(
                                    cell)
                                table['rows'][row_index][col] = value
                        else:
                            # not an MP id: still a URL column if the cell
                            # validates as a URL
                            try:
                                val(cell)
                                is_url_column = True
                            except ValidationError:
                                # is_url_column already set to False
                                break
                else:
                    value, unit = cell_split  # TODO convert cell_split[0] to float?
                    table['rows'][row_index].pop(old_col)
                    if prev_unit is None:
                        # first unit seen is moved into the column name
                        is_url_column = False
                        prev_unit = unit
                        col = '{} [{}]'.format(col, unit)
                    # cells with a deviating unit keep their full text
                    table['rows'][row_index][
                        col] = cell if prev_unit != unit else value

            cell_type = 'uri' if is_url_column else 'string'

        col_split = col.split('##')
        nesting = [col_split[0]] if len(col_split) > 1 else []
        table['columns'].append({
            'name': col,
            'cell': cell_type,
            'nesting': nesting,
            'editable': 0
        })
        if len(col_split) > 1:
            table['columns'][-1].update({'label': '##'.join(col_split[1:])})
        if len(table['columns']) > 9:
            # only the first 9 columns are rendered
            table['columns'][-1]['renderable'] = 0

    return table