def get_cid_url(self, doc):
    """Return the explorer detail-page URL for a contribution document.

    The document's ``mp_cat_id`` selects the URL section: ``materials`` when
    it matches ``mp_id_pattern``, ``compositions`` otherwise. The URL root is
    ``self.preamble`` with everything after its last ``/`` removed, and the
    document's ``_id`` is the final path segment.
    """
    from mpcontribs.config import mp_id_pattern

    if mp_id_pattern.match(doc['mp_cat_id']):
        collection = 'materials'
    else:
        collection = 'compositions'
    site_root = self.preamble.rsplit('/', 1)[0]
    segments = (site_root, 'explorer', collection, doc['_id'])
    return '/'.join(segments)
def get_cid_url(self, doc):
    """Assemble the URL of a contribution's detail page from its MongoDB doc.

    ``doc['identifier']`` is matched against ``mp_id_pattern`` to choose
    between the ``materials`` and ``compositions`` sections; ``doc['_id']``
    becomes the last path segment. The base is ``self.preamble`` cut at its
    last ``/``.
    """
    from mpcontribs.config import mp_id_pattern

    matched = mp_id_pattern.match(doc['identifier'])
    parts = [self.preamble.rsplit('/', 1)[0], 'explorer']
    parts.append('materials' if matched else 'compositions')
    parts.append(doc['_id'])
    return '/'.join(parts)
def normalize_root_level(title):
    """Turn a root-level title into a conventional identifier if possible.

    The title is first handed to ``get_composition_from_string``. If that
    raises one of the handled errors, the lower-cased title is tested against
    ``mp_id_pattern``. Titles that are neither become part of the shared
    (meta-)data and are returned unchanged.

    Returns:
        (is_general, title): ``is_general`` is False for identifiers
        (composition or lower-cased mp-id) and True otherwise.
    """
    from pymatgen.core.composition import CompositionError

    try:
        formula = get_composition_from_string(title)
    except (CompositionError, KeyError, TypeError, ValueError):
        lowered = title.lower()
        if mp_id_pattern.match(lowered):
            return False, lowered
        return True, title
    else:
        return False, formula
def normalize_root_level(title):
    """Convert a root-level title into a conventional identifier.

    The title is first parsed with ``get_composition_from_string``. If that
    fails, the lower-cased title is tested against ``mp_id_pattern``.
    Non-identifiers become part of the shared (meta-)data.

    Returns:
        (is_general, title): ``(False, composition)`` or
        ``(False, lower-cased title)`` for identifiers, ``(True, title)``
        for everything else.
    """
    try:
        composition = get_composition_from_string(title)
    except Exception:
        # Any parsing failure just means "not a composition". Catching
        # `Exception` (rather than a bare `except:`) keeps that best-effort
        # behaviour without swallowing KeyboardInterrupt/SystemExit.
        lowered = title.lower()
        if mp_id_pattern.match(lowered):
            return False, lowered
        return True, title
    return False, composition
def normalize_root_level(title):
    """Convert a root-level title into a conventional identifier.

    The title is first parsed as a ``Composition`` and reduced via
    ``get_integer_formula_and_factor()``; the first element of that result is
    used as the identifier. If that fails, the lower-cased title is tested
    against ``mp_id_pattern``. Non-identifiers become part of the shared
    (meta-)data.

    Returns:
        (is_general, title): ``(False, composition)`` or
        ``(False, lower-cased title)`` for identifiers, ``(True, title)``
        for everything else.
    """
    try:
        composition = Composition(title).get_integer_formula_and_factor()[0]
    except Exception:
        # Any parsing failure just means "not a composition". Catching
        # `Exception` (rather than a bare `except:`) keeps that best-effort
        # behaviour without swallowing KeyboardInterrupt/SystemExit.
        lowered = title.lower()
        if mp_id_pattern.match(lowered):
            return False, lowered
        return True, title
    return False, composition
def normalize_root_level(title):
    """Convert a root-level title into a conventional identifier.

    Tries, in order:
      1. ``Composition(title).get_integer_formula_and_factor()[0]``
      2. the lower-cased title, if it matches ``mp_id_pattern``

    Titles matching neither are "general" sections, i.e. they become part of
    the shared (meta-)data and are returned untouched.

    Returns:
        (is_general, title)
    """
    try:
        composition = Composition(title).get_integer_formula_and_factor()[0]
    except Exception:
        # `except Exception` instead of a bare `except:` so that
        # KeyboardInterrupt/SystemExit still propagate; every ordinary
        # parsing error continues to fall through to the mp-id check.
        mp_id_candidate = title.lower()
        if mp_id_pattern.match(mp_id_candidate):
            return False, mp_id_candidate
        return True, title
    return False, composition
def normalize_root_level(title):
    """Map a root-level title onto an identifier, or flag it as general.

    A title that ``get_composition_from_string`` accepts is returned as that
    composition. Otherwise its lower-cased form is returned when it matches
    ``mp_id_pattern``. Anything else is a general (shared meta-data) section
    and comes back unchanged.

    Returns:
        (is_general, title)
    """
    from pymatgen.core.composition import CompositionError

    try:
        return False, get_composition_from_string(title)
    except (CompositionError, KeyError, TypeError, ValueError):
        candidate = title.lower()
        if not mp_id_pattern.match(candidate):
            return True, title
        return False, candidate
def delete(self, project, cids):
    """Remove built contributions of `project` from the build collections.

    For every contribution in `cids`, the `<project>.<cid>` entry is unset
    in the materials or compositions collection, chosen by whether the
    contribution's `mp_cat_id` matches `mp_id_pattern`. Afterwards the
    `project` field itself is removed from documents in which it is empty.

    Args:
        project: name of the project field inside the build documents.
        cids: contribution ids, used in an `$in` query on `_id`.
    """
    for contrib in self.contributions.find({'_id': {'$in': cids}}):
        mp_cat_id, cid = contrib['mp_cat_id'], contrib['_id']
        is_mp_id = mp_id_pattern.match(mp_cat_id)
        coll = self.materials if is_mp_id else self.compositions
        key = '.'.join([project, str(cid)])
        coll.update({}, {'$unset': {key: 1}}, multi=True)
    # remove `project` field when no contributions remaining
    for coll in (self.materials, self.compositions):
        for doc in coll.find({project: {'$exists': 1}}):
            # Only the project's own sub-document decides. Checking every
            # value of `doc` would drop a still-populated project whenever
            # any unrelated field is empty, and could repeat the update.
            if not doc.get(project):
                coll.update({'_id': doc['_id']}, {'$unset': {project: 1}})
def run(mpfile, dup_check_test_site=True):
    """Fill `mpfile` from the Google spreadsheet referenced in its metadata.

    The sheet URL is popped from `mpfile.document[mp_level01_titles[0]]`
    (key `google_sheet`) and downloaded as xlsx. Each worksheet name is split
    on whitespace: the first token is the composition, and an optional second
    token matching `mp_id_pattern` overrides the identifier. Sheets with
    `CIF` in their name are added as structures; all others are added as
    hierarchical data plus a `dH_dS` table.

    Args:
        mpfile: MPFile-like object that is modified in place.
        dup_check_test_site: currently unused; it only appears in the
            disabled duplicate check below. Kept for interface compatibility.
    """
    existing_identifiers = {}
    # Duplicate check is disabled; `existing_identifiers` therefore stays
    # empty and no contribution is ever flagged as an update.
    #for b in [False, True]:
    #    with DlrVietenRester(test_site=b) as mpr:
    #        for doc in mpr.query_contributions():
    #            existing_identifiers[doc['mp_cat_id']] = doc['_id']
    #    if not dup_check_test_site:
    #        break

    google_sheet = mpfile.document[mp_level01_titles[0]].pop('google_sheet')
    google_sheet += '/export?format=xlsx'
    # NOTE(review): newer pandas releases spell this keyword `sheet_name`;
    # left unchanged because the pandas version in use is not visible here.
    df_dct = pd.read_excel(google_sheet, sheetname=None)

    update = 0
    for sheet, df in df_dct.items():
        print(sheet)
        sheet_split = sheet.split()
        composition = sheet_split[0]
        identifier = get_composition_from_string(composition)
        if len(sheet_split) > 1 and mp_id_pattern.match(sheet_split[1]):
            identifier = sheet_split[1]
        print('identifier = {}'.format(identifier))

        if 'CIF' in sheet_split:
            print('adding CIF ...')
            # keep the first header, blank all others before dumping to text
            df.columns = [df.columns[0]] + [''] * (df.shape[1] - 1)
            cif = df.to_csv(na_rep='', index=False, sep='\t',
                            quoting=csv.QUOTE_NONE)
            mpfile.add_structure(cif, identifier=identifier, fmt='cif')
        else:
            print('adding data ...')
            mpfile.add_hierarchical_data({'composition': composition},
                                         identifier=identifier)
            mpfile.add_data_table(identifier, df, name='dH_dS')

        if identifier in existing_identifiers:
            cid = existing_identifiers[identifier]
            mpfile.insert_id(identifier, cid)
            update += 1

    # single-argument print calls give the same output on Python 2 and 3
    print('{} contributions to submit.'.format(len(mpfile.ids)))
    if update > 0:
        print('{} contributions to update.'.format(update))
def to_backgrid_dict(self):
    """Backgrid-conform dict from DataFrame.

    Returns a dict with two keys:
      * ``rows``: list of per-row dicts (``to_dict(orient='records')``),
        with some cell values rewritten in place (see below);
      * ``columns``: one dict per column with ``name``, ``cell``
        (``'number'``, ``'string'`` or ``'uri'``), ``nesting``, ``editable``
        and optionally ``label`` / ``renderable``.

    At most ``nrows_max`` (260) rows are included. Columns whose name starts
    with ``level_`` or that have a numeric dtype are typed ``'number'``
    without inspecting their cells. For the remaining columns every cell is
    inspected:
      * cells without a space: mp-ids become materialsproject.org URLs; once
        a column is no longer all-mp-ids, cells are tried as compositions
        and then as URLs, and scanning stops at the first cell that is
        neither;
      * cells of the form ``"<number> <unit>"``: the column is renamed to
        ``"<col> [<unit>]"`` using the first unit seen, and the bare number
        is stored (the full cell if its unit differs from the first one).

    Column names containing ``##`` are treated as nested: the first part
    becomes ``nesting`` and the remainder ``label``. In a final pass, groups
    whose sub-columns all share one unit have the unit moved from the
    sub-column labels into the group's nesting title.
    """
    # function-level imports to shorten global import times
    import numpy as np
    from mpcontribs.io.core.utils import get_composition_from_string
    from pandas import MultiIndex
    import pymatgen.util as pmg_util
    from pymatgen.core.composition import CompositionError

    table = dict()
    nrows_max = 260
    nrows = self.shape[0]
    df = Table(self.head(n=nrows_max)) if nrows > nrows_max else self
    numeric_columns = df.select_dtypes(
        include=[np.number]).columns.tolist()

    if isinstance(df.index, MultiIndex):
        df.reset_index(inplace=True)

    table['columns'] = []
    table['rows'] = super(Table, df).to_dict(orient='records')

    for col_index, col in enumerate(list(df.columns)):
        cell_type = 'number'

        # avoid looping rows to minimize use of `df.iat` (time-consuming in 3d)
        if not col.startswith('level_') and col not in numeric_columns:
            is_url_column, prev_unit, old_col = True, None, col

            for row_index in range(df.shape[0]):
                cell = str(df.iat[row_index, col_index])
                cell_split = cell.split(' ', 1)

                if not cell or len(cell_split) == 1:  # empty cell or no space
                    is_url_column = bool(
                        is_url_column
                        and (not cell or mp_id_pattern.match(cell)))
                    if is_url_column:
                        if cell:
                            value = 'https://materialsproject.org/materials/{}'.format(
                                cell)
                            table['rows'][row_index][col] = value
                    elif cell:
                        try:
                            composition = get_composition_from_string(cell)
                            composition = pmg_util.string.unicodeify(
                                composition)
                            table['rows'][row_index][col] = composition
                        except (CompositionError, ValueError, OverflowError):
                            try:
                                # https://stackoverflow.com/a/38020041
                                result = urlparse(cell)
                                if not all([
                                        result.scheme, result.netloc,
                                        result.path
                                ]):
                                    break
                                is_url_column = True
                            except Exception:
                                # was a bare `except:`; still treats any
                                # parsing error as "not a URL" but lets
                                # KeyboardInterrupt/SystemExit propagate
                                break
                else:
                    value, unit = cell_split
                    # TODO convert cell_split[0] to float?
                    is_url_column = False
                    try:
                        float(value)  # unit is only a unit if value is number
                    except ValueError:
                        continue
                    # the row dict is still keyed by the original column name
                    table['rows'][row_index].pop(old_col)
                    if prev_unit is None:
                        prev_unit = unit
                        col = '{} [{}]'.format(col, unit)
                    table['rows'][row_index][
                        col] = cell if prev_unit != unit else value

            cell_type = 'uri' if is_url_column else 'string'

        col_split = col.split('##')
        nesting = [col_split[0]] if len(col_split) > 1 else []
        table['columns'].append({
            'name': col,
            'cell': cell_type,
            'nesting': nesting,
            'editable': 0
        })
        if len(col_split) > 1:
            table['columns'][-1].update(
                {'label': '##'.join(col_split[1:])})
        if len(table['columns']) > 12:
            # only the first 12 columns are rendered
            table['columns'][-1]['renderable'] = 0

    # collect nested columns of the form "<group>##<name> <unit>" per group
    header = RecursiveDict()
    for idx, col in enumerate(table['columns']):
        if 'label' in col:
            # NOTE(review): unpacking assumes exactly one '##' in the name
            k, sk = col['name'].split('##')
            sk_split = sk.split()
            if len(sk_split) == 2:
                d = {'name': sk_split[0], 'unit': sk_split[1], 'idx': idx}
                if k not in header:
                    header[k] = [d]
                else:
                    header[k].append(d)
            elif k in header:
                # a sub-column without "<name> <unit>" form disqualifies
                # the group collected so far
                header.pop(k)

    # if all sub-columns of a group share one unit, show it once in the
    # group title instead of in every sub-column label
    for k, skl in header.items():
        units = [sk['unit'] for sk in skl]
        if units.count(units[0]) == len(units):
            for sk in skl:
                table['columns'][sk['idx']]['label'] = sk['name']
                table['columns'][sk['idx']]['nesting'][0] = '{} {}'.format(
                    k, sk['unit'])

    return table
def get_card(request, cid, db_type=None, mdb=None):
    """
    @api {post} /card/:cid?API_KEY=:api_key Contribution Card/Preview
    @apiVersion 0.2.0
    @apiName PostGetCard
    @apiGroup Contribution
    @apiDescription Either returns a string containing html for hierarchical
    data, or if existent, a list of URLs for static versions of embedded graphs.

    @apiParam {String} api_key User's unique API_KEY
    @apiParam {json} provenance_keys List of provenance keys

    @apiSuccess {String} created_at Response timestamp
    @apiSuccess {Bool} valid_response Response is valid
    @apiSuccess {String} response Response preview of h- or t-data/graphs ("card")

    @apiSuccessExample Success-Response:
        HTTP/1.1 200 OK
        {
            "created_at": "2017-08-09T19:59:59.936618",
            "valid_response": true,
            "response": ["<graph-url>"]
        }
    """
    # Implementation notes (kept out of the apidoc block above):
    # - the card shows the title, the first sentence of the description (rest
    #   behind a "More" toggle), the first author (rest in an "et al."
    #   tooltip), one link per DOI/URL, and at most 7 entries of `data`;
    # - the graph-URL variant mentioned in the description is currently
    #   disabled (see the commented block below), so `response` is always HTML.
    from mpcontribs.io.core.components import Tree, Plots, render_plot
    from mpcontribs.io.core.utils import nested_dict_iter
    from mpcontribs.io.core.recdict import RecursiveDict, render_dict
    from django.template import Template, Context
    from django.core.urlresolvers import reverse
    from mpcontribs.config import mp_id_pattern

    # parsed but not used further below; the call still rejects invalid JSON
    prov_keys = loads(request.POST.get('provenance_keys', '["title"]'))
    contrib = mdb.contrib_ad.query_contributions(
        {'_id': ObjectId(cid)},
        projection={'_id': 0, 'mp_cat_id': 1, 'content': 1, 'collaborators': 1}
    )[0]
    mpid = contrib['mp_cat_id']
    hdata = Tree(contrib['content'])
    plots = Plots(contrib['content'])

    title = hdata.get('title', 'No title available.')

    # first sentence is always shown, the remainder is hidden behind "More"
    descriptions = hdata.get('description', 'No description available.').strip().split('.', 1)
    description = '{}.'.format(descriptions[0])
    if len(descriptions) > 1 and descriptions[1]:
        description += '''<a href="#" class="read_more">More »</a><span class="more_text" hidden>{}</span>'''.format(descriptions[1])

    # first author is shown, the remaining ones go into the tooltip
    authors = hdata.get('authors', 'No authors available.').split(',', 1)
    provenance = '<h5>{}'.format(authors[0])
    if len(authors) > 1:
        # fix: the <button> element was closed with a stray </a>
        provenance += '''<button class="btn-sm btn-link" type=button data-toggle="tooltip"
data-placement="bottom" data-container="body" title="{}" style="padding: 0px 0px 0px 3px;" >et al.</button>'''.format(authors[1].strip())
    provenance += '</h5>'

    # fix: `.split()` drops empty entries. With `.split(' ')` a missing or
    # empty `dois`/`urls` value produced [''] and hence a bogus link to
    # "https://doi.org/", because the old filter tested the already-prefixed
    # URL, which is never empty.
    dois = hdata.get('dois', hdata.get('urls', '')).split()
    doi_urls = [
        doi if doi.startswith('http') else 'https://doi.org/{}'.format(doi)
        for doi in dois
    ]
    provenance += ''.join(['''<a href={} class="btn btn-link" role=button style="padding: 0" target="_blank"><i class="fa fa-book fa-border fa-lg"></i></a>'''.format(url) for url in doi_urls])

    #if plots:
    #    card = []
    #    for name, plot in plots.items():
    #        filename = '{}_{}.png'.format(mpid, name)
    #        cwd = os.path.dirname(__file__)
    #        filepath = os.path.abspath(os.path.join(
    #            cwd, '..', '..', 'webtzite', 'static', 'img', filename
    #        ))
    #        if not os.path.exists(filepath):
    #            render_plot(plot, filename=filepath)
    #        index = request.build_absolute_uri(reverse('webtzite_index')[:-1])
    #        imgdir = '/'.join([index.rsplit('/', 1)[0], 'static', 'img'])
    #        fileurl = '/'.join([imgdir, filename])
    #        card.append(fileurl)
    #else:
    data = RecursiveDict()
    for idx, (k, v) in enumerate(hdata.get('data', {}).items()):
        data[k] = v
        if idx >= 6:
            break  # humans can grasp 7 items quickly
    data = render_dict(data, webapp=True)

    is_mp_id = mp_id_pattern.match(mpid)
    collection = 'materials' if is_mp_id else 'compositions'
    more = reverse('mpcontribs_explorer_contribution', args=[collection, cid])

    # doubled braces in the <script> part are literal braces for str.format
    card = ''' <div class="panel panel-default"> <div class="panel-heading"> <h4 class="panel-title"> {} <a class="btn-sm btn-default pull-right" role="button" style=" margin-top:-6px;" href="{}" target="_blank">More Info</a> </h4> </div> <div class="panel-body" style="padding-left: 0px"> <div class="col-md-8" style="padding-top: 0px"> <blockquote class="blockquote" style="font-size: 13px;">{}</blockquote> </div> <div class="col-md-4 well" style="padding: 0px 0px 5px 5px;">{}</div> <div class="col-md-12" style="padding-right: 0px;">{}</div> </div> </div> <script>
requirejs(['main'], function() {{ require(['jquery'], function() {{ $(function(){{ $("a.read_more").click(function(event){{ event.preventDefault(); $(this).parents(".blockquote").find(".more_text").show(); $(this).parents(".blockquote").find(".read_more").hide(); }}); }}); }}); }}); </script> '''.format(
        title, more, description, provenance, data
    )
    return {"valid_response": True, "response": card}
def to_backgrid_dict(self):
    """Backgrid-conform dict from DataFrame.

    Returns a dict with two keys:
      * ``rows``: list of per-row dicts (``to_dict(orient='records')``),
        with some cell values rewritten in place (see below);
      * ``columns``: one dict per column with ``name``, ``cell``
        (``'number'``, ``'string'`` or ``'uri'``), ``nesting``, ``editable``
        and optionally ``label`` / ``renderable``.

    At most ``nrows_max`` (260) rows are included. Columns whose name starts
    with ``level_`` or ends with ``]`` are typed ``'number'`` without
    inspecting their cells. For the remaining columns, mp-id cells are
    replaced by materialsproject.org URLs. Once a column is no longer
    all-mp-ids, non-empty cells are tried as compositions and then as URLs,
    and scanning of the column stops at the first cell that is neither.

    Dotted column names are treated as nested: the part before the first
    ``.`` becomes ``nesting`` and the remainder ``label``.
    """
    # function-level imports to shorten global import times
    from mpcontribs.io.core.utils import get_composition_from_string
    from pandas import MultiIndex
    import pymatgen.util as pmg_util
    from pymatgen.core.composition import CompositionError

    table = dict()
    nrows_max = 260
    nrows = self.shape[0]
    df = Table(self.head(n=nrows_max)) if nrows > nrows_max else self

    if isinstance(df.index, MultiIndex):
        df.reset_index(inplace=True)

    table['columns'] = []
    table['rows'] = super(Table, df).to_dict(orient='records')

    for col_index, col in enumerate(list(df.columns)):
        cell_type = 'number'

        # avoid looping rows to minimize use of `df.iat` (time-consuming in 3d)
        # `endswith` instead of `col[-1]` so that an empty column name does
        # not raise IndexError
        if not col.startswith('level_') and not col.endswith(']'):
            is_url_column = True

            for row_index in range(df.shape[0]):
                cell = str(df.iat[row_index, col_index])
                is_url_column = bool(
                    is_url_column
                    and (not cell or mp_id_pattern.match(cell)))
                if is_url_column:
                    if cell:
                        value = 'https://materialsproject.org/materials/{}'.format(
                            cell)
                        table['rows'][row_index][col] = value
                elif cell:
                    try:
                        composition = get_composition_from_string(cell)
                        composition = pmg_util.string.unicodeify(
                            composition)
                        table['rows'][row_index][col] = composition
                    except (CompositionError, ValueError, OverflowError):
                        try:
                            # https://stackoverflow.com/a/38020041
                            result = urlparse(cell)
                            if not all([
                                    result.scheme, result.netloc, result.path
                            ]):
                                break
                            is_url_column = True
                        except Exception:
                            # was a bare `except:`; still treats any parsing
                            # error as "not a URL" but lets
                            # KeyboardInterrupt/SystemExit propagate
                            break

            cell_type = 'uri' if is_url_column else 'string'

        col_split = col.split('.')
        nesting = [col_split[0]] if len(col_split) > 1 else []
        table['columns'].append({
            'name': col,
            'cell': cell_type,
            'nesting': nesting,
            'editable': 0
        })
        if len(col_split) > 1:
            table['columns'][-1].update({'label': '.'.join(col_split[1:])})
        if len(table['columns']) > 12:
            # only the first 12 columns are rendered
            table['columns'][-1]['renderable'] = 0

    return table
def build(self, contributor_email, cid, api_key=None, endpoint=None):
    """Update materials/compositions collections with contributed data.

    Builds a notebook for contribution `cid`, with cells for its
    hierarchical, tabular, graphical and structural data, and runs it
    through `self.ep.preprocess`.

    Args:
        contributor_email: must be listed in the contribution's
            `collaborators`; it is also parsed into an author whose name is
            the fallback project name.
        cid: contribution id.
        api_key, endpoint: exported as `PMG_MAPI_KEY` / `PMG_MAPI_ENDPOINT`
            when `self.db` is not a dict. `None` leaves the respective
            environment variable untouched.

    Returns:
        If `self.db` is a dict: the list
        `[mp_cat_id, project, cid_short, exported notebook]`.
        Otherwise the build document is upserted into the current collection
        and the relative URL `'<materials|compositions>/<cid>'` is returned.

    Raises:
        Exception: if the contribution cannot be found.
        ValueError: if `contributor_email` is not a collaborator.
    """
    cid_short, cid_str = get_short_object_id(cid), str(cid)
    contrib = self.find_contribution(cid)
    if not contrib:
        raise Exception('Contribution {} not found!'.format(cid))
    if contributor_email not in contrib['collaborators']:
        raise ValueError(
            "Build stopped: building contribution {} not "
            "allowed due to insufficient permissions of {}! Ask "
            "someone of {} to make you a collaborator on {}.".format(
                cid_short, contributor_email, contrib['collaborators'],
                cid_short))

    from pymatgen.util.provenance import Author
    mpfile = MPFileCore.from_contribution(contrib)
    mp_cat_id = mpfile.ids[0]
    is_mp_id = mp_id_pattern.match(mp_cat_id)
    self.curr_coll = self.materials if is_mp_id else self.compositions
    author = Author.parse_author(contributor_email)
    if 'project' in contrib:
        project = contrib['project']
    else:
        # `replace` instead of the Python-2-only `translate(None, '.')`
        project = str(author.name).replace('.', '')

    nb = nbf.new_notebook()
    if isinstance(self.db, dict):
        # offline mode: embed the contribution itself in the notebook
        contrib.pop('_id')
        if 'cid' in contrib['content']:
            contrib['content'].pop('cid')
        nb['cells'].append(
            nbf.new_code_cell(
                "from mpcontribs.io.core.mpfile import MPFileCore\n"
                "from mpcontribs.io.core.recdict import RecursiveDict\n"
                "mpfile = MPFileCore.from_contribution({})\n"
                "identifier = '{}'".format(contrib, mp_cat_id)))
    else:
        nb['cells'].append(
            nbf.new_code_cell(
                "from mpcontribs.rest.rester import MPContribsRester"))
        # Environment values must be strings: assigning the `None` defaults
        # raised TypeError, so unset arguments are skipped instead.
        for env_var, env_value in (('PMG_MAPI_KEY', api_key),
                                   ('PMG_MAPI_ENDPOINT', endpoint)):
            if env_value is not None:
                os.environ[env_var] = env_value
        # NOTE(review): cell source reproduced as found; confirm the intended
        # indentation inside the generated `with` block.
        nb['cells'].append(
            nbf.new_code_cell(
                "with MPContribsRester() as mpr:\n"
                " mpfile = mpr.find_contribution('{}')\n"
                " identifier = mpfile.ids[0]".format(cid)))

    nb['cells'].append(
        nbf.new_markdown_cell("## Contribution #{} for {}".format(
            cid_short, mp_cat_id)))
    nb['cells'].append(nbf.new_markdown_cell("### Hierarchical Data"))
    nb['cells'].append(nbf.new_code_cell("mpfile.hdata[identifier]"))

    # one heading plus one (markdown, code) cell pair per named component
    sections = (
        ('Tabular Data', 'tdata'),
        ('Graphical Data', 'gdata'),
        ('Structural Data', 'sdata'),
    )
    for heading, attr in sections:
        components = getattr(mpfile, attr)[mp_cat_id]
        if not components:
            continue
        nb['cells'].append(nbf.new_markdown_cell("### {}".format(heading)))
        for name in components.keys():
            nb['cells'].append(
                nbf.new_markdown_cell("#### {}".format(name)))
            nb['cells'].append(
                nbf.new_code_cell(
                    "mpfile.{}[identifier]['{}']".format(attr, name)))

    self.ep.preprocess(nb, {'metadata': {'path': self.nbdir}})

    if isinstance(self.db, dict):
        return [mp_cat_id, project, cid_short, export_notebook(nb, cid)]

    build_doc = RecursiveDict()
    build_doc['mp_cat_id'] = mp_cat_id
    build_doc['project'] = project
    build_doc['nb'] = nb
    self.curr_coll.update({'_id': cid}, {'$set': build_doc}, upsert=True)
    # return URL for contribution page
    return '{}/{}'.format(
        ('materials' if is_mp_id else 'compositions'), cid_str)
def get_backgrid_table(df):
    """Backgrid-conform dict from DataFrame.

    Returns a dict with two keys:
      * ``rows``: list of per-row dicts (``to_dict(orient='records')``),
        with some cell values rewritten in place (see below);
      * ``columns``: one dict per column with ``name``, ``cell``
        (``'number'``, ``'string'`` or ``'uri'``), ``nesting``, ``editable``
        and optionally ``label`` / ``renderable``.

    At most ``nrows_max`` (200) rows are included. Columns whose name starts
    with ``level_`` or that have a numeric dtype are typed ``'number'``
    without inspecting their cells. For the remaining columns every cell is
    inspected:
      * cells without a space: mp-ids become materialsproject.org URLs;
        other cells are checked with django's ``URLValidator``, and scanning
        stops at the first cell that fails validation;
      * cells containing a space are split into ``value`` and ``unit``: the
        column is renamed to ``"<col> [<unit>]"`` using the first unit seen,
        and the bare value is stored (the full cell if its unit differs from
        the first one).

    Column names containing ``##`` are treated as nested: the first part
    becomes ``nesting`` and the remainder ``label``.
    """
    # function-level imports (django among them) to shorten global import
    # times
    import numpy as np
    from django.core.validators import URLValidator
    from django.core.exceptions import ValidationError
    from pandas import MultiIndex

    val = URLValidator()
    table = dict()
    nrows = df.shape[0]
    nrows_max = 200
    if nrows > nrows_max:
        df = Table(df.head(n=nrows_max))
    numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()

    if isinstance(df.index, MultiIndex):
        df.reset_index(inplace=True)

    table['columns'] = []
    table['rows'] = super(Table, df).to_dict(orient='records')

    for col_index, col in enumerate(list(df.columns)):
        cell_type = 'number'

        # avoid looping rows to minimize use of `df.iat` (time-consuming in 3d)
        if not col.startswith('level_') and col not in numeric_columns:
            is_url_column, prev_unit, old_col = True, None, col

            # fix: iterate over the (possibly truncated) frame. Using the
            # pre-truncation `nrows` made `df.iat` go out of bounds whenever
            # the input had more than `nrows_max` rows.
            for row_index in xrange(df.shape[0]):
                cell = unicode(df.iat[row_index, col_index])
                cell_split = cell.split(' ', 1)

                if not cell or len(cell_split) == 1:  # empty cell or no space
                    if is_url_column:
                        is_url_column = bool(
                            not cell or mp_id_pattern.match(cell))
                    if is_url_column:
                        if cell:
                            value = 'https://materialsproject.org/materials/{}'.format(
                                cell)
                            table['rows'][row_index][col] = value
                    else:
                        try:
                            val(cell)
                            is_url_column = True
                        except ValidationError:
                            # is_url_column already set to False
                            break
                else:
                    value, unit = cell_split
                    # TODO convert cell_split[0] to float?
                    # the row dict is still keyed by the original column name
                    table['rows'][row_index].pop(old_col)
                    if prev_unit is None:
                        is_url_column = False
                        prev_unit = unit
                        col = '{} [{}]'.format(col, unit)
                    table['rows'][row_index][
                        col] = cell if prev_unit != unit else value

            cell_type = 'uri' if is_url_column else 'string'

        col_split = col.split('##')
        nesting = [col_split[0]] if len(col_split) > 1 else []
        table['columns'].append({
            'name': col,
            'cell': cell_type,
            'nesting': nesting,
            'editable': 0
        })
        if len(col_split) > 1:
            table['columns'][-1].update({'label': '##'.join(col_split[1:])})
        if len(table['columns']) > 9:
            # only the first 9 columns are rendered
            table['columns'][-1]['renderable'] = 0

    return table