Example #1
# mongoengine post_save signal handler (connected as a classmethod, see the sketch below)
def post_save(cls, sender, document, **kwargs):
    # collect the top-level field names touched by this save
    set_root_keys = set(k.split(".", 1)[0] for k in document._delta()[0].keys())
    cid = document.contribution.id
    nbs = Notebooks.objects(pk=cid)
    if not set_root_keys or set_root_keys == {"is_public"}:
        # nothing (or only the visibility flag) changed: mirror it on the notebook
        nbs.update(set__is_public=document.is_public)
    else:
        # structure-relevant fields changed: drop the stale notebook and derived data
        nbs.delete()
        document.update(unset__cif=True)
        Contributions.objects(pk=cid).update(unset__structures=True)
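A minimal sketch of how such a handler is typically registered with mongoengine signals (requires blinker); the sender model name Structures is an assumption based on the fields the handler touches:

from mongoengine import signals

# hypothetical wiring: connect the classmethod once at import time so it
# fires after every save of the (assumed) Structures model
signals.post_save.connect(Structures.post_save, sender=Structures)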
Example #2
 def get(self, project):
     """Retrieve overview graph for a project.
     ---
     operationId: get_graph
     parameters:
         - name: project
           in: path
           type: string
           pattern: '^[a-zA-Z0-9_]{3,30}$'
           required: true
           description: project name/slug
         - name: columns
           in: query
           type: array
           items:
               type: string
           required: true
           description: comma-separated list of column names to plot
     responses:
         200:
             description: x-y-data in plotly format
             schema:
                 type: array
                 items:
                     type: object
                     properties:
                         x:
                             type: array
                             items:
                                 type: number
                         y:
                             type: array
                             items:
                                 type: number
     """
     # project only the fields needed to build the plot data;
     # default to '' so a missing `columns` param doesn't raise AttributeError
     mask = ['content.data', 'identifier']
     columns = request.args.get('columns', '').split(',')
     objects = Contributions.objects(project=project).only(*mask)
     data = [{'x': [], 'y': []} for col in columns]
     for obj in objects:
         d = obj['content']['data']
         for idx, col in enumerate(columns):
             # '##' separates a key from an optional sub-key in a column name
             k, sk = padded(col.split('##'), n=2)
             if k in d:
                 val = d[k].get(sk) if sk else d[k]
                 if val:
                     data[idx]['x'].append(obj.identifier)
                     # values are stored as '<number> <unit>' strings; keep the number
                     data[idx]['y'].append(val.split(' ')[0])
     return data
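For reference, padded here matches the signature of more_itertools.padded (an assumption based on the call site); it right-pads the split result so single keys unpack cleanly:

from more_itertools import padded  # assumption: this is the padded used above

k, sk = padded('band_gap##value'.split('##'), n=2)  # -> ('band_gap', 'value')
k, sk = padded('formula'.split('##'), n=2)          # -> ('formula', None)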
Example #3
def fix_units(name):
    # make sure correct units are indicated in project.columns before running this
    fields = list(Contributions._fields.keys())
    project = Projects.objects.with_id(name).reload("columns")
    query = Q()

    # select contributions whose stored unit disagrees with the intended column unit
    for column in project.columns:
        if column.unit and column.unit != "NaN":
            path = column.path.replace(".", "__")
            q = {f"{path}__unit__ne": column.unit}
            query |= Q(**q)

    contribs = Contributions.objects(Q(project=name) & query).only(*fields)
    num = contribs.count()
    print(name, num)

    for idx, contrib in enumerate(contribs):
        # pull out display values, then re-save to reparse them with the intended unit
        contrib.data = remap(contrib.data, visit=visit, enter=enter)
        contrib.save(signal_kwargs={"skip": True})

        if idx and not idx % 250:
            print(idx)
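The visit/enter callbacks are not shown in the snippet. A minimal sketch of what they might look like with boltons.iterutils.remap, assuming the goal is to strip cached display strings so they get regenerated on save:

from boltons.iterutils import remap, default_enter

# hypothetical callbacks: drop cached 'display' entries so they are
# recomputed from value + unit on the next save
def visit(path, key, value):
    return key != "display"  # returning False drops the item

def enter(path, key, value):
    return default_enter(path, key, value)  # recurse with default behavior

cleaned = remap({"band_gap": {"value": 1.2, "unit": "eV", "display": "1.2 eV"}},
                visit=visit, enter=enter)
# -> {"band_gap": {"value": 1.2, "unit": "eV"}}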
Example #4
    def get(self, project):
        """Retrieve a table of contributions for a project.
        ---
        operationId: get_table
        parameters:
            - name: project
              in: path
              type: string
              pattern: '^[a-zA-Z0-9_]{3,30}$'
              required: true
              description: project name/slug
            - name: columns
              in: query
              type: array
              items:
                  type: string
              description: comma-separated list of column names to tabulate
            - name: page
              in: query
              type: integer
              default: 1
              description: page to retrieve (in batches of `per_page`)
            - name: per_page
              in: query
              type: integer
              default: 20
              minimum: 2
              maximum: 20
              description: number of results to return per page
            - name: q
              in: query
              type: string
              description: substring to search for in first non-id column
            - name: order
              in: query
              type: string
              description: sort ascending or descending
              enum: [asc, desc]
            - name: sort_by
              in: query
              type: string
              description: column name to sort by
        responses:
            200:
                description: Paginated table response in backgrid format (items = rows of table)
                schema:
                    type: object
                    properties:
                        total_count:
                            type: integer
                        total_pages:
                            type: integer
                        page:
                            type: integer
                        last_page:
                            type: integer
                        per_page:
                            type: integer
                        items:
                            type: array
                            items:
                                type: object
        """
        # config and parameters
        explorer = 'http://localhost:8080/explorer' if current_app.config['DEBUG'] \
            else 'https://portal.mpcontribs.org/explorer'
        mp_site = 'https://materialsproject.org/materials'
        mask = ['content.data', 'content.structures', 'identifier']
        search = request.args.get('q')
        page = int(request.args.get('page', 1))
        PER_PAGE_MAX = current_app.config['PER_PAGE_MAX']
        per_page = int(request.args.get('per_page', PER_PAGE_MAX))
        per_page = PER_PAGE_MAX if per_page > PER_PAGE_MAX else per_page
        order = request.args.get('order')
        sort_by = request.args.get('sort_by', 'identifier')
        general_columns = ['identifier', 'id']
        user_columns = request.args.get('columns', '').split(',')
        objects = Contributions.objects(project=project).only(*mask)

        # default user_columns
        # guard against empty projects (objects.first() returns None)
        first = objects.first()
        sample = first['content']['data'] if first else {}
        data_keys = sorted(
            k.rsplit('.', 1)[0] if k.endswith('.display') else k
            for k, v in nested_to_record(sample, sep='.').items()
            if not k.endswith('.value') and not k.endswith('.unit'))
        if not data_keys:
            return {
                'total_count': 0,
                'total_pages': 0,
                'page': 1,
                'last_page': 1,
                'per_page': per_page,
                'items': []
            }
        formula_key_exists = 'formula' in data_keys
        if formula_key_exists:
            general_columns.append('formula')
        else:
            # test whether search key exists in all docs and is not a number/object
            search_key = data_keys[0].replace('.', '__')
            q1 = {f'content__data__{search_key}__exists': False}
            q2 = {f'content__data__{search_key}__type': 'object'}
            if objects(Q(**q1) | Q(**q2)).count() < 1:
                general_columns.append(data_keys[0])
            else:
                general_columns.append('formula')

        if not user_columns[0]:
            if formula_key_exists:
                data_keys.remove('formula')
            user_columns = data_keys if 'formula' in general_columns else data_keys[1:]

        # add units to column names
        units = [
            objects.distinct(f'content.data.{col}.unit')
            for col in user_columns
        ]
        columns = general_columns + [
            '{} [{}]'.format(col, units[idx][0]) if units[idx] else col
            for idx, col in enumerate(user_columns)
        ]

        # search and sort
        if search is not None:
            kwargs = {
                f'content__data__{general_columns[-1]}__exists': True,
                f'content__data__{general_columns[-1]}__contains': search
            }
            objects = objects(Q(identifier__contains=search) | Q(**kwargs))
        sort_by_key = sort_by
        if ' ' in sort_by and sort_by[-1] == ']':
            sort_by = sort_by.split(' ')[0]  # remove unit
            sort_by_key = f'content.data.{sort_by}.value'
        elif sort_by in columns[2:]:
            sort_by_key = f'content.data.{sort_by}'
        order_sign = '-' if order == 'desc' else '+'
        order_by = f"{order_sign}{sort_by_key}"
        objects = objects.order_by(order_by)

        # generate table page
        items = []
        for doc in objects.paginate(page=page, per_page=per_page).items:
            mp_id = doc['identifier']
            contrib = nested_to_record(doc['content']['data'], sep='.')
            search_value = contrib.get(general_columns[-1], mp_id).replace(' ', '')
            row = [
                f"{mp_site}/{mp_id}", f"{explorer}/{doc['id']}", search_value
            ]

            for idx, col in enumerate(user_columns):
                cell = ''
                if 'CIF' in col:
                    structures = doc['content']['structures']
                    if '.' in col:  # grouped columns
                        # remove CIF string from field name
                        sname = '.'.join(col.split('.')[:-1])
                        for d in structures:
                            if d['name'] == sname:
                                cell = f"{explorer}/{d['id']}.cif"
                                break
                    elif structures:
                        cell = f"{explorer}/{structures[0]['id']}.cif"
                else:
                    cell = contrib.get(col + '.value', contrib.get(col, ''))
                row.append(str(cell))

            items.append(dict(zip(columns, row)))

        total_count = objects.count()
        # ceiling division: check the remainder on total_count, not total_pages
        total_pages = total_count // per_page
        if total_count % per_page:
            total_pages += 1

        return {
            'total_count': total_count,
            'total_pages': total_pages,
            'page': page,
            'last_page': total_pages,
            'per_page': per_page,
            'items': items
        }
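The page-count arithmetic above amounts to a ceiling division, which can be written more directly. A small illustrative helper, not part of the original code:

import math

def num_pages(total_count: int, per_page: int) -> int:
    # ceil(total_count / per_page) without floating point
    return -(-total_count // per_page)

assert num_pages(41, 20) == 3 == math.ceil(41 / 20)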
Example #5
def make(projects=None, cids=None, force=False):
    """build the notebook / details page"""
    start = time.perf_counter()
    remaining_time = rq.default_timeout - 5
    mask = ["id", "needs_build", "notebook"]
    query = Q()

    if projects:
        query &= Q(project__in=projects)
    if cids:
        query &= Q(id__in=cids)
    if not force:
        query &= Q(needs_build=True) | Q(needs_build__exists=False)

    job = get_current_job()
    ret = {"input": {"projects": projects, "cids": cids, "force": force}}
    if job:
        ret["job"] = {
            "id": job.id,
            "enqueued_at": job.enqueued_at.isoformat(),
            "started_at": job.started_at.isoformat()
        }

    exclude = list(Contributions._fields.keys())
    documents = Contributions.objects(query).exclude(*exclude).only(*mask)
    total = documents.count()
    count = 0

    for idx, document in enumerate(documents):
        stop = time.perf_counter()
        remaining_time -= stop - start

        if remaining_time < 0:
            if job:
                restart_kernels()

            ret["result"] = {
                "status": "TIMEOUT",
                "count": count,
                "total": total
            }
            return ret

        start = time.perf_counter()

        if not force and document.notebook and \
                not getattr(document, "needs_build", True):
            continue

        if document.notebook:
            try:
                nb = Notebooks.objects.get(id=document.notebook.id)
                nb.delete()
                document.update(unset__notebook="")
                logger.debug(f"Notebook {document.notebook.id} deleted.")
            except DoesNotExist:
                pass

        cid = str(document.id)
        logger.debug(f"prep notebook for {cid} ...")
        document.reload("tables", "structures", "attachments")

        cells = [
            # define client only once in kernel
            # avoids API calls for regex expansion for query parameters
            nbf.new_code_cell("\n".join([
                "if 'client' not in locals():",
                "\tclient = Client(",
                f'\t\theaders={{"X-Authenticated-Groups": "{ADMIN_GROUP}"}},',
                f'\t\thost="{MPCONTRIBS_API_HOST}"',
                "\t)",
                "print(client.get_totals())",
                # return something. See while loop in `run_cells`
            ])),
            nbf.new_code_cell("\n".join([
                f'c = client.get_contribution("{document.id}")', 'c.display()'
            ])),
        ]

        if document.tables:
            cells.append(nbf.new_markdown_cell("## Tables"))
            for table in document.tables:
                cells.append(
                    nbf.new_code_cell("\n".join(
                        [f't = client.get_table("{table.id}")',
                         't.display()'])))

        if document.structures:
            cells.append(nbf.new_markdown_cell("## Structures"))
            for structure in document.structures:
                cells.append(
                    nbf.new_code_cell("\n".join([
                        f's = client.get_structure("{structure.id}")',
                        's.display()'
                    ])))

        if document.attachments:
            cells.append(nbf.new_markdown_cell("## Attachments"))
            for attachment in document.attachments:
                cells.append(
                    nbf.new_code_cell("\n".join([
                        f'a = client.get_attachment("{attachment.id}")',
                        'a.info()'
                    ])))

        try:
            outputs = execute_cells(cid, cells)
        except Exception as e:
            if job:
                restart_kernels()

            ret["result"] = {
                "status": "ERROR",
                "cid": cid,
                "count": count,
                "total": total,
                "exc": str(e)
            }
            return ret

        if not outputs:
            if job:
                restart_kernels()

            ret["result"] = {
                "status": "ERROR: NO OUTPUTS",
                "cid": cid,
                "count": count,
                "total": total
            }
            return ret

        for cell_idx, output in outputs.items():  # avoid shadowing the outer loop's idx
            cells[cell_idx]["outputs"] = output

        doc = nbf.new_notebook()
        doc["cells"] = [
            nbf.new_code_cell("from mpcontribs.client import Client"),
            nbf.new_code_cell('client = Client()'),
        ]
        doc["cells"] += cells[1:]  # skip localhost Client

        try:
            nb = Notebooks(**doc).save()
            document.update(notebook=nb, needs_build=False)
        except Exception as e:
            if job:
                restart_kernels()

            ret["result"] = {
                "status": "ERROR",
                "cid": cid,
                "count": count,
                "total": total,
                "exc": str(e)
            }
            return ret

        count += 1

    if total and job:
        restart_kernels()

    ret["result"] = {"status": "COMPLETED", "count": count, "total": total}
    return ret
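A sketch of how make might be enqueued as an RQ job; the queue name and Redis connection details are assumptions:

from redis import Redis
from rq import Queue

# hypothetical wiring; the actual queue/connection setup lives elsewhere
queue = Queue("make", connection=Redis())
job = queue.enqueue(make, projects=["my_project"], force=False)
print(job.id, job.get_status())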
Example #6
    def get(self, project):
        """Retrieve a table of contributions for a project.
        ---
        operationId: get_table
        parameters:
            - name: project
              in: path
              type: string
              pattern: '^[a-zA-Z0-9_]{3,30}$'
              required: true
              description: project name/slug
            - name: columns
              in: query
              type: array
              items:
                  type: string
              required: true
              description: comma-separated list of column names to tabulate
            - name: page
              in: query
              type: integer
              default: 1
              description: page to retrieve (in batches of `per_page`)
            - name: per_page
              in: query
              type: integer
              default: 20
              minimum: 2
              maximum: 20
              description: number of results to return per page
            - name: q
              in: query
              type: string
              description: substring to search for in formula
            - name: order
              in: query
              type: string
              description: sort ascending or descending
              enum: [asc, desc]
            - name: sort_by
              in: query
              type: string
              description: column name to sort by
        responses:
            200:
                description: paginated table response in backgrid format
                schema:
                    type: string
        """
        # config and parameters
        explorer = 'http://localhost:8080/explorer' if current_app.config['DEBUG'] \
            else 'https://portal.mpcontribs.org/explorer'
        mp_site = 'https://materialsproject.org/materials'
        mask = ['content.data', 'content.structures', 'identifier']
        search = request.args.get('q')
        page = int(request.args.get('page', 1))
        per_page = int(request.args.get('per_page', PER_PAGE_MAX))
        per_page = PER_PAGE_MAX if per_page > PER_PAGE_MAX else per_page
        order = request.args.get('order')
        sort_by = request.args.get('sort_by', 'identifier')
        general_columns = ['identifier', 'id', 'formula']
        user_columns = request.args.get('columns', '').split(',')
        columns = general_columns + user_columns
        grouped_columns = [
            list(padded(col.split('##'), n=2)) for col in user_columns
        ]

        # query, projection and search
        objects = Contributions.objects(project=project).only(*mask)
        if search is not None:
            objects = objects(content__data__formula__contains=search)

        # sorting
        sort_by_key = sort_by if sort_by in general_columns[:2] else f'content.data.{sort_by}'
        order_sign = '-' if order == 'desc' else '+'
        order_by = f"{order_sign}{sort_by_key}"
        objects = objects.order_by(order_by)

        # generate table page
        cursor, items = None, []
        for doc in objects.paginate(page=page, per_page=per_page).items:
            mp_id = doc['identifier']
            contrib = doc['content']['data']
            formula = contrib['formula'].replace(' ', '')
            row = [f"{mp_site}/{mp_id}", f"{explorer}/{doc['id']}", formula]

            for idx, (k, sk) in enumerate(grouped_columns):
                cell = ''
                if k == 'CIF' or sk == 'CIF':
                    if cursor is None:
                        cursor = objects.aggregate(
                            *get_pipeline('content.structures'))
                        struc_names = dict(
                            (str(item["_id"]), item.get("keys", []))
                            for item in cursor)
                    snames = struc_names.get(str(doc['id']))
                    if snames:
                        if k == 'CIF':
                            cell = f"{explorer}/{doc['id']}/{snames[0]}.cif"
                        else:
                            for sname in snames:
                                if k in sname:
                                    cell = f"{explorer}/{doc['id']}/{sname}.cif"
                                    break
                else:
                    if sk is None:
                        cell = contrib.get(k, '')
                    else:
                        cell = contrib.get(k, {sk: ''}).get(sk, '')
                # move unit to column header and only append value to row
                value, unit = padded(cell.split(), fillvalue='', n=2)
                if unit and unit not in user_columns[idx]:
                    user_columns[idx] += f' [{unit}]'
                row.append(value)

            columns = general_columns + user_columns  # rewrite after update
            items.append(dict(zip(columns, row)))

        total_count = objects.count()
        # ceiling division: check the remainder on total_count, not total_pages
        total_pages = total_count // per_page
        if total_count % per_page:
            total_pages += 1

        return {
            'total_count': total_count,
            'total_pages': total_pages,
            'page': page,
            'last_page': total_pages,
            'per_page': per_page,
            'items': items
        }
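A hypothetical request against this endpoint using Flask's test client; the app object, URL prefix, and column names are assumptions:

# assuming the resource is mounted under /projects/<project>/table
with app.test_client() as client:
    resp = client.get(
        "/projects/my_project/table",
        query_string={"columns": "band_gap##value,CIF", "page": 1,
                      "per_page": 20, "order": "desc", "sort_by": "band_gap"},
    )
    table = resp.get_json()
    print(table["total_count"], len(table["items"]))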
Example #7
    def post_save(cls, sender, document, **kwargs):
        admin_email = current_app.config["MAIL_DEFAULT_SENDER"]
        admin_topic = current_app.config["MAIL_TOPIC"]
        scheme = "http" if current_app.config["DEBUG"] else "https"

        if kwargs.get("created"):
            ts = current_app.config["USTS"]
            email_project = [document.owner, document.name]
            token = ts.dumps(email_project)
            link = url_for("projects.applications",
                           token=token,
                           _scheme=scheme,
                           _external=True)
            subject = f'New project "{document.name}"'
            hours = int(current_app.config["USTS_MAX_AGE"] / 3600)
            doc_yaml = yaml.dump(document.to_mongo().to_dict(),
                                 indent=4,
                                 sort_keys=False)
            html = render_template("admin_email.html",
                                   doc=doc_yaml,
                                   link=link,
                                   hours=hours)
            send_email(admin_topic, subject, html)
            resp = sns_client.create_topic(
                Name=f"mpcontribs_{document.name}",
                Attributes={"DisplayName": f"MPContribs {document.title}"},
            )
            endpoint = document.owner.split(":", 1)[1]
            sns_client.subscribe(TopicArn=resp["TopicArn"],
                                 Protocol="email",
                                 Endpoint=endpoint)
        else:
            delta_set, delta_unset = document._delta()

            if "is_approved" in delta_set and document.is_approved:
                subject = f'Your project "{document.name}" has been approved'
                netloc = urllib.parse.urlparse(request.url).netloc.replace(
                    "-api", "")
                portal = f"{scheme}://{netloc}"
                html = render_template("owner_email.html",
                                       approved=True,
                                       admin_email=admin_email,
                                       host=portal,
                                       project=document.name)
                topic_arn = ":".join(
                    admin_topic.split(":")[:-1] +
                    ["mpcontribs_" + document.name])
                send_email(topic_arn, subject, html)

            if "columns" in delta_set or "columns" in delta_unset or (
                    not delta_set and not delta_unset):
                from mpcontribs.api.contributions.document import Contributions, COMPONENTS

                columns = {}
                ncontribs = Contributions.objects(project=document.id).count()

                if "columns" in delta_set:
                    # document.columns updated by the user as intended
                    for col in document.columns:
                        columns[col.path] = col
                elif "columns" in delta_unset or ncontribs:
                    # document.columns unset by user to reinit all columns from DB
                    # -> get paths and units across all contributions from DB
                    group = {"_id": "$project", "merged": {"$mergeObjects": "$data"}}
                    pipeline = [{"$match": {"project": document.id}}, {"$group": group}]
                    result = list(Contributions.objects.aggregate(pipeline))
                    merged = {} if not result else result[0]["merged"]
                    flat = flatten(remap(merged, visit=visit, enter=enter),
                                   reducer="dot")

                    for k, v in flat.items():
                        path = f"data.{k}"
                        columns[path] = Column(path=path)
                        if v is not None:
                            columns[path].unit = v

                # set min/max for all number columns
                min_max_paths = [
                    path for path, col in columns.items()
                    if col["unit"] != "NaN"
                ]
                group = {"_id": None}

                for path in min_max_paths:
                    field = f"{path}{delimiter}value"
                    for k in ["min", "max"]:
                        clean_path = path.replace(delimiter, "__")
                        key = f"{clean_path}__{k}"
                        group[key] = {f"${k}": f"${field}"}

                pipeline = [{"$match": {"project": document.id}}, {"$group": group}]
                result = list(Contributions.objects.aggregate(pipeline))
                min_max = {} if not result else result[0]

                for path in min_max_paths:
                    for k in ["min", "max"]:
                        clean_path = path.replace(delimiter, "__")
                        m = min_max.get(f"{clean_path}__{k}")
                        if m is not None:
                            setattr(columns[path], k, m)

                # update stats
                stats_kwargs = {
                    "columns": len(columns),
                    "contributions": ncontribs
                }

                for component in COMPONENTS.keys():
                    # count contributions with a non-empty list for this component
                    pipeline = [
                        {"$match": {
                            "project": document.id,
                            component: {"$exists": True, "$not": {"$size": 0}},
                        }},
                        {"$count": "count"},
                    ]
                    result = list(Contributions.objects.aggregate(pipeline))

                    if result:
                        stats_kwargs[component] = result[0]["count"]
                        columns[component] = Column(path=component)
                    else:
                        stats_kwargs[component] = 0

                stats = Stats(**stats_kwargs)
                document.update(stats=stats, columns=columns.values())
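The flatten call with reducer="dot" matches the API of the flatten_dict package (an assumption based on the call site); it collapses nested mappings into dot-joined keys:

from flatten_dict import flatten  # assumption: this is the flatten used above

nested = {"band_gap": {"direct": "eV", "indirect": "eV"}}
print(flatten(nested, reducer="dot"))
# -> {'band_gap.direct': 'eV', 'band_gap.indirect': 'eV'}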