Exemplo n.º 1
0
 def set_error_data(block_index, message):
     """
     Overwrite the entry at |block_index| in |interpreted_blocks| with a markup
     block whose text is |message| prefixed by 'ERROR: '.
     """
     error_payload = {'id': block_index, 'text': 'ERROR: ' + message}
     loaded = MarkupBlockSchema().load(error_payload)
     interpreted_blocks[block_index] = loaded.data
Exemplo n.º 2
0
    def flush_bundles():
        """
        Having collected bundles in |bundle_infos|, flush them into |blocks|,
        potentially as a single table depending on the mode.

        The mode and its arguments are read from |current_display|: the first
        element is the mode ('hidden', 'contents', 'image', 'record', 'table'
        or 'graph') and the remaining elements are that mode's arguments.
        |bundle_infos| is emptied in place once its blocks have been appended;
        it is left untouched when an error is raised part-way through.

        Raises UsageError directly when the display mode is missing or
        unknown, when a required genpath argument is absent, or when
        `maxlines` is not an integer.  Helpers called from here may raise too.
        """
        if len(bundle_infos) == 0:
            return

        # A bare `% display` leaves nothing to index below; report it as a
        # usage problem instead of letting an IndexError escape.
        if len(current_display) == 0:
            raise UsageError('Expected \'% display <mode>\', but got no mode')

        # Print out the current bundles somehow
        mode = current_display[0]
        args = current_display[1:]

        def raise_genpath_usage_error():
            # list(args): |args| may be a tuple slice, which cannot be
            # concatenated to a list directly.
            raise UsageError('Expected \'% display ' + mode +
                             ' (genpath)\', but got \'% display ' +
                             ' '.join([mode] + list(args)) + '\'')

        def parse_max_lines(properties):
            # Shared by the 'contents' and 'graph' modes.
            try:
                return int(
                    properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES))
            except ValueError:
                raise UsageError("maxlines must be integer")

        if mode == 'hidden':
            pass
        elif mode == 'contents' or mode == 'image':
            for _, bundle_info in bundle_infos:
                if is_missing(bundle_info):
                    blocks.append(MarkupBlockSchema().load({
                        'text': 'ERROR: cannot access bundle',
                        'error': True
                    }).data)
                    continue

                # Arguments are checked per accessible bundle, so a group made
                # only of missing bundles reports just the access errors.
                if len(args) == 0:
                    raise_genpath_usage_error()
                # Only the genpath half of the target is needed here.
                target_genpath = genpath_to_target(bundle_info, args[0])[1]
                properties = parse_properties(args[1:])

                block_object = {
                    'target_genpath':
                    target_genpath,
                    'bundles_spec':
                    BundleUUIDSpecSchema().load(
                        BundleUUIDSpecSchema.create_json([bundle_info])).data,
                    'status':
                    FetchStatusSchema.get_unknown_status(),
                }

                if mode == 'contents':
                    block_object['max_lines'] = parse_max_lines(properties)
                    blocks.append(
                        BundleContentsBlockSchema().load(block_object).data)
                elif mode == 'image':
                    block_object['width'] = properties.get('width', None)
                    block_object['height'] = properties.get('height', None)
                    blocks.append(
                        BundleImageBlockSchema().load(block_object).data)
        elif mode == 'record':
            # display record schema =>
            # key1: value1
            # key2: value2
            # ...
            schema = get_schema(args)
            header = ('key', 'value')
            for _, bundle_info in bundle_infos:
                rows = []
                for (name, genpath, post) in schema:
                    rows.append(RecordsRowSchema().load({
                        'key':
                        name + ':',
                        'value':
                        apply_func(post,
                                   interpret_genpath(bundle_info, genpath)),
                    }).data)
                blocks.append(RecordsBlockSchema().load({
                    'bundles_spec':
                    BundleUUIDSpecSchema().load(
                        BundleUUIDSpecSchema.create_json([bundle_info])).data,
                    'status':
                    FetchStatusSchema.get_unknown_status(),
                    'header':
                    header,
                    'rows':
                    rows,
                }).data)
        elif mode == 'table':
            # display table schema =>
            # key1       key2
            # b1_value1  b1_value2
            # b2_value1  b2_value2
            schema = get_schema(args)
            header = tuple(name for (name, genpath, post) in schema)
            rows = []
            processed_bundle_infos = []
            # Cache the mapping between owner_id to owner on current worksheet
            owner_cache = {}
            for _, bundle_info in bundle_infos:
                if 'metadata' in bundle_info:
                    rows.append({
                        name: apply_func(
                            post,
                            interpret_genpath(bundle_info,
                                              genpath,
                                              db_model=db_model,
                                              owner_cache=owner_cache),
                        )
                        for (name, genpath, post) in schema
                    })
                    processed_bundle_infos.append(copy.deepcopy(bundle_info))
                else:
                    # The front-end relies on the name metadata field existing
                    processed_bundle_info = copy.deepcopy(bundle_info)
                    processed_bundle_info['metadata'] = {'name': '<invalid>'}
                    rows.append({
                        name: apply_func(
                            post,
                            interpret_genpath(processed_bundle_info, genpath))
                        for (name, genpath, post) in schema
                    })
                    processed_bundle_infos.append(processed_bundle_info)

            blocks.append(TableBlockSchema().load({
                'bundles_spec':
                BundleUUIDSpecSchema().load(
                    BundleUUIDSpecSchema.create_json(
                        processed_bundle_infos)).data,
                'status':
                FetchStatusSchema.get_unknown_status(),
                'header':
                header,
                'rows':
                rows,
            }).data)

        elif mode == 'graph':
            # display graph <genpath> <properties>
            if len(args) == 0:
                raise_genpath_usage_error()
            properties = parse_properties(args[1:])

            # trajectories is list of {
            #   'bundle_uuid': ...,
            #   'display_name': ..., # What to show as the description of a bundle
            #   'target_genpath': ..., # genpath with its leading / removed
            # }
            trajectories = [{
                'bundle_uuid':
                bundle_info['uuid'],
                'display_name':
                interpret_genpath(bundle_info,
                                  properties.get('display_name', 'name')),
                'target_genpath':
                genpath_to_target(bundle_info, args[0])[1],
            } for _, bundle_info in bundle_infos]

            max_lines = parse_max_lines(properties)

            blocks.append(GraphBlockSchema().load({
                'trajectories':
                trajectories,
                # Only the first bundle is used for bundles_spec for now.
                'bundles_spec':
                BundleUUIDSpecSchema().load(
                    BundleUUIDSpecSchema.create_json([
                        bundle_infos[0][1]
                    ])).data,
                'max_lines':
                max_lines,
                'xlabel':
                properties.get('xlabel', None),
                'ylabel':
                properties.get('ylabel', None),
            }).data)
        else:
            raise UsageError('Unknown display mode: %s' % mode)
        bundle_infos[:] = []  # Clear
Exemplo n.º 3
0
def interpret_items(schemas, raw_items, db_model=None):
    """
    Interpret different items based on their types.
    :param schemas: initial mapping from name to list of schema items (columns of a table).
        Note that `schema` directives among the raw items add entries to this mapping in place.
    :param raw_items: list of (raw) worksheet items to interpret; each is unpacked as
        (bundle_info, subworksheet_info, value_obj, item_type, id, sort_key)
    :param db_model: database model which is used to query database
    :return: dict with three entries:
        'blocks': list of interpreted blocks, each the `.data` of one of the
            *Block schema loads performed below, in display order
        'raw_to_block': list with one entry per raw item: the
            (focusIndex, subFocusIndex) that raw item maps to, or None when
            no interpreted item follows it
        'block_to_raw': dict from the string 'focusIndex,subFocusIndex' to the
            index of the last raw item that maps directly to that position
    Usage errors (and unexpected exceptions) raised while handling a raw item
    are not propagated: they are turned into markup blocks flagged with
    'error': True, and interpretation continues with the next item.

    In addition, return an alignment between the raw items and the interpreted items.
    Each interpreted item has a focusIndex, and possibly consists of a list of
    table rows (indexed by subFocusIndex).  Here is an example:
      --- Raw ---                   --- Interpreted ---
      rawIndex                                         (focusIndex, subFocusIndex)
      0        % display table
      1        [bundle]             [table - row 0     (0, 0)
      2        [bundle]                    - row 1]    (0, 1)
      3
      4        hello                [markup            (1, 0)
      5        world                       ]
      6        [worksheet]          [worksheet]        (2, 0)
      7
    The mapping should be computed as follows:
    - Some raw items contribute directly to a particular interpreted item.
    - Others (blank lines, directives, schema definitions) don't.
    - Those that don't should get mapped to the next interpreted item.
    """
    raw_to_block = []  # rawIndex => (focusIndex, subFocusIndex)

    # Set default schema
    current_schema = None
    default_display = ('table', 'default')
    current_display = default_display
    blocks = []
    bundle_infos = []
    worksheet_infos = []

    def get_schema(args):  # args is a list of schema names
        args = args if len(args) > 0 else ['default']
        schema = []
        for arg in args:
            # If schema doesn't exist, then treat as item (e.g., uuid).
            schema += schemas.get(
                arg, canonicalize_schema_items([arg.split(':', 2)]))
        return schema

    def is_missing(info):
        return 'metadata' not in info

    def parse_properties(args):
        # Turn ['key=value', ...] into {'key': 'value', ...}.
        properties = {}
        for item in args:
            if '=' not in item:
                raise UsageError('Expected <key>=<value>, but got %s' % item)
            key, value = item.split('=', 1)
            properties[key] = value
        return properties

    def genpath_to_target(bundle_info, genpath):
        # bundle_info, '/stdout' => target = (uuid, 'stdout')
        if not is_file_genpath(genpath):
            raise UsageError('Not file genpath: %s' % genpath)
        # strip off the leading / from genpath to create a subpath in the target.
        return (bundle_info['uuid'], genpath[1:])

    def flush_bundles():
        """
        Having collected bundles in |bundle_infos|, flush them into |blocks|,
        potentially as a single table depending on the mode.
        """
        if len(bundle_infos) == 0:
            return

        # A bare `% display` leaves nothing to index below; report it as a
        # usage problem instead of letting an IndexError escape.
        if len(current_display) == 0:
            raise UsageError('Expected \'% display <mode>\', but got no mode')

        # Print out the current bundles somehow
        mode = current_display[0]
        args = current_display[1:]

        def raise_genpath_usage_error():
            # list(args): |args| may be a tuple slice, which cannot be
            # concatenated to a list directly.
            raise UsageError('Expected \'% display ' + mode +
                             ' (genpath)\', but got \'% display ' +
                             ' '.join([mode] + list(args)) + '\'')

        def parse_max_lines(properties):
            # Shared by the 'contents' and 'graph' modes.
            try:
                return int(
                    properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES))
            except ValueError:
                raise UsageError("maxlines must be integer")

        if mode == 'hidden':
            pass
        elif mode == 'contents' or mode == 'image':
            for _, bundle_info in bundle_infos:
                if is_missing(bundle_info):
                    blocks.append(MarkupBlockSchema().load({
                        'text': 'ERROR: cannot access bundle',
                        'error': True
                    }).data)
                    continue

                # Arguments are checked per accessible bundle, so a group made
                # only of missing bundles reports just the access errors.
                if len(args) == 0:
                    raise_genpath_usage_error()
                # Only the genpath half of the target is needed here.
                target_genpath = genpath_to_target(bundle_info, args[0])[1]
                properties = parse_properties(args[1:])

                block_object = {
                    'target_genpath':
                    target_genpath,
                    'bundles_spec':
                    BundleUUIDSpecSchema().load(
                        BundleUUIDSpecSchema.create_json([bundle_info])).data,
                    'status':
                    FetchStatusSchema.get_unknown_status(),
                }

                if mode == 'contents':
                    block_object['max_lines'] = parse_max_lines(properties)
                    blocks.append(
                        BundleContentsBlockSchema().load(block_object).data)
                elif mode == 'image':
                    block_object['width'] = properties.get('width', None)
                    block_object['height'] = properties.get('height', None)
                    blocks.append(
                        BundleImageBlockSchema().load(block_object).data)
        elif mode == 'record':
            # display record schema =>
            # key1: value1
            # key2: value2
            # ...
            schema = get_schema(args)
            header = ('key', 'value')
            for _, bundle_info in bundle_infos:
                rows = []
                for (name, genpath, post) in schema:
                    rows.append(RecordsRowSchema().load({
                        'key':
                        name + ':',
                        'value':
                        apply_func(post,
                                   interpret_genpath(bundle_info, genpath)),
                    }).data)
                blocks.append(RecordsBlockSchema().load({
                    'bundles_spec':
                    BundleUUIDSpecSchema().load(
                        BundleUUIDSpecSchema.create_json([bundle_info])).data,
                    'status':
                    FetchStatusSchema.get_unknown_status(),
                    'header':
                    header,
                    'rows':
                    rows,
                }).data)
        elif mode == 'table':
            # display table schema =>
            # key1       key2
            # b1_value1  b1_value2
            # b2_value1  b2_value2
            schema = get_schema(args)
            header = tuple(name for (name, genpath, post) in schema)
            rows = []
            processed_bundle_infos = []
            # Cache the mapping between owner_id to owner on current worksheet
            owner_cache = {}
            for _, bundle_info in bundle_infos:
                if 'metadata' in bundle_info:
                    rows.append({
                        name: apply_func(
                            post,
                            interpret_genpath(bundle_info,
                                              genpath,
                                              db_model=db_model,
                                              owner_cache=owner_cache),
                        )
                        for (name, genpath, post) in schema
                    })
                    processed_bundle_infos.append(copy.deepcopy(bundle_info))
                else:
                    # The front-end relies on the name metadata field existing
                    processed_bundle_info = copy.deepcopy(bundle_info)
                    processed_bundle_info['metadata'] = {'name': '<invalid>'}
                    rows.append({
                        name: apply_func(
                            post,
                            interpret_genpath(processed_bundle_info, genpath))
                        for (name, genpath, post) in schema
                    })
                    processed_bundle_infos.append(processed_bundle_info)

            blocks.append(TableBlockSchema().load({
                'bundles_spec':
                BundleUUIDSpecSchema().load(
                    BundleUUIDSpecSchema.create_json(
                        processed_bundle_infos)).data,
                'status':
                FetchStatusSchema.get_unknown_status(),
                'header':
                header,
                'rows':
                rows,
            }).data)

        elif mode == 'graph':
            # display graph <genpath> <properties>
            if len(args) == 0:
                raise_genpath_usage_error()
            properties = parse_properties(args[1:])

            # trajectories is list of {
            #   'bundle_uuid': ...,
            #   'display_name': ..., # What to show as the description of a bundle
            #   'target_genpath': ..., # genpath with its leading / removed
            # }
            trajectories = [{
                'bundle_uuid':
                bundle_info['uuid'],
                'display_name':
                interpret_genpath(bundle_info,
                                  properties.get('display_name', 'name')),
                'target_genpath':
                genpath_to_target(bundle_info, args[0])[1],
            } for _, bundle_info in bundle_infos]

            max_lines = parse_max_lines(properties)

            blocks.append(GraphBlockSchema().load({
                'trajectories':
                trajectories,
                # Only the first bundle is used for bundles_spec for now.
                'bundles_spec':
                BundleUUIDSpecSchema().load(
                    BundleUUIDSpecSchema.create_json([
                        bundle_infos[0][1]
                    ])).data,
                'max_lines':
                max_lines,
                'xlabel':
                properties.get('xlabel', None),
                'ylabel':
                properties.get('ylabel', None),
            }).data)
        else:
            raise UsageError('Unknown display mode: %s' % mode)
        bundle_infos[:] = []  # Clear

    def flush_worksheets():
        # Emit all pending subworksheets as one block, then forget them.
        if len(worksheet_infos) == 0:
            return

        blocks.append(SubworksheetsBlock().load({
            'subworksheet_infos':
            copy.deepcopy(worksheet_infos)
        }).data)

        worksheet_infos[:] = []

    def record_error(failed_raw_index, text):
        """
        Drop any pending bundles/worksheets, append an error markup block
        carrying |text|, and point the failing raw item at that block.
        """
        bundle_infos[:] = []
        worksheet_infos[:] = []
        blocks.append(MarkupBlockSchema().load({
            'text': text,
            'error': True,
        }).data)
        focus = (len(blocks) - 1, 0)
        if len(raw_to_block) > failed_raw_index:
            # The item had already registered its mapping before failing (the
            # end-of-input flush runs after that step).  Overwrite it so that
            # raw_to_block keeps exactly one entry per raw item.
            raw_to_block[failed_raw_index] = focus
        else:
            raw_to_block.append(focus)

    # Go through all the raw items...
    last_was_empty_line = False
    for raw_index, item in enumerate(raw_items):
        new_last_was_empty_line = True
        try:
            (bundle_info, subworksheet_info, value_obj, item_type, item_id,
             sort_key) = item

            is_bundle = item_type == TYPE_BUNDLE
            is_search = item_type == TYPE_DIRECTIVE and get_command(
                value_obj) == 'search'
            is_directive = item_type == TYPE_DIRECTIVE
            is_worksheet = item_type == TYPE_WORKSHEET

            # Consecutive bundles (resp. worksheets) are grouped; anything
            # else ends the current group.
            if not is_bundle:
                flush_bundles()

            if not is_worksheet:
                flush_worksheets()

            # Reset display to minimize long distance dependencies of directives
            if not (is_bundle or is_search):
                current_display = default_display

            # Reset schema to minimize long distance dependencies of directives
            if not is_directive:
                current_schema = None

            if item_type == TYPE_BUNDLE:
                raw_to_block.append((len(blocks), len(bundle_infos)))
                bundle_infos.append((raw_index, bundle_info))
            elif item_type == TYPE_WORKSHEET:
                raw_to_block.append((len(blocks), len(worksheet_infos)))
                worksheet_infos.append(subworksheet_info)
            elif item_type == TYPE_MARKUP:
                new_last_was_empty_line = value_obj == ''
                if (len(blocks) > 0
                        and blocks[-1]['mode'] == BlockModes.markup_block
                        and not last_was_empty_line
                        and not new_last_was_empty_line):
                    # Join with previous markup item
                    blocks[-1]['text'] += '\n' + value_obj
                    # Ids
                    blocks[-1]['ids'] = blocks[-1].get('ids', [])
                    blocks[-1]['ids'].append(item_id)
                    blocks[-1]['sort_keys'] = blocks[-1].get('sort_keys', [])
                    blocks[-1]['sort_keys'].append(sort_key)
                elif not new_last_was_empty_line:
                    block = (MarkupBlockSchema().load({
                        'id': len(blocks),
                        'text': value_obj,
                        'ids': [item_id],
                        'sort_keys': [sort_key],
                    }).data)
                    blocks.append(block)
                # Important: set raw_to_block after so we can focus on current item.
                if new_last_was_empty_line:
                    raw_to_block.append(None)
                else:
                    raw_to_block.append((len(blocks) - 1, 0))
            elif item_type == TYPE_DIRECTIVE:
                command = get_command(value_obj)
                if command == '%' or command == '' or command is None:
                    # Comment
                    pass
                elif command == 'schema':
                    # Start defining new schema
                    if len(value_obj) < 2:
                        raise UsageError("`schema` missing name")
                    name = value_obj[1]
                    schemas[name] = current_schema = []
                elif command == 'addschema':
                    # Add to schema
                    if current_schema is None:
                        raise UsageError(
                            "`addschema` must be preceded by `schema` directive"
                        )
                    if len(value_obj) < 2:
                        raise UsageError("`addschema` missing name")
                    name = value_obj[1]
                    # Report an unknown schema as a usage problem rather than
                    # letting a bare KeyError surface as an unexpected error.
                    if name not in schemas:
                        raise UsageError(
                            "`addschema` refers to unknown schema `%s`" % name)
                    current_schema += schemas[name]
                elif command == 'add':
                    # Add to schema
                    if current_schema is None:
                        raise UsageError(
                            "`add` must be preceded by `schema` directive")
                    schema_item = canonicalize_schema_item(value_obj[1:])
                    current_schema.append(schema_item)
                elif command == 'display':
                    # Set display
                    current_display = value_obj[1:]
                else:
                    raise UsageError("unknown directive `%s`" % command)

                raw_to_block.append(None)
            else:
                raise RuntimeError('Unknown worksheet item type: %s' %
                                   item_type)

            # Flush bundles once more at the end
            if raw_index == len(raw_items) - 1:
                flush_bundles()
                flush_worksheets()

        except UsageError as e:
            current_schema = None
            record_error(
                raw_index,
                'Error in source line %d: %s' % (raw_index + 1, str(e)))

        except Exception:
            current_schema = None
            import traceback

            traceback.print_exc()
            record_error(
                raw_index,
                'Unexpected error while parsing line %d' % (raw_index + 1))

        finally:
            last_was_empty_line = new_last_was_empty_line

    # Safety net: every raw item is expected to have exactly one entry.
    if len(raw_to_block) != len(raw_items):
        print(
            "WARNING: Length of raw_to_block does not match length of raw_items",
            file=sys.stderr)

    # Package the result
    block_to_raw = {}
    next_interpreted_index = None
    # Go in reverse order so we can assign raw items that map to None to the next interpreted item
    for raw_index, interpreted_index in reversed(list(
            enumerate(raw_to_block))):
        if interpreted_index is None:  # e.g., blank line, directive
            interpreted_index = next_interpreted_index
            raw_to_block[raw_index] = interpreted_index
        else:
            interpreted_index_str = str(interpreted_index[0]) + ',' + str(
                interpreted_index[1])
            if interpreted_index_str not in block_to_raw:  # Bias towards the last item
                block_to_raw[interpreted_index_str] = raw_index
        next_interpreted_index = interpreted_index

    # Return the result
    result = {}
    result['blocks'] = blocks
    result['raw_to_block'] = raw_to_block
    result['block_to_raw'] = block_to_raw
    return result