示例#1
0
File: main.py  Project: mediatum/mediatum
def show_node(req):
    """Render the administration window with its content.

    Builds the template context (user info, version, rendered content,
    navigation, breadcrumbs, sub-header menu) and either renders the
    full admin frame (default) or writes only the raw content when a
    non-empty ``style`` request parameter is given.
    """
    path_segments = req.path[1:].split("/")
    style = req.params.get("style", u"")
    user = current_user

    ctx = {
        "user": user,
        "guestuser": get_guest_user().login_name,
        "version": core.__version__,
        "content": show_content(req, path_segments[0]),
        "navigation": adminNavigation(),
    }
    ctx["breadcrumbs"] = getMenuItemID(ctx["navigation"], req.path[1:])

    # sub-header menu entries shown in the admin frame
    menu_entries = [Menu("sub_header_frontend", u"/"),
                    Menu("sub_header_edit", u"/edit"),
                    Menu("sub_header_logout", u"/logout")]
    if user.is_workflow_editor:
        menu_entries.append(Menu("sub_header_workflow", u"../publish/"))
    ctx["spc"] = menu_entries

    # split("/") always yields at least one element, so this is always true
    if path_segments:
        if style == "":
            req.writeTAL("web/admin/frame.html", ctx, macro="frame")
        else:
            req.write(ctx["content"])
示例#2
0
def show_node(req):
    """Render the admin window for the node addressed by the request path.

    Builds a template context (user info, version, rendered content,
    navigation and breadcrumbs) and either renders the full admin frame
    or, when a non-empty ``style`` parameter is given, writes only the
    raw content.
    """

    p = req.path[1:].split("/")
    style = req.params.get("style", u"")
    user = current_user

    v = {}
    v["user"] = user
    v["guestuser"] = get_guest_user().login_name
    v["version"] = core.__version__
    v["content"] = show_content(req, p[0])
    v["navigation"] = adminNavigation()
    v["breadcrumbs"] = getMenuItemID(v["navigation"], req.path[1:])

    # sub-header menu entries shown in the admin frame
    spc = [
        Menu("sub_header_frontend", u"/"),
        Menu("sub_header_edit", u"/edit"),
        Menu("sub_header_logout", u"/logout")
    ]

    if user.is_workflow_editor:
        spc.append(Menu("sub_header_workflow", u"../publish/"))

    v["spc"] = spc

    # split("/") always returns at least one element, so this is always true
    if len(p) > 0:
        if style == "":
            req.writeTAL("web/admin/frame.html", v, macro="frame")
        else:
            req.write(v["content"])
示例#3
0
def create():
    """
    Creates the sitemap files and the sitemap index files which are located at /web/root/
    """
    logging.getLogger('everything').info('Creating Sitemaps and Sitemap Index...')
    from core.users import get_guest_user

    base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
    hostname = config.get('host.name')

    root = q(Collections).one()
    guest_user = get_guest_user()
    all_nodes = root.all_children_by_query(q(Node)).filter_read_access(user=guest_user)
    sitemaps = []

    # bucket (node id, updatetime) pairs by partial type name
    node_dict = {'collection': [],
                 'directory': [],
                 'document': [],
                 'dissertation': [],
                 'image': [],
                 'video': [],
                 'audio': [],
    }

    for node in all_nodes:
        # Arkitekt had a guest field that is actually not visible
        if node.has_read_access(user=guest_user):
            # fetch the node once instead of re-querying it for every bucket key
            db_node = q(Node).get(node.id)
            for node_type in node_dict.keys():
                if node_type in db_node.type:
                    node_dict[node_type].append((unicode(node.id), db_node.updatetime))

    # Reassign node_dict to a dict where empty values were removed
    node_dict = dict((k, v) for k, v in node_dict.iteritems() if v)

    # Sitemap can have at most 50k entries
    for key in node_dict.keys():
        if key in ('dissertation', 'document', 'image'):
            priority_level = '1.0'
        elif key == 'video':
            # BUG FIX: was `key == 'videos'`, which never matched the
            # 'video' bucket, so videos silently got the default 0.5
            priority_level = '0.8'
        else:
            priority_level = '0.5'

        # Create multiple sitemaps for node lists > 50k
        if len(node_dict[key]) > 50000:
            partitions = int(ceil((len(node_dict[key]) / 50000.)))
            for partition_number in range(partitions):
                sitemap = Sitemap(base_dir, ''.join(['sitemap-', str(key), str(partition_number), '.xml']), hostname)
                sitemaps.append(sitemap.name)
                sitemap.create_sitemap(node_dict[key][partition_number * 50000:(partition_number + 1) * 50000], priority_level)
        else:
            sitemap = Sitemap(base_dir, ''.join(['sitemap-', key, '.xml']), hostname)
            sitemaps.append(sitemap.name)
            sitemap.create_sitemap(node_dict[key], priority_level)

    siteindex = SitemapIndex(base_dir, 'sitemap-index.xml', hostname)
    # lastmod timestamp with a fixed +02:00 offset, as expected by the index
    now = '+'.join([datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S'), '02:00'])
    siteindex.create_sitemap_index(sitemaps, now)

    logging.getLogger('everything').info('Generation of Sitemaps and SitemapIndex Complete')
示例#4
0
    def has_access_to_node_id(node_id,
                              accesstype,
                              user=None,
                              ip=None,
                              date=None):
        """Run the DB access-check procedure for *node_id*.

        Missing arguments fall back to the guest user, the null IP and
        the database's current date; admin users always pass without a
        database round trip.
        """
        # XXX: the database-independent code could move to core.node
        from core import db
        from core.users import get_guest_user

        effective_user = user if user is not None else get_guest_user()

        # administrators bypass all rule checks
        if effective_user.is_admin:
            return True

        effective_ip = ip if ip is not None else IPv4Address("0.0.0.0")
        effective_date = date if date is not None else func.current_date()

        check = access_funcs[accesstype](node_id,
                                         effective_user.group_ids,
                                         effective_ip,
                                         effective_date)
        return db.session.execute(select([check])).scalar()
示例#5
0
def GetRecord(req):
    """Handle the OAI-PMH GetRecord verb for the node named by ``identifier``."""
    if "identifier" not in req.params:
        return writeError(req, "badArgument")
    nid = identifier2id(req.params.get("identifier"))
    if nid is None:
        return writeError(req, "idDoesNotExist")

    metadataformat = req.params.get("metadataPrefix", None)
    if not checkMetaDataFormat(metadataformat):
        return writeError(req, "badArgument")

    node = q(Node).get(nid)
    if node is None:
        return writeError(req, "idDoesNotExist")

    # format-specific filters may veto delivery of this node
    if metadataformat:
        fmt = metadataformat.lower()
        if fmt in FORMAT_FILTERS.keys() and not filterFormat(node, fmt):
            return writeError(req, "noPermission")

    if parentIsMedia(node):
        return writeError(req, "noPermission")

    if not node.has_read_access(user=get_guest_user()):
        return writeError(req, "noPermission")

    mask = get_oai_export_mask_for_schema_name_and_metadataformat(node.getSchema(),
                                                                  metadataformat)

    req.write('<GetRecord>')
    writeRecord(req, node, metadataformat, mask=mask)
    req.write('</GetRecord>')
    if DEBUG:
        timetable_update(req, "leaving GetRecord")
示例#6
0
def search_nodes(query, mapping_prefix='Z3950_search_'):
    """
    Search nodes that match the query.

    'query' is a tree of QueryBoolNode and QueryMatchNode objects.

    Query root nodes are configured by a naming convention.  The names
    of mappings that start with the given 'mapping_prefix' must end
    with a node ID, which is then used as root node for the search
    based on that field mapping.

    :returns: list of matching node ids (may contain duplicates across roots)
    """

    def get_root_for_mapping(mapping_node):
        # mapping name is '<prefix><node_id>'; resolve the node id suffix
        name = mapping_node.name
        node_id = name[len(mapping_prefix):]
        node = q(Node).get(node_id)
        return node

    mapping_nodes = q(Mapping).filter(Mapping.name.startswith(mapping_prefix))
    roots_and_mappings = [(get_root_for_mapping(m), m) for m in mapping_nodes]

    if not roots_and_mappings:
        logg.info('no mappings configured, skipping search')
        return []

    logg.debug('using mapping roots: %s', [(n1.id, n2.id) for (n1, n2) in roots_and_mappings])

    # run one search per root node
    node_ids = []
    guest = get_guest_user()
    search_languages = get_service_search_languages()

    for root_node, mapping_node in roots_and_mappings:
        if root_node is None:
            logg.error("Configuration problem detected: Z39.50 search mapping '%s' found, but no matching root node", mapping_node.name)
            continue
        # map query fields to node attributes
        field_mapping = {}
        for field in mapping_node.children:
            field_mapping[field.name] = field.getDescription().split(';')
        # XXX: this is redundant - why build an infix query string
        # XXX: just to parse it afterwards?
        # XXX: better: create search tree and apply it to a query instead of using node.search()
        query_string = query.build_query_string(field_mapping)
        # BUG FIX: check for an unmappable query BEFORE parsing it; the
        # original parsed query_string first, so the None guard below was
        # too late and parse_searchquery_old_style() received None.
        if query_string is None:
            logg.info('unable to map query: [%r] using mapping %s', query, field_mapping)
            continue
        searchtree = search.parse_searchquery_old_style(query_string)
        logg.info('executing query for node %s: %s', root_node.id, query_string)
        for n in root_node.search(searchtree, search_languages).filter_read_access(user=guest):
            node_ids.append(n.id)

    # use a round-robin algorithm to merge the separate query results
    # in order to produce maximally diverse results in the first hits
    # return merge_ids_as_round_robin(node_ids)
    return node_ids
示例#7
0
def ListMetadataFormats(req):
    """Handle the OAI-PMH ListMetadataFormats verb.

    Without an ``identifier`` parameter all configured formats are
    listed; with one, the list is restricted to formats for which the
    node has an OAI export mask and passes the configured format filters.
    """
    if "set" in req.params:
        return writeError(req, "badArgument")

    # supported oai metadata formats are configured in section
    # oai.formats in the mediatum.cfg file
    d = config.getsubset('oai')
    formats = [x.strip() for x in d['formats'].split(',') if x.strip()]

    if "identifier" in req.params:
        # list only formats available for the given identifier
        try:
            nid = identifier2id(req.params.get("identifier"))
            if nid is None:
                return writeError(req, "idDoesNotExist")
            node = q(Node).get(nid)
        except (TypeError, KeyError):
            return writeError(req, "badArgument")
        if node is None:
            return writeError(req, "badArgument")

        if not node.has_read_access(user=get_guest_user()):
            return writeError(req, "noPermission")

        formats = [x for x in formats if nodeHasOAIExportMask(node, x.lower())]
        formats = [x for x in formats if filterFormat(node, x.lower())]

    # write xml for metadata formats list
    req.write('\n      <ListMetadataFormats>\n')
    for mdf in formats:
        try:
            req.write("""
             <metadataFormat>
               <metadataPrefix>%s</metadataPrefix>
               <schema>%s</schema>
               <metadataNamespace>%s</metadataNamespace>
             </metadataFormat>
             """ % (mdf, d["schema.%s" % mdf], d["namespace.%s" % mdf]))
        except Exception:
            # was a bare ``except:`` which would also swallow
            # KeyboardInterrupt/SystemExit; a missing schema.*/namespace.*
            # config entry (KeyError) is the expected failure and is logged.
            logg.exception(
                "%s: OAI error reading oai metadata format %s from config file",
                __file__, mdf)
    req.write('\n</ListMetadataFormats>')
    if DEBUG:
        timetable_update(req, "leaving ListMetadataFormats")
示例#8
0
def getAccessRights(node):
    """Get access rights for the public.

    The returned values descend from
    http://wiki.surffoundation.nl/display/standards/info-eu-repo/#info-eu-repo-AccessRights.
    These values are used by the OpenAIRE portal.

    :param node: node whose public accessibility is classified
    :returns: one of "embargoedAccess", "openAccess", "restrictedAccess"
        or "closedAccess"
    """
    try:  # if node.get('updatetime') is empty, the method parse_date would raise an exception
        l_date = parse_date(node.get('updatetime'))
    except Exception:  # narrowed from bare except: keep best-effort fallback,
        l_date = date.now()  # but don't swallow KeyboardInterrupt/SystemExit
    guest_user = get_guest_user()
    if date.now() < l_date:
        # update time lies in the future -> still under embargo
        return "embargoedAccess"
    elif node.has_read_access(user=guest_user):
        if node.has_data_access(user=guest_user):
            return "openAccess"
        else:
            return "restrictedAccess"
    else:
        return "closedAccess"
示例#9
0
File: node.py  Project: mediatum/mediatum
    def has_access_to_node_id(node_id, accesstype, user=None, ip=None, date=None):
        """Return the DB access-check result for *node_id*.

        :param node_id: id of the node to check
        :param accesstype: key into ``access_funcs`` selecting the DB procedure
        :param user: defaults to the guest user when None
        :param ip: defaults to 0.0.0.0 when None
        :param date: defaults to the database's current date when None
        :returns: True for admins without querying; otherwise the scalar
            result of the selected access function
        """
        # XXX: the database-independent code could move to core.node
        from core import db
        from core.users import get_guest_user

        if user is None:
            user = get_guest_user()

        # admins always pass, no DB round trip needed
        if user.is_admin:
            return True

        if ip is None:
            ip = IPv4Address("0.0.0.0")

        if date is None:
            date = func.current_date()

        accessfunc = access_funcs[accesstype]
        group_ids = user.group_ids
        access = accessfunc(node_id, group_ids, ip, date)
        return db.session.execute(select([access])).scalar()
示例#10
0
    def getLinks(self):
        """Assemble the sub-header link list for the current user/session."""
        guest_user = get_guest_user()
        links = [Link("/logout", t(self.language, "sub_header_logout_title"),
                      t(self.language, "sub_header_logout"), icon="/img/logout.gif")]
        if self.user == guest_user:
            # guests get a login link instead; prefer https when configured
            if config.get("config.ssh") == "yes":
                host = config.get("host.name") or self.host
                login_url = "https://" + host + "/login"
            else:
                login_url = "/login"
            links = [Link(login_url, t(self.language, "sub_header_login_title"),
                          t(self.language, "sub_header_login"), icon="/img/login.gif")]

        if self.is_workflow_area:
            links.append(Link("/", t(self.language, "sub_header_frontend_title"),
                              t(self.language, "sub_header_frontend"), icon="/img/frontend.gif"))

        if self.user.is_editor:
            idstr = ""
            if self.id:
                idstr = "?id=" + unicode(self.id)
            # set edit-link to upload_dir if user comes from collections
            if not self.id or int(self.id) == get_collections_node().id:
                if self.user.upload_dir:
                    idstr = "?id=" + unicode(self.user.upload_dir.id)
            links.append(Link("/edit" + idstr, t(self.language, "sub_header_edit_title"),
                              t(self.language, "sub_header_edit"), icon="/img/edit.gif"))

        if self.user.is_admin:
            links.append(Link("/admin", t(self.language, "sub_header_administration_title"),
                              t(self.language, "sub_header_administration"), icon="/img/admin.gif"))

        if self.user.is_workflow_editor:
            links.append(Link("/publish/", t(self.language, "sub_header_workflow_title"),
                              t(self.language, "sub_header_workflow"), icon="/img/workflow.gif"))

        if self.user.can_change_password:
            links.append(Link("/pwdchange", t(self.language, "sub_header_changepwd_title"),
                              t(self.language, "sub_header_changepwd"), "_parent", icon="/img/changepwd.gif"))
        return links
示例#11
0
def build_accessfunc_arguments(user=None, ip=None, date=None, req=None):
    """Build the expected arguments for the DB permission procedures has_*_access_to_node().

    IP and date are returned unchanged when passed to this function.
    For missing arguments, default values are set from request information or current date.

    :returns: 3-tuple of (group_ids, ip, date).
        For admin users, it returns (None, None, None) which means: ignore all access checks.
        Callers can test for this and skip permission checks completely.
    """
    from core.users import get_guest_user

    if user is None and ip is None:
        # neither user nor IP given: derive both from the (current) request
        if req is None:
            req = request

        from core.users import user_from_session

        user = user_from_session(req.session)
        # XXX: like in mysql version, what's the real solution?
        try:
            ip = IPv4Address(req.remote_addr)
        except AddressValueError:
            logg.warn("illegal IP address %s, refusing IP-based access", req.remote_addr)
            ip = None

    if user is None:
        user = get_guest_user()

    # admin sees everything ;)
    if user.is_admin:
        return (None, None, None)

    return (user.group_ids,
            ip if ip is not None else IPv4Address("0.0.0.0"),
            date if date is not None else sqlfunc.current_date())
示例#12
0
def build_accessfunc_arguments(user=None, ip=None, date=None, req=None):
    """Build the expected arguments for the DB permission procedures has_*_access_to_node().

    IP and date are returned unchanged when passed to this function.
    For missing arguments, default values are set from request information or current date.

    :returns: 3-tuple of (group_ids, ip, date).
        For admin users, it returns (None, None, None) which means: ignore all access checks.
        Callers can test for this and skip permission checks completely.
    """
    from core.users import get_guest_user

    if user is None and ip is None:
        active_req = req if req is not None else request

        from core.users import user_from_session

        user = user_from_session(active_req.session)
        # XXX: like in mysql version, what's the real solution?
        try:
            ip = IPv4Address(active_req.remote_addr)
        except AddressValueError:
            logg.warn("illegal IP address %s, refusing IP-based access", active_req.remote_addr)
            ip = None

    effective_user = user if user is not None else get_guest_user()

    # admin sees everything ;)
    if effective_user.is_admin:
        return (None, None, None)

    effective_ip = ip if ip is not None else IPv4Address("0.0.0.0")
    effective_date = date if date is not None else sqlfunc.current_date()

    return effective_user.group_ids, effective_ip, effective_date
示例#13
0
from core.database.postgres.node import t_noderelation
from core.database.postgres import alchemyext

# change this to True in your IPython notebook after running mediatumipython.py
IPYTHON_NOTEBOOK = False


# use default connection specified by mediatum config for ipython-sql magic
SQLMAGICS_CONNECTION_FACTORY = lambda: core.db.connectstr
# TODO: changing the connection string should be possible for the postgres connector, too


from core.users import get_guest_user
try:
    guest_user = get_guest_user()
except Exception:
    # best-effort: an interactive session may start without a usable DB;
    # keep the shell usable instead of failing here. Narrowed from a bare
    # ``except:`` which would also swallow KeyboardInterrupt/SystemExit.
    guest_user = None

# we don't want to raise warnings for missing node classes, just stub them and be silent
_core_init_loglevel = logging.getLogger("core.init").level
logging.getLogger("core.init").setLevel(logging.ERROR)
initmodule.check_undefined_nodeclasses(stub_undefined_nodetypes=True)
logging.getLogger("core.init").setLevel(_core_init_loglevel)

from core import db, Node, File, NodeToFile
from core import User, UserGroup, AuthenticatorInfo
from core import AccessRule, AccessRuleset, NodeToAccessRule, NodeToAccessRuleset
from core import Fts, Setting
from core import app
示例#14
0
from core.database.postgres.node import t_noderelation
from core.database.postgres import alchemyext

# change this to True in your IPython notebook after running mediatumipython.py
IPYTHON_NOTEBOOK = False


# use default connection specified by mediatum config for ipython-sql magic
SQLMAGICS_CONNECTION_FACTORY = lambda: core.db.connectstr
# TODO: changing the connection string should be possible for the postgres connector, too


from core.users import get_guest_user
# best-effort lookup: interactive sessions may start without a usable DB,
# so any failure leaves guest_user as None instead of aborting the shell
try:
    guest_user = get_guest_user()
except:
    guest_user = None

# we don't want to raise warnings for missing node classes, just stub them and be silent
# (temporarily raise the core.init log level, then restore it)
_core_init_loglevel = logging.getLogger("core.init").level
logging.getLogger("core.init").setLevel(logging.ERROR)
initmodule.check_undefined_nodeclasses(stub_undefined_nodetypes=True)
logging.getLogger("core.init").setLevel(_core_init_loglevel)

from core import db, Node, File, NodeToFile
from core import User, UserGroup, AuthenticatorInfo
from core import AccessRule, AccessRuleset, NodeToAccessRule, NodeToAccessRuleset
from core import Fts, Setting
from core import app
示例#15
0
    def getLinks(self):
        """Build the list of sub-header links for the current user/session.

        Starts with a logout link, replaces it with a login link for
        guests (https when configured), and appends frontend, edit,
        admin, workflow and change-password links depending on the
        user's permissions.
        """
        guest_user = get_guest_user()
        l = [
            Link("/logout",
                 t(self.language, "sub_header_logout_title"),
                 t(self.language, "sub_header_logout"),
                 icon="/img/logout.gif")
        ]
        if self.user == guest_user:
            # guests get a login link instead of logout
            if config.get("config.ssh") == "yes":
                host = config.get("host.name") or self.host
                l = [
                    Link("https://" + host + "/login",
                         t(self.language, "sub_header_login_title"),
                         t(self.language, "sub_header_login"),
                         icon="/img/login.gif")
                ]
            else:
                l = [
                    Link("/login",
                         t(self.language, "sub_header_login_title"),
                         t(self.language, "sub_header_login"),
                         icon="/img/login.gif")
                ]

        if self.is_workflow_area:
            l += [
                Link("/",
                     t(self.language, "sub_header_frontend_title"),
                     t(self.language, "sub_header_frontend"),
                     icon="/img/frontend.gif")
            ]

        if self.user.is_editor:
            idstr = ""
            if self.id:
                idstr = "?id=" + unicode(self.id)
            # set edit-link to upload_dir if user comes from collections
            if not self.id or int(self.id) == get_collections_node().id:
                if self.user.upload_dir:
                    idstr = "?id=" + unicode(self.user.upload_dir.id)
            l += [
                Link("/edit" + idstr,
                     t(self.language, "sub_header_edit_title"),
                     t(self.language, "sub_header_edit"),
                     icon="/img/edit.gif")
            ]

        if self.user.is_admin:
            l += [
                Link("/admin",
                     t(self.language, "sub_header_administration_title"),
                     t(self.language, "sub_header_administration"),
                     icon="/img/admin.gif")
            ]

        if self.user.is_workflow_editor:
            l += [
                Link("/publish/",
                     t(self.language, "sub_header_workflow_title"),
                     t(self.language, "sub_header_workflow"),
                     icon="/img/workflow.gif")
            ]

        if self.user.can_change_password:
            l += [
                Link("/pwdchange",
                     t(self.language, "sub_header_changepwd_title"),
                     t(self.language, "sub_header_changepwd"),
                     "_parent",
                     icon="/img/changepwd.gif")
            ]
        return l
示例#16
0
def get_node_data_struct(req,
                         path,
                         params,
                         data,
                         id,
                         debug=True,
                         allchildren=False,
                         singlenode=False,
                         parents=False,
                         send_children=False,
                         fetch_files=False,
                         csv=False):
    """Collect node data for a services request into a result dict.

    Authenticates via OAuth when a ``user`` query parameter is present,
    otherwise uses the guest user.  Resolves the node given by ``id``,
    checks read access, applies the query parameters (type filter,
    attribute regex, search query, sorting, offset/limit) to the
    child / parent / descendant query, and returns the result dict with
    the node list plus bookkeeping data (timetable, status, shortlist).

    :param allchildren: query all descendants instead of direct children
    :param singlenode: return only the addressed node itself
    :param parents: query the node's parents instead of its children
    :param fetch_files: eagerly load the nodes' file objects
    :param csv: result will be rendered as CSV (fetch fewer columns)
    """

    res = _prepare_response()
    timetable = res["timetable"]

    # verify signature if a user is given, otherwise use guest user
    if params.get('user'):
        user = _handle_oauth(res, req.fullpath, params, timetable)
    else:
        user = get_guest_user()
        res['oauthuser'] = ''  # username supplied for authentication (login name) in query parameter user

    if user is not None:
        res['username'] = user.login_name
        res['userid'] = user.id
    else:
        res['userid'] = ''  # unique id for authenticated user if applicable (node.id for internal, dirid for dynamic users)
        res['username'] = ''  # name of the user, may be the name of the guest user or a personal name

    result_shortlist = []

    # query parameters
    typefilter = params.get(
        'type', '')  # return only nodes of given type like dissertation/diss
    parent_type = params.get(
        'parent_type', ''
    )  # return only nodes that have only parents of  given type like folder or collection
    # XXX: do we want version support?
    #     send_versions = params.get('send_versions', '').lower()  # return also nodes that are older versions of other nodes
    # return only nodes that have an EXIF location that lies between the given lon,lat values
    exif_location_rect = params.get('exif_location_rect', '')
    mdt_name = params.get('mdt_name', '')
    attrreg = params.get('attrreg', '')
    searchquery = params.get('q', '')  # node query
    sortfield = params.get('sortfield', '')
    sortformat = params.get('sortformat', '')  # 'sissfi'
    limit = params.get("limit", DEFAULT_NODEQUERY_LIMIT)
    offset = params.get("start", 0)
    csv_allchildren = csv and allchildren

    # check node existence
    node = q(Node).get(id)
    if node is None:
        return _client_error_response(404, u"node not found")

    home = get_home_root_node()
    collections = get_collections_node()
    # check node access: node must be readable AND live under the
    # collections or home subtree
    if node.has_read_access(user=user) and (node.is_descendant_of(collections)
                                            or node.is_descendant_of(home)):
        pass
    else:
        return _client_error_response(403, u"forbidden")

    if mdt_name:
        mdt = q(Metadatatype).filter_by(name=mdt_name).count()
        if not mdt:
            return _client_error_response(
                404, u'no such metadata type: ' + mdt_name)

    # choose the base query: descendants, parents or direct children
    if allchildren:
        if csv:
            # fetch only those columns which are needed, this is faster than fetch all columns and need less space
            nodequery = node.all_children_by_query(
                q(Node.attrs.label("attributes"), Node.id, Node.name,
                  Node.schema, Node.type))
        else:
            nodequery = node.all_children
    elif parents:
        nodequery = node.parents
    else:
        nodequery = node.children

    if searchquery:
        search_languages = get_service_search_languages()

        try:
            searchtree = search.parse_searchquery_old_style(searchquery)
        except search.SearchQueryException as e:
            return _client_error_response(400, str(e))

        nodequery = apply_searchtree_to_query(nodequery, searchtree,
                                              search_languages)

    if typefilter:
        # regex match against 'type/schema'
        nodequery = nodequery.filter(
            (Node.type + "/" + Node.schema).op("~")(typefilter))

    if attrreg:
        # attrreg has the form '<attribute>=<regex>'
        spl = attrreg.split('=')
        if len(spl) != 2:
            return _client_error_response(400,
                                          "wrong attrreg value: " + attrreg)
        akey, aval = spl

        nodequery = nodequery.filter(Node.attrs[akey].astext.op("~")(aval))

    sortdirection = u""

    if sortfield:
        # comma-separated sort fields; a leading '-' means descending,
        # sortformat gives one of i(nt)/f(loat)/s(tring) per field
        sfields = [x.strip() for x in sortfield.split(',')]
        sfields_without_sign = []

        sortformat = sortformat[:len(sfields)]

        for sfield, sformat in izip_longest(sfields, sortformat,
                                            fillvalue="s"):
            if sformat == "i":
                astype = Integer
            elif sformat == "f":
                astype = Float
            else:
                astype = Unicode

            if sfield[0] == "-":
                sfield = sfield[1:]
                desc = True
                sortdirection += u"d"
            else:
                desc = False
                sortdirection += u"u"
            sfields_without_sign.append(sfield)

            # built-in node columns sort natively, everything else is an attribute
            if sfield == 'node.id':
                order_expr = Node.id
            elif sfield == 'node.name':
                order_expr = Node.name
            elif sfield == 'node.type':
                order_expr = Node.type
            elif sfield == 'node.orderpos':
                order_expr = Node.orderpos
            else:
                order_expr = Node.attrs[sfield].cast(astype)

            if desc:
                order_expr = sql.desc(order_expr)

            nodequery = nodequery.order_by(order_expr.nullslast())

        sfields = sfields_without_sign
    else:
        sfields = []

    ### TODO: do we need this?

    if parent_type:
        raise NotImplementedError("parent_type not supported at the moment")
        # XXX: do we need this?
        pass

    ### actually get the nodes

    if csv_allchildren:
        nodequery = nodequery.order_by('attributes').distinct()
    else:
        nodequery = nodequery.distinct().options(undefer(Node.attrs))

    if fetch_files:
        nodequery = nodequery.options(joinedload(Node.file_objects))

    if singlenode:
        # we already checked that node can be accessed by the user, just return the node
        nodelist = [node]
        node_count = 1
        limit = 1
    else:
        if mdt_name:
            nodequery = nodequery.filter(Node.schema == mdt_name)

        nodequery = nodequery.filter_read_access(user=user)

        if offset:
            nodequery = nodequery.offset(offset)

        if limit:
            nodequery = nodequery.limit(limit)

        atime = time.time()

        try:
            nodelist = nodequery.all()
        except Exception as e:
            return _client_error_response(
                400, "the database failed with the message: {}".format(str(e)))

        node_count = len(nodelist)
        timetable.append([
            'fetching nodes from db returned {} results'.format(node_count),
            time.time() - atime
        ])
        atime = time.time()

    # i0/i1 bound the slice of the shortlist that is returned
    i0 = int(params.get('i0', '0'))
    i1 = int(params.get('i1', node_count))

    def attr_list(node, sfields):
        # [[sortfield, value], ...] pairs for one node
        r = []
        for sfield in sfields:
            r.append([sfield, node.get(sfield)])
        return r

    if 'add_shortlist' in params:
        if sortfield:
            result_shortlist = [[
                i, x.id, x.name, x.type,
                attr_list(x, sfields)
            ] for i, x in enumerate(nodelist)][i0:i1]
            timetable.append([
                'build result_shortlist for %d nodes and %d sortfields' %
                (len(result_shortlist), len(sfields)),
                time.time() - atime
            ])
            atime = time.time()
        else:
            result_shortlist = [[i, x.id, x.name, x.type]
                                for i, x in enumerate(nodelist)][i0:i1]
            timetable.append([
                'build result_shortlist for %d nodes (no sortfield)' %
                len(result_shortlist),
                time.time() - atime
            ])
            atime = time.time()

    ### XXX: filtering in python, should be moved to the database

    if exif_location_rect:
        # dead code below the raise, kept as documentation of the old behavior
        raise NotImplementedError("not supported at the moment")

        components = exif_location_rect.split(',')

        if len(components) != 4:
            return _client_error_response(
                400, u"exif_location_rect is invalid: {}".format(
                    exif_location_rect))

        nodelist = _exif_location_filter(nodelist, components)

    ### build result

    res['nodelist'] = nodelist
    res['sfields'] = sfields
    res['sortfield'] = sortfield
    res['sortdirection'] = sortdirection
    res['result_shortlist'] = result_shortlist
    res['timetable'] = timetable
    res['nodelist_start'] = offset
    res['nodelist_limit'] = limit
    res['nodelist_count'] = node_count
    res['path'] = req.path
    res['status'] = 'ok'
    res['html_response_code'] = '200'  # ok
    res['build_response_end'] = time.time()
    dataready = "%.3f" % (res['build_response_end'] -
                          res["build_response_start"])
    res['dataready'] = dataready
    return res
示例#17
0
def struct2rss(req,
               path,
               params,
               data,
               struct,
               debug=False,
               singlenode=False,
               send_children=False):
    """Render the nodes in struct['nodelist'] as an RSS 2.0 channel.

    Each node becomes one <item>.  If the node's metadata type defines an
    'rss' export mask, that mask produces the whole item body; otherwise a
    default item is assembled from a 'nodesmall' mask (a language-specific
    one if available) or, failing that, from the bare node id/name/type.
    Items are sorted newest-first by update time.

    :returns: the rendered channel as a UTF-8 encoded byte string
    """
    nodelist = struct['nodelist']
    language = params.get('lang', 'en')
    items_list = []

    host = u"http://" + unicode(_get_header(req, "HOST") or configured_host)
    collections = get_collections_node()
    user = get_guest_user()

    for n in nodelist:
        nodename = n.name
        nodeid = str(n.id)
        updatetime = utime = try_node_date(n)

        # categories to be included in all items - mask generated or not
        default_categories = u'<category>node type: ' + n.type + '/' + n.schema + u'</category>\r\n'

        # Check for an 'rss' export mask for this node's metadata type.
        # Narrowed from bare `except:` so KeyboardInterrupt/SystemExit are
        # not swallowed; any lookup failure simply means "no mask".
        try:
            try:
                mdt = n.metadatatype
            except Exception:
                mdt = None
            mask = mdt.getMask('rss')
            if mask.get('masktype') != 'export':
                mask = None
        except Exception:
            mask = None

        if mask:
            # the export mask renders the complete item body
            item_xml = u'<item>\r\n' + mask.getViewHTML(
                [n], flags=8) + default_categories + u'\r\n</item>\r\n'
            items_list.append((updatetime, nodename, nodeid, item_xml))
            continue

        # no rss export mask: build default item from nodesmall mask
        item_d = {}

        # keep only browsing paths readable by guest and below /collections
        browsingPathList = getBrowsingPathList(n)
        browsingPathList = [
            x for x in browsingPathList if x[-1].has_read_access(
                user=user) and x[-1].is_descendant_of(collections)
        ]
        browsingPathList_names = [
            map(lambda x: x.name, browsingPath)
            for browsingPath in browsingPathList
        ]

        # assumption: longest path is most detailled and illustrative for being used in the title
        x = sorted([[len(p), i, p]
                    for i, p in enumerate(browsingPathList_names)])
        x.reverse()
        try:
            most_detailed_path = x[0][2]
        except IndexError:  # browsing path list may be empty (for directories, collections, ...)
            most_detailed_path = ''

        item_d['title'] = esc(u"{} ({}, {}/{}) {}".format(
            nodename or u'-unnamed-node-', nodeid, n.type, n.schema,
            u"/".join(most_detailed_path)))
        item_d['item_pubDate'] = utime
        item_d['guid'] = host + u'/node?id=%s' % nodeid
        item_d['link'] = host + u'/node?id=%s' % nodeid

        # prefer a language-specific 'nodesmall*' mask, then the generic one
        if mdt:
            lang_mask = mdt.masks.filter(
                Node.name.startswith(u"nodesmall")).filter(
                    Node.a.language == language).first()
            if lang_mask is not None:
                mask = lang_mask
            else:
                mask = mdt.get_mask('nodesmall')
        else:
            mask = None

        if mask is not None:
            attr_list = mask.getViewHTML(
                [n], VIEW_DATA_ONLY,
                language)  # [[attr_name, value, label, type], ...]
        else:
            # minimal fallback attributes when no mask exists at all
            attr_list = [
                ['', n.id, 'node id', ''],
                ['', n.name, 'node name', ''],
                ['', n.type + "/" + n.schema, 'node type', ''],
            ]

        item_d['description'] = u''.join(
            u'''<b>%s: </b>%s<br/>\r\n''' % (x[2], x[1]) for x in attr_list)

        categories = default_categories

        for path_names in browsingPathList_names:
            categories += u'<category>' + esc(
                u'/'.join(path_names)) + u'</category>\r\n'

        # semicolon-separated DDC classifications and subjects become
        # one <category> each
        ddcs = n.get('ddc').strip()
        if ddcs:
            for ddc in ddcs.split(';'):
                categories += u'<category>' + esc(ddc) + u'</category>\r\n'

        subjects = n.get('subject').strip()
        if subjects:
            for subject in subjects.split(';'):
                categories += u'<category>' + esc(subject) + u'</category>\r\n'

        item_d['categories'] = categories

        # (a no-op `for k, v in item_d.items(): item_d[k] = v` loop was
        # removed here)
        items_list.append((updatetime, nodename, nodeid,
                           template_rss_item % item_d))

    # newest first: tuples sort by update time, then name, then id
    items_list.sort()
    items_list.reverse()

    items = u''.join(x[3] + u'\r\n' for x in items_list)

    pubDate = lastBuildDate = format_date(format='rfc822')

    struct['dataready'] = (u"%.3f" %
                           (time.time() - struct['build_response_start']))

    fcd = feed_channel_dict.copy()
    fcd['lang'] = u'de'
    fcd['pubdate'] = pubDate
    fcd['lastbuild'] = lastBuildDate
    fcd['link'] = host
    fcd['atom_link'] = host + req.fullpath
    fcd['image_title'] = 'testlogo'
    fcd['image_link'] = host + u'/img/testlogo.png'
    fcd['image_url'] = host + u'/img/testlogo.png'

    # caller-supplied channel metadata wins; otherwise use the request URL
    if 'feed_info' in params:
        for k, v in params['feed_info'].items():
            fcd[k] = v
    else:
        fcd['title'] = host + req.fullpath + req.query
    fcd['items'] = items
    s = template_rss_channel % fcd  # params['feed_info']

    return s.encode("utf8")
示例#18
0
def search_nodes(query, mapping_prefix='Z3950_search_'):
    """
    Search nodes that match the query.

    'query' is a tree of QueryBoolNode and QueryMatchNode objects.

    Query root nodes are configured by a naming convention.  The names
    of mappings that starting with the given 'mapping_prefix' must end
    with a node ID, which is then used as root node for the search
    based on that field mapping.
    """
    def get_root_for_mapping(mapping_node):
        # the root node id is whatever follows the prefix in the mapping name
        name = mapping_node.name
        node_id = name[len(mapping_prefix):]
        node = q(Node).get(node_id)
        return node

    mapping_nodes = q(Mapping).filter(Mapping.name.startswith(mapping_prefix))
    roots_and_mappings = [(get_root_for_mapping(m), m) for m in mapping_nodes]

    if not roots_and_mappings:
        logg.info('no mappings configured, skipping search')
        return []

    logg.debug('using mapping roots: %s',
               [(n1.id, n2.id) for (n1, n2) in roots_and_mappings])

    # run one search per root node
    node_ids = []
    guest = get_guest_user()
    search_languages = get_service_search_languages()

    for root_node, mapping_node in roots_and_mappings:
        if root_node is None:
            logg.error(
                "Configuration problem detected: Z39.50 search mapping '%s' found, but no matching root node",
                mapping_node.name)
            continue
        # map query fields to node attributes
        field_mapping = {}
        for field in mapping_node.children:
            field_mapping[field.name] = field.getDescription().split(';')
        # XXX: this is redundant - why build an infix query string
        # XXX: just to parse it afterwards?
        # XXX: better: create search tree and apply it to a query instead of using node.search()
        query_string = query.build_query_string(field_mapping)
        # bugfix: skip unmappable queries *before* parsing; previously
        # parse_searchquery_old_style was called with query_string=None
        if query_string is None:
            logg.info('unable to map query: [%r] using mapping %s', query,
                      field_mapping)
            continue
        searchtree = search.parse_searchquery_old_style(query_string)
        logg.info('executing query for node %s: %s', root_node.id,
                  query_string)
        for n in root_node.search(
                searchtree, search_languages).filter_read_access(user=guest):
            node_ids.append(n.id)

    # use a round-robin algorithm to merge the separate query results
    # in order to produce maximally diverse results in the first hits
    # return merge_ids_as_round_robin(node_ids)
    return node_ids
示例#19
0
def create():
    """
    Creates the sitemap files and the sitemap index files which are located at /web/root/

    Nodes readable by the guest user below the collections root are grouped
    by node type; each group is written into one or more sitemap files (at
    most 50000 URLs each, per the sitemaps.org protocol limit) and all
    sitemaps are referenced from a single sitemap index file.
    """
    logging.getLogger('everything').info(
        'Creating Sitemaps and Sitemap Index...')
    from core.users import get_guest_user

    base_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir))
    hostname = config.get('host.name')

    root = q(Collections).one()
    guest_user = get_guest_user()
    all_nodes = root.all_children_by_query(
        q(Node)).filter_read_access(user=guest_user)
    sitemaps = []

    # node type -> list of (node id, last update time) tuples
    node_dict = {
        'collection': [],
        'directory': [],
        'document': [],
        'dissertation': [],
        'image': [],
        'video': [],
        'audio': [],
    }

    for node in all_nodes:
        # Arkitekt had a guest field that is actually not visible
        if node.has_read_access(user=guest_user):
            # perf fix: use the already-loaded node instead of re-fetching
            # it twice per type check with q(Node).get(node.id)
            for node_type in node_dict.keys():
                if node_type in node.type:
                    node_dict[node_type].append(
                        (unicode(node.id), node.updatetime))

    # Reassign node_dict to a dict where empty values were removed
    node_dict = dict((k, v) for k, v in node_dict.iteritems() if v)

    # Sitemap can have at most 50k entries
    for key in node_dict.keys():
        if key in ('dissertation', 'document', 'image'):
            priority_level = '1.0'
        elif key == 'video':  # bugfix: the dict key is 'video', not 'videos'
            priority_level = '0.8'
        else:
            priority_level = '0.5'

        # Create multiple sitemaps for node lists > 50k
        if len(node_dict[key]) > 50000:
            partitions = int(ceil((len(node_dict[key]) / 50000.)))
            for partition_number in range(partitions):
                sitemap = Sitemap(
                    base_dir, ''.join(
                        ['sitemap-',
                         str(key),
                         str(partition_number), '.xml']), hostname)
                sitemaps.append(sitemap.name)
                sitemap.create_sitemap(
                    node_dict[key][partition_number *
                                   50000:(partition_number + 1) * 50000],
                    priority_level)
        else:
            sitemap = Sitemap(base_dir, ''.join(['sitemap-', key, '.xml']),
                              hostname)
            sitemaps.append(sitemap.name)
            sitemap.create_sitemap(node_dict[key], priority_level)

    # write the index that references all generated sitemaps, stamped with
    # the current local time at a fixed +02:00 offset
    siteindex = SitemapIndex(base_dir, 'sitemap-index.xml', hostname)
    now = '+'.join(
        [datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S'), '02:00'])
    siteindex.create_sitemap_index(sitemaps, now)

    logging.getLogger('everything').info(
        'Generation of Sitemaps and SitemapIndex Complete')
示例#20
0
def retrieveNodes(req, setspec, date_from=None, date_to=None, metadataformat=None):
    """Build a query for OAI-harvestable nodes, filtered by set spec, date range and metadata format.

    :param req: request object, used only for timetable debug logging
    :param setspec: OAI set spec; restricts the result to that set's nodes or filters
    :param date_from: lower bound (inclusive) on the node's DATEFIELD attribute
    :param date_to: upper bound (inclusive) on the node's DATEFIELD attribute
    :param metadataformat: metadata format name; 'mediatum' selects all
        metadatatype schemata except 'directory' and 'collection'
    :returns: a node query restricted to guest-readable nodes (or, on the
        legacy ``res``-list fallback path, a plain list of nodes)
    """
    schemata = []

    nodequery = None
    res = []

    # resolve the metadata format to the list of schemata it can export
    if metadataformat == 'mediatum':
        metadatatypes = q(Metadatatypes).one().children
        schemata = [m.name for m in metadatatypes if m.type == 'metadatatype' and m.name not in ['directory', 'collection']]
    elif metadataformat:
        schemata = get_schemata_for_metadataformat(metadataformat)

    if DEBUG:
        # NOTE(review): metadataformat.lower() raises AttributeError when
        # metadataformat is None and DEBUG is enabled -- confirm callers
        # always pass a format while debugging
        timetable_update(req, "in retrieveNodes: find schemata with export mask for metadata type %s (%d found: '%s')" %
                         (metadataformat.lower(), len(schemata), ustr([x for x in schemata])))

    if setspec:
        nodequery = oaisets.getNodesQueryForSetSpec(setspec, schemata)
        # if for this oai group set no function is defined that retrieve the nodes query, use the filters
        if not nodequery:
            collections_root = q(Collections).one()
            nodequery = collections_root.all_children
            setspecFilter = oaisets.getNodesFilterForSetSpec(setspec, schemata)
            if schemata:
                nodequery = nodequery.filter(Node.schema.in_(schemata))
            # a set spec filter may be a single criterion or a list of criteria
            if type(setspecFilter) == list:
                for sFilter in setspecFilter:
                    nodequery = nodequery.filter(sFilter)
            else:
                nodequery = nodequery.filter(setspecFilter)
    else:
        # no set spec: all children of the collections root, restricted to
        # the schemata derived from the metadata format
        collections_root = q(Collections).one()
        nodequery = collections_root.all_children
        nodequery = nodequery.filter(Node.schema.in_(schemata))

    if DEBUG:
        timetable_update(req, "in retrieveNodes: after building NodeList for %d nodes" % (len(res)))

    # date-range filtering on the configured date attribute (string compare
    # on the attribute text, as stored)
    if date_from:
        nodequery = nodequery.filter(Node.attrs[DATEFIELD].astext >= str(date_from))
        if DEBUG:
            timetable_update(req, "in retrieveNodes: after filtering date_from --> %d nodes" % (len(res)))
    if date_to:
        nodequery = nodequery.filter(Node.attrs[DATEFIELD].astext <= str(date_to))
        if DEBUG:
            timetable_update(req, "in retrieveNodes: after filtering date_to --> %d nodes" % (len(res)))

    # restrict to nodes the guest (anonymous) user may read; the `else`
    # branches below operate on `res`, which nothing above populates --
    # presumably a leftover legacy path, kept as-is
    if nodequery:
        guest_user = get_guest_user()
        nodequery = nodequery.filter_read_access(user=guest_user)
    else:
        res = [n for n in res if n.has_read_access(user=get_guest_user())]
    if DEBUG:
        timetable_update(req, "in retrieveNodes: after read access filter --> %d nodes" % (len(res)))

    if not nodequery:
        collections = q(Collections).one()
        res = [n for n in res if isDescendantOf(n, collections)]
    if DEBUG:
        timetable_update(req, "in retrieveNodes: after checking descendance from basenode --> %d nodes" % (len(res)))

    # superflous ?!
    #if schemata:
    #    res = [n for n in res if n.getSchema() in schemata]
    #    if DEBUG:
    #        timetable_update(req, "in retrieveNodes: after schemata (%s) filter --> %d nodes" % (ustr(schemata), len(res)))

    # apply an additional per-format SQL filter when one is configured
    if metadataformat and metadataformat.lower() in FORMAT_FILTERS.keys():
        format_string = metadataformat.lower()
        format_filter = FORMAT_FILTERS[format_string]['filterQuery']
        nodequery = nodequery.filter(format_filter)
        #res = [n for n in res if filterFormat(n, format_string)]
        if DEBUG:
            timetable_update(req, "in retrieveNodes: after format (%s) filter --> %d nodes" % (format_string, len(res)))

    if nodequery:
        res = nodequery

    return res