Exemplos de UrlNoAmp em Python, exemplos de lib_util.UrlNoAmp em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: lib_naming.py Projeto: vchateauneu/survol

def ParseEntitySurvolUri(uprs, longDisplay, force_entity_ip_addr):
    """Rebuild a direct script URL from a CGI-wrapped one, then parse it.

    ``uprs`` is a parsed URL whose query carries the real script in a
    ``script=`` CGI argument, for example::

        scheme=u'http', netloc=u'127.0.0.1:8000',
        path=u'/survol/survolcgi.py',
        query=u'script=/entity.py&amp;amp;xid=Win32_UserAccount.Domain=rchateau-HP,Name=rchateau'

    The value of ``script`` is appended to the scheme and netloc, and all the
    other CGI arguments are kept, in order, as the new query string.

    Returns the result of ParseEntityUri() on the rebuilt URL, or a triple of
    placeholder strings when no non-empty ``script`` argument is present.
    """
    # The query might contain "&amp;amp;" and "&" playing the same role:
    # lib_util.UrlNoAmp is applied so that splitting on "&" is enough.
    cleanQuery = lib_util.UrlNoAmp(uprs.query)

    scriptPath = None
    otherArgs = []
    for cgiArg in cleanQuery.split("&"):
        # Only the first "=" separates the argument name from its value.
        argName, _, argValue = cgiArg.partition("=")
        if argName == "script":
            scriptPath = argValue
        else:
            otherArgs.append(cgiArg)

    if not scriptPath:
        return ("Incomplete CGI script:" + str(uprs),
                "Unknown subjEntityGraphicClass", "Unknown entity_id")

    # The first remaining argument is introduced by "?", the next ones by "&".
    queryString = ("?" + "&".join(otherArgs)) if otherArgs else ""
    fullUrl = uprs.scheme + "://" + uprs.netloc + scriptPath + queryString

    # ( labText, subjEntityGraphicClass, entity_id)
    return ParseEntityUri(fullUrl, longDisplay, force_entity_ip_addr)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: lib_export_html.py Projeto: rchateauneu/survol

def _display_class_objects_no_jinja(dict_subj_prop_obj):
    """Yield HTML fragments of table rows, one row per (subject, predicate, object).

    ``dict_subj_prop_obj`` maps each subject to a dictionary which maps each
    predicate to a list of objects. Subjects and objects are sorted by their
    title, as returned by lib_naming.ParseEntityUri; predicates are sorted
    with lib_util.natural_sorted.

    This is a generator: it yields the strings '<tr>', the '<td ...>' cells and
    their content, then '</td>' and '</tr>', in document order. The enclosing
    '<table>' element is not produced here.
    """
    tuples_subjects_list = []
    for a_subj in dict_subj_prop_obj:
        subj_str = str(a_subj)
        subj_title, entity_graphic_class, entity_id = lib_naming.ParseEntityUri(
            subj_str)
        if subj_title:
            # The intention is to detect a specific test case with accented characters.
            # A membership test is needed here: str.find() returns -1 when the
            # substring is absent, which is truthy, so using its result as a boolean
            # would discard every subject whose title starts with 'Y'.
            if subj_title[0] == 'Y' and "Boulogne" in subj_title:
                logging.debug("_display_class_objects_no_jinja subj_str=%s",
                              subj_str)
                logging.debug("_display_class_objects_no_jinja subj_title=%s",
                              subj_title)
                continue
        else:
            logging.debug("NO TITLE FOR %s", subj_str)
        tuples_subjects_list.append(
            (a_subj, subj_str, subj_title, entity_graphic_class, entity_id))

    # Sorted by the title of the subject, which is the third value of the tuple.
    lib_util.natural_sort_list(tuples_subjects_list, key=lambda tup: tup[2])

    # Apparently, a problem is that "%" gets transformed into an hexadecimal number, preventing decoding.
    def _custom_decode_hex(the_str):
        the_str = lib_util.survol_unescape(the_str)
        return the_str.replace("%25", "%").replace("%2F", "/").replace(
            "%5C", "\\").replace("%3A", ":")

    # Now it iterates on the sorted list.
    # This reuses all the intermediate values.
    for a_subj, subj_str, subj_title, entity_graphic_class, entity_id in tuples_subjects_list:
        # FIXME: This is a specific test to catch a specific condition...
        if a_subj.find("Boulogne") >= 0 or subj_str.find(
                "Boulogne") >= 0 or subj_title.find("Boulogne") >= 0:
            logging.debug("a_subj=%s", a_subj)
            logging.debug("subj_str=%s", subj_str)
            logging.debug("subj_title=%s", subj_title)
            continue

        dict_pred = dict_subj_prop_obj[a_subj]

        # Total number of lines: it is the rowspan of the subject cell.
        cnt_preds = sum(len(dict_pred[a_pred]) for a_pred in dict_pred)

        must_write_col_one_subj = True

        subj_str_with_mode = _url_in_html_mode(subj_str)

        # The predicates, i.e. the properties associated a subject with an object,
        # must be alphabetically sorted.
        for a_pred in lib_util.natural_sorted(dict_pred):
            lst_objs = dict_pred[a_pred]

            pred_str = lib_exports.AntiPredicateUri(str(a_pred))
            cnt_objs = len(lst_objs)
            must_write_col_one_pred = True

            # The objects must be sorted by title.
            lst_tuples_objs = []
            for an_obj in lst_objs:
                obj_str = _custom_decode_hex(str(an_obj))
                obj_title = lib_naming.ParseEntityUri(obj_str)[0]
                lst_tuples_objs.append((an_obj, obj_str, obj_title))

            # Sorted by the title of the object, which is the third value of the tuple.
            lib_util.natural_sort_list(lst_tuples_objs, key=lambda tup: tup[2])

            for an_obj, obj_str, obj_title in lst_tuples_objs:
                # FIXME: This is a specific test to catch a specific condition...
                if an_obj.find("Boulogne") >= 0 or obj_str.find(
                        "Boulogne") >= 0 or obj_title.find("Boulogne") >= 0:
                    logging.debug("an_obj=%s", an_obj)
                    logging.debug("obj_str=%s", obj_str)
                    logging.debug("obj_title=%s", obj_title)
                    continue

                # Nasty encoding errors. Probably the string should have been encoded before.
                if lib_util.isPlatformWindows and not lib_util.is_py3:
                    try:
                        # Only a probe: the decoded value itself is not needed.
                        obj_title.decode('ascii')
                    except UnicodeDecodeError:
                        logging.debug("obj_title=%s", obj_title)
                        obj_title = obj_title.decode('cp1252')

                yield '<tr>'

                # The subject cell spans all the rows of this subject,
                # so it is written with the first row only.
                if must_write_col_one_subj:
                    yield (
                        '<td valign="top" rowspan="%s"><a href="%s">%s</a></td>'
                        % (str(cnt_preds), subj_str_with_mode, subj_title))
                    must_write_col_one_subj = False

                # Same logic for the predicate cell, except for the predicates
                # whose object cell takes two columns: they have no predicate cell.
                if must_write_col_one_pred:
                    if a_pred not in _list_props_td_double_col_span:
                        yield '<td valign="top" rowspan="%s">%s</td>' % (
                            str(cnt_objs), pred_str)
                    must_write_col_one_pred = False

                col_span = 2 if a_pred in _list_props_td_double_col_span else 1

                # Hard-coded switch: the "else" branch below is currently never taken.
                disp_mime_urls = True

                yield '<td colspan="%d">' % col_span
                if disp_mime_urls:
                    if lib_kbase.IsLink(an_obj):
                        obj_str_clean = lib_util.UrlNoAmp(obj_str)
                        mimeType = lib_mime.get_mime_type_from_url(
                            obj_str_clean)
                        if mimeType:
                            if mimeType.startswith("image/"):
                                yield (
                                    """<a href="%s"><img src="%s" alt="%s" height="42" width="42"></a>"""
                                    % (obj_str, obj_str, obj_title))
                            else:
                                yield """<a href="%s">%s</a>""" % (obj_str,
                                                                   obj_title)
                        else:
                            url_with_mode = lib_util.url_mode_replace(
                                obj_str, "html")
                            yield """<a href="%s">%s</a>""" % (url_with_mode,
                                                               obj_title)
                    else:
                        yield '%s' % obj_str
                else:
                    if lib_kbase.IsLink(an_obj):
                        url_with_mode = _url_in_html_mode(obj_str)
                        yield '<a href="%s">%s</a>' % (url_with_mode,
                                                       obj_title)
                    else:
                        yield '%s' % obj_str

                yield "</td>"
                yield "</tr>"

Exemplo n.º 3

0

Exibir arquivo

Arquivo: lib_export_html.py Projeto: vchateauneu/survol

def DispClassObjects(dictSubjPropObj):
    """Write an HTML table with one row per (subject, predicate, object).

    ``dictSubjPropObj`` maps each subject to a dictionary which maps each
    predicate to a list of objects. Everything is written with WrtAsUtf(),
    nothing is returned.

    Subjects and objects are sorted by their title, as returned by
    lib_naming.ParseEntityUri; predicates with lib_util.natural_sorted.
    """
    # For these predicates, the object cell takes two columns
    # and no predicate cell is written.
    doubleColSpanProps = [
        pc.property_information, pc.property_rdf_data_nolist2,
        pc.property_rdf_data_nolist1
    ]

    WrtAsUtf('<table class="class_objects">')

    # First pass: compute the title of each subject, used as sort key.
    subjectRows = []
    for subject in dictSubjPropObj:
        subjectText = str(subject)
        subjectTitle, graphicClass, entityId = lib_naming.ParseEntityUri(
            subjectText)
        subjectRows.append(
            (subject, subjectText, subjectTitle, graphicClass, entityId))

    # The title is the third element of each tuple.
    lib_util.natural_sort_list(subjectRows, key=lambda row: row[2])

    # Second pass, on the sorted subjects, reusing the values of the first pass.
    for subject, subjectText, subjectTitle, graphicClass, _entityId in subjectRows:
        predicates = dictSubjPropObj[subject]

        # "Graphic_shape","Graphic_colorfill","Graphic_colorbg","Graphic_border","Graphic_is_rounded"
        # The second graphic parameter is used as background color of the rows.
        rowColor = lib_patterns.TypeToGraphParams(graphicClass)[1]

        # Number of rows of this subject: it is the rowspan of its cell.
        totalObjects = sum(len(predicates[pred]) for pred in predicates)

        subjectCellPending = True

        subjectHref = UrlInHtmlMode(subjectText)

        for predicate in lib_util.natural_sorted(predicates):
            objects = predicates[predicate]

            predicateLabel = lib_exports.AntiPredicateUri(str(predicate))
            objectCount = len(objects)
            predicateCellPending = True
            isDoubleColSpan = predicate in doubleColSpanProps

            # Compute the title of each object, used as sort key.
            objectRows = []
            for obj in objects:
                objText = DesHex(str(obj))
                objTitle = lib_naming.ParseEntityUri(objText)[0]
                objectRows.append((obj, objText, objTitle))

            lib_util.natural_sort_list(objectRows, key=lambda row: row[2])

            for obj, objText, objTitle in objectRows:

                WrtAsUtf('<tr bgcolor="%s">' % rowColor)

                # The subject cell is written once, with the first row of the subject.
                if subjectCellPending:
                    subjectCellPending = False
                    WrtAsUtf(
                        '<td valign="top" rowspan="%s"><a href="%s">%s</a></td>'
                        % (str(totalObjects), subjectHref, subjectTitle))

                # The predicate cell is written once, with the first row of the predicate.
                if predicateCellPending:
                    predicateCellPending = False
                    if not isDoubleColSpan:
                        WrtAsUtf('<td valign="top" rowspan="%s">%s</td>' %
                                 (str(objectCount), predicateLabel))

                columnSpan = 2 if isDoubleColSpan else 1

                # Hard-coded switch: the "else" branch below is currently never taken.
                showMimeUrls = True

                WrtAsUtf('<td colspan="%d">' % columnSpan)
                if showMimeUrls:
                    if not lib_kbase.IsLink(obj):
                        WrtAsUtf('%s' % objText)
                    else:
                        cleanObjText = lib_util.UrlNoAmp(objText)
                        mimeType = lib_mime.GetMimeTypeFromUrl(cleanObjText)
                        if not mimeType:
                            modedUrl = lib_util.AnyUriModed(objText, "html")
                            WrtAsUtf("""<a href="%s">%s</a>""" %
                                     (modedUrl, objTitle))
                        elif mimeType.startswith("image/"):
                            WrtAsUtf(
                                """<a href="%s"><img src="%s" alt="%s" height="42" width="42"></a>"""
                                % (objText, objText, objTitle))
                        else:
                            WrtAsUtf("""<a href="%s">%s</a>""" %
                                     (objText, objTitle))
                else:
                    if not lib_kbase.IsLink(obj):
                        WrtAsUtf('%s' % objText)
                    else:
                        modedUrl = UrlInHtmlMode(objText)
                        WrtAsUtf('<a href="%s">%s</a>' %
                                 (modedUrl, objTitle))

                WrtAsUtf("</td>")

                WrtAsUtf("</tr>")

    WrtAsUtf(" </table>")

Exemplo n.º 4

0

Exibir arquivo

Arquivo: lib_naming.py Projeto: rchateauneu/survol

def parse_entity_uri_with_host(uri_with_mode, long_display=True, force_entity_ip_addr=None):
    """
    This does two different things from an instance URI:
    - Split an URL into its class, key-value params and host.
    - And calculate a displayable string for this instance.

    This is used notably when transforming RDF into dot documents.
    The returned entity type is used for example choosing graphic attributes
    and gives more information than the simple entity type.

    :param uri_with_mode: The URI, as a str, bytes, unicode (Python 2),
        rdflib.term.URIRef or rdflib.term.Literal.
    :param long_display: If true, and if the URI has a "xid" CGI argument,
        the label is post-processed by _known_script_to_title().
    :param force_entity_ip_addr: Forwarded as is to _calc_label().
    :return: The tuple (entity_label, entity_graphic_class, entity_id, entity_host).
        entity_host is an empty string when the URI has no "xid" CGI argument.

    Example:
    (entity_label, entity_graphic_class, entity_id, entity_host) = parse_entity_uri_with_host(the_url)

    TODO: It could be split into two different functions.
    """

    # Maybe there is a host name before the entity type. It can contain letters, numbers,
    # hyphens, dots etc... but no ":" or "@".
    # THIS CANNOT WORK WITH IPV6 ADDRESSES...
    # WE MAY USE SCP SYNTAX: scp -6 osis@\[2001:db8:0:1\]:/home/osis/test.file ./test.file

    # This conversion because it might be called with rdflib.term.URIRef or rdflib.term.Literal
    # without explicit conversion, or unicode if py2.
    non_str_type = six.binary_type if lib_util.is_py3 else six.text_type
    if lib_util.is_py3 and isinstance(uri_with_mode, non_str_type):
        # With Python 3, str() of a bytes object returns its representation "b'...'"
        # and not its content, so the bytes must be decoded.
        # Invalid sequences are replaced rather than raising an exception.
        uri_with_mode = uri_with_mode.decode("utf-8", "replace")
    elif isinstance(uri_with_mode, (rdflib.term.Literal, rdflib.term.URIRef, non_str_type)):
        uri_with_mode = str(uri_with_mode)
    assert isinstance(uri_with_mode, str)

    # This replaces "&amp;" by "&" up to two times if needed.
    uri_with_mode_clean = lib_util.UrlNoAmp(uri_with_mode)

    uprs = lib_util.survol_urlparse(uri_with_mode_clean)

    # The CGI arguments are stored in a dictionary. Only the first "=" delimiter
    # counts, so a value may contain "=". If a key is repeated, the last one wins.
    uprs_query_split_cgi = uprs.query.split("&")
    uprs_query_dict = {
        key: value
        for key, _, value in (cgi_arg.partition("=") for cgi_arg in uprs_query_split_cgi)}
    cgi_arg_xid = uprs_query_dict.get("xid", None)
    uri_mode = uprs_query_dict.get("mode", "")
    associator_attribute = uprs_query_dict.get("__associator_attribute__", None)

    # Default value.
    entity_host = ""

    fil_script = os.path.basename(uprs.path)

    # This works for the scripts:
    # entity.py            xid=namespace/type:idGetNamespaceType
    # objtypes_wbem.py     Just extracts the namespace, as it prefixes the type: xid=namespace/type:id

    # See variable lib_util.xidCgiDelimiter="?xid="
    # Possibly, the "xid" parameter does not come at the beginning.
    if cgi_arg_xid is not None:
        # TODO: Maybe the chain contains HTML codes and therefore cannot be parsed.
        # Ex: "xid=%40%2F%3Aoracle_package." == "xid=@/:oracle_package."
        entity_type, entity_id, entity_host = lib_util.ParseXid(cgi_arg_xid)

        entity_graphic_class = entity_type

        entity_label = _calc_label(entity_host, entity_type, entity_id, force_entity_ip_addr, fil_script)

        # TODO: Consider external_url_to_title, similar logic with different results.
        if long_display:
            entity_label = _known_script_to_title(fil_script, uri_mode, entity_host, entity_label)

    # Maybe an internal script, but not entity.py
    # It has a special entity type as a display parameter
    elif uri_with_mode_clean.startswith(lib_util.uriRoot):
        # This is a bit of a special case which allows to display something if we know only
        # the type of the entity but its id is undefined. Instead of displaying nothing,
        # this attempts to display all available entities of this given type.
        # source_top/enumerate_process.py etc... Not "." because this has a special role in Python.
        mtch_enumerate = re.match(r"^.*/enumerate_([a-z0-9A-Z_]*)\.py$", uri_with_mode_clean)
        if mtch_enumerate:
            entity_graphic_class = mtch_enumerate.group(1)
            entity_id = ""
            # TODO: Change this label, not very nice.
            # This indicates that a specific script can list all objects of a given entity type.
            entity_label = entity_graphic_class + " enumeration"
        else:
            entity_graphic_class = "provider_script"
            entity_id = ""
            entity_label = _known_script_to_title(fil_script, uri_mode)
    elif uri_with_mode_clean.split(':')[0] in ["ftp", "http", "https", "urn", "mail"]:
        # Standard URLs. Example: lib_common.NodeUrl( "http://www.google.com" )
        entity_graphic_class = ""
        entity_id = ""
        # Display the complete URL, otherwise it is not clickable.
        entity_label = uri_with_mode
    else:
        entity_graphic_class = ""
        # This specific keyword used when no class is specified and there is no object. It is easy to spot.
        # It happens for example for blank nodes, BNode, used to created literal values with a key:
        # Arguments of a function, successive values with a time-stamp.
        entity_id = "PLAINTEXTONLY"

        # Maybe an external URI sending data in RDF, HTML etc...
        # We could also load the URL and gets its title if it is in HTML.
        basna = lib_util.EncodeUri(fil_script)
        if uprs.netloc != "":
            entity_label = uprs.netloc + "/" + basna
        else:
            entity_label = basna

        # TODO: " " are replaced by "%20". Why ? So change back.
        entity_label = entity_label.replace("%20", " ")

    assert isinstance(entity_graphic_class, str)

    # If the URI designates an associator, the label mentions its attribute.
    if associator_attribute:
        entity_label = associator_attribute.replace(".", " ") + " of " + entity_label

    return entity_label, entity_graphic_class, entity_id, entity_host

Exemplo n.º 5

0

Exibir arquivo

def ParseEntityUri(uriWithMode, longDisplay=True, force_entity_ip_addr=None):
    """Compute a label, a graphic class and an entity id from a URI.

    Four kinds of URI are handled, in this order:
    - The query starts with "xid=": the rest of the query is parsed by lib_util.ParseXid.
    - The URI starts with lib_util.uriRoot: an internal script, not entity.py.
    - The scheme is "ftp", "http", "https", "urn" or "mail": a standard URL.
    - Anything else is plain text, with the entity id "PLAINTEXTONLY".

    Returns the tuple (entity_label, entity_graphic_class, entity_id).
    """
    # Maybe there is a host name before the entity type. It can contain letters, numbers,
    # hyphens, dots etc... but no ":" or "@".
    # THIS CANNOT WORK WITH IPV6 ADDRESSES...
    # WE MAY USE SCP SYNTAX: scp -6 osis@\[2001:db8:0:1\]:/home/osis/test.file ./test.file

    # In the URI, we might have the CGI parameter "&mode=json". It must be removed otherwise
    # it could be taken in entity_id, and the result of EntityToLabel() would be wrong.
    cleanUri = lib_util.UrlNoAmp(uriWithMode)
    modelessUri = lib_util.AnyUriModed(cleanUri, "")
    displayMode = lib_util.GetModeFromUrl(cleanUri)

    parsedUrl = lib_util.survol_urlparse(modelessUri)
    scriptName = os.path.basename(parsedUrl.path)

    # This works for the scripts:
    # entity.py            xid=namespace/type:idGetNamespaceType
    # objtypes_wbem.py     Just extracts the namespace, as it prefixes the type: xid=namespace/type:id

    # See variable lib_util.xidCgiDelimiter="?xid="
    if parsedUrl.query.startswith("xid="):
        # TODO: Maybe the chain contains HTML codes and therefore cannot be parsed.
        # Ex: "xid=%40%2F%3Aoracle_package." == "xid=@/:oracle_package."
        # The entity type is also the graphic class.
        graphicClass, entityId, entityHost = lib_util.ParseXid(
            parsedUrl.query[4:])

        label = CalcLabel(entityHost, graphicClass, entityId,
                          force_entity_ip_addr, scriptName)

        # TODO: Consider external_url_to_title, similar logic with different results.
        if longDisplay:
            label = KnownScriptToTitle(scriptName, displayMode, entityHost,
                                       label)
        return (label, graphicClass, entityId)

    # Maybe an internal script, but not entity.py
    # It has a special entity type as a display parameter
    if modelessUri.startswith(lib_util.uriRoot):
        # This is a bit of a special case which allows to display something if we know only
        # the type of the entity but its id is undefined. Instead of displaying nothing,
        # this attempts to display all available entities of this given type.
        # source_top/enumerate_process.py etc... Not "." because this has a special role in Python.
        enumerateMatch = re.match(r"^.*/enumerate_([a-z0-9A-Z_]*)\.py$",
                                  modelessUri)
        if not enumerateMatch:
            return (KnownScriptToTitle(scriptName, displayMode),
                    "provider_script", "")

        # TODO: Change this label, not very nice.
        # This indicates that a specific script can list all objects of a given entity type.
        enumeratedClass = enumerateMatch.group(1)
        return (enumeratedClass + " enumeration", enumeratedClass, "")

    uriScheme = modelessUri.split(':')[0]
    if uriScheme in ["ftp", "http", "https", "urn", "mail"]:
        # Standard URLs. Example: lib_common.NodeUrl( "http://www.google.com" )
        # Display the complete URL, otherwise it is not clickable.
        return (uriWithMode, "", "")

    # TODO: " " are replaced by "%20". Why ? So change back.
    plainLabel = UriToTitle(parsedUrl).replace("%20", " ")
    return (plainLabel, "", "PLAINTEXTONLY")