示例#1
0
文件: neo4j_api.py 项目: yf1291/nlp3
    def search_with_triple(self, triple_doc, **kwargs):
        """
        Query graph nodes for a subject/predicate/object triple.

        Each predicate URI of the triple is looked up in
        ``self.property_collection``; the ``type`` of every matching document
        selects the query to run ('data' -> ``query_node_property``,
        'object' -> ``query_node_relation``).  The per-predicate results are
        merged into one dict, later predicates overwriting earlier keys.

        :param triple_doc: triple information; the keys 'subject', 'object'
            and 'predicate' (list of predicate URIs) are read
        :param kwargs: extra options; ``query_property`` is the property used
            to query related nodes (defaults to 'name')
        :return: dict with the merged results, empty when nothing was queried
        """
        self.debug('>>> start search_with_triple <<<')
        ret = {}
        if triple_doc:
            triple_subject = str2unicode(triple_doc.get('subject', ""))  # subject
            triple_object = str2unicode(triple_doc.get('object', ""))  # object
            # predicates (relation properties)
            triple_predicates = triple_doc.get('predicate', [])
            # property used to query related nodes, 'name' by default
            query_property = kwargs.get("query_property", 'name')
            self.debug(
                'triple_subject=%s, triple_predicates=%s, triple_object=%s, query_property=%s',
                triple_subject,
                json.dumps(triple_predicates,
                           ensure_ascii=False), triple_object, query_property)

            # Look the predicates up in the property store to learn whether
            # each one is a data relation or an object relation.
            predicate_docs = self.property_collection.find(
                {'uri': {
                    '$in': triple_predicates
                }})
            for predicate_item in predicate_docs:  # query neo4j once per predicate
                predicate_type = predicate_item.get('type', '')
                predicate_value = str(predicate_item.get('uri', ''))
                self.debug('predicate_type=%s, predicate_value=%s',
                           predicate_type, predicate_value)

                tmp_ret = {}
                if predicate_type == 'data':  # predicate is a data relation
                    tmp_ret = self.query_node_property(triple_subject,
                                                       predicate_value,
                                                       triple_object)
                elif predicate_type == 'object':  # predicate is an object relation
                    tmp_ret = self.query_node_relation(triple_subject,
                                                       predicate_value,
                                                       triple_object,
                                                       query_property)
                else:
                    self.warn(
                        '@@@@@@@@@@@@@@@@@@@@@@@@ unexpected value, predicate_type is None'
                    )
                # Merge in place.  ``dict(ret, **tmp_ret)`` would raise when a
                # query helper returns None and, on Python 3, when a result
                # key is not a string.
                if tmp_ret:
                    ret.update(tmp_ret)
        else:
            self.warn(
                '@@@@@@@@@@@@@@@@@@@@ unexpected value, triple_doc is None')
        self.debug('>>> end search_with_triple <<<')
        return ret
示例#2
0
def nt_password_hash(passwd, pad_to_21_bytes=True):
    """
    Compute the NtPasswordHash of a password.

    Specification text quoted by the original author:

        NtPasswordHash(
        IN  0-to-256-unicode-char Password,
        OUT 16-octet              PasswordHash )

        Use the MD4 algorithm [5] to irreversibly hash Password
        into PasswordHash.  Only the password is hashed without
        including any terminating 0.

    :param passwd: password; converted with ``utils.str2unicode`` before
        hashing
    :param pad_to_21_bytes: when true, five zero bytes are appended to the
        digest
    :return: the MD4 digest, optionally followed by the zero padding
    """

    # The password has to be in its unicode form before hashing.
    pw = utils.str2unicode(passwd)

    # MD4 hash of the converted password.
    md4_context = md4.new()
    md4_context.update(pw)

    res = md4_context.digest()

    if pad_to_21_bytes:
        # Add zeros to get a 21-byte string.  This line used to be indented
        # with a tab inside a space-indented body, which Python 3 rejects
        # with TabError.
        res = res + '\000\000\000\000\000'

    return res
示例#3
0
    def translate_to_basic(self, environment, connection, error_code):
        """
        Translate an NTLM challenge into Basic authentication.

        When ``self.get_credentials_from_basic`` yields a user, the
        environment is filled with the upper-cased user name and the LM/NT
        hashed passwords.  Otherwise ``self.replace_ntlm_with_basic`` is
        called for the connection.

        :param environment: NTLM environment dict; 'UNICODE' is read and
            'USER', 'LM_HASHED_PW', 'NT_HASHED_PW' are written
        :param connection: connection object providing ``logger`` and
            ``rserver_head_obj``
        :param error_code: passed through to the credential/header helpers
        :return: 1 when Basic credentials were found, 0 otherwise
        """
        connection.logger.log('*** Translating NTLM to Basic...\n')
        user, password = self.get_credentials_from_basic(connection, error_code)
        if user:
            connection.logger.log("*** Found Basic credentials in client's header.\n")

            # str.upper() replaces string.upper(), which no longer exists in
            # Python 3; both give the same result on Python 2.
            if environment['UNICODE']:
                environment['USER'] = utils.str2unicode(user.upper())
            else:
                environment['USER'] = user.upper()

            # NOTE(review): the next line writes the clear-text password to
            # the log - confirm this is acceptable outside of debugging.
            connection.logger.log("*** Basic User/Password: %s/%s.\n" % (user, password))

            connection.logger.log("*** Calculating hashed passwords (LM and NT)...")
            environment['LM_HASHED_PW'] = ntlm_procs.create_LM_hashed_password(password)
            environment['NT_HASHED_PW'] = ntlm_procs.create_NT_hashed_password(password)
            connection.logger.log("Done.\n")

            return 1

        else:
            connection.logger.log("*** There are no basic credentials in client's header.\n")
            connection.logger.log("*** Replacing NTLM value with Basic in rserver's header...")
            self.replace_ntlm_with_basic(connection, error_code)
            connection.logger.log("Done.\n")

            connection.logger.log("*** New server's header:\n=====\n" + connection.rserver_head_obj.__repr__())

            return 0
0
def nt_password_hash(passwd, pad_to_21_bytes=True):
    """
    Hash a password with MD4 (NtPasswordHash).

    The password is first converted with ``utils.str2unicode``; only the
    password itself is hashed, without any terminating 0.

    :param passwd: the password to hash
    :param pad_to_21_bytes: append five zero bytes to the digest when true
    :return: the MD4 digest, padded on request
    """
    unicode_password = utils.str2unicode(passwd)

    hasher = md4.new()
    hasher.update(unicode_password)
    digest = hasher.digest()

    if not pad_to_21_bytes:
        return digest

    # Zero padding that brings the result up to a 21-byte string.
    return digest + '\000\000\000\000\000'
示例#5
0
 def _match_predicate(self):
     """
     Find the first stored template whose pattern matches ``self.query``.

     Templates come from ``self.template_core.search_with_seg``.  Each
     template needs a non-empty 'pattern' (a regular expression with a
     named group 'title') and a non-empty 'predicate_value'; incomplete
     templates are reported with a warning and skipped.

     :return: tuple ``(subject, predicate)`` taken from the first matching
         template, or two empty strings when nothing matches
     """
     self.debug('>>> start _match_predicate <<<')
     templates_docs = self.template_core.search_with_seg(
         self.query,
         query_fields=['key_index'],
     )
     templates_list = list(templates_docs)
     self.debug("got templates_docs=%s",
                json.dumps(templates_list, ensure_ascii=False))
     if not templates_list:
         self.debug("retrieved None templates_docs")
     for tmp_item in templates_list:
         pattern_str = tmp_item.get('pattern', '')
         predicate_value = tmp_item.get('predicate_value', '')
         if not (pattern_str and predicate_value):
             self.warn(
                 '@@@@@@@@@@@@@@@@@@@@@@@ unexpected pattern_str=%s, predicate_value=%s',
                 pattern_str, predicate_value)
             continue
         # u'%s' is equivalent to the former ur'%s' (the literal contains no
         # backslash) and, unlike the ur prefix, is valid Python 3 syntax.
         pattern = re.compile(u'%s' % pattern_str)
         is_match = pattern.match(str2unicode(self.query))
         if is_match:
             self.debug('got match pattern=%s, predicate_value=%s',
                        pattern_str, predicate_value)
             # The first matching template wins.
             return is_match.group('title'), predicate_value
     self.warn("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ don't match any templates ")
     self.debug(">>> end _match_predicate <<<")
     return '', ''
示例#6
0
 def search_neighbors_info(self, name, relationship, **kwargs):
     """
     Query the neighbours of a node through a relationship.

     The Cypher statement is built from ``BIO_CYPER_TEMPLATE``: the
     'neighbors_property' template when ``node_property`` is given, the
     'neighbors_data' template otherwise.

     :param name: node name; a falsy name is reported and yields ``{}``
     :param relationship: relationship inserted into the template
     :param kwargs: optional ``node_property`` to query on the neighbours
     :return: the value produced by ``self._extract_answer``, or ``{}``
     """
     node_property = kwargs.get('node_property', '')
     if not node_property:
         self.debug('search node name=%s, relationship=%s', name, relationship)
     else:
         self.debug('search node name=%s, relationship=%s, property=%s',
                    name, relationship, node_property)

     if not name:
         self.warn('@@@@@@@@@@@@@@ name is None')
         return {}

     if node_property:
         cypher = BIO_CYPER_TEMPLATE['neighbors_property'] % (
             str2unicode(name), relationship, node_property)
     else:
         cypher = BIO_CYPER_TEMPLATE['neighbors_data'] % (
             str2unicode(name), relationship)
     self.debug('condition=%s', cypher)
     rows = self.graph.run(cypher).data()
     answer = self._extract_answer(rows)
     self.debug('got name=%s, answer=%s', name, json.dumps(answer))
     return answer
示例#7
0
 def search_node_info(self, name, **kwargs):
     """
     Query one property of a node, falling back to the equal-node query.

     The 'node_property' template of ``BIO_CYPER_TEMPLATE`` is run first;
     when it gives an empty answer the 'equal_node_property' template is
     run with the same arguments.

     :param name: node name
     :param kwargs: ``node_property`` names the property to query
     :return: the value produced by ``self._extract_answer``; ``{}`` when
         ``name`` or ``node_property`` is missing
     """
     def lookup(template_key):
         # Build, log and run one Cypher statement, then extract the answer.
         cypher = BIO_CYPER_TEMPLATE[template_key] % (str2unicode(name), node_property)
         self.debug('condition=%s', cypher)
         rows = self.graph.run(cypher).data()
         return self._extract_answer(rows)

     answer = {}
     self.debug('>>> start search_node_info <<<')
     node_property = kwargs.get('node_property', '')
     self.debug('search node with name=%s, property=%s', name, node_property)
     if not (name and node_property):
         self.warn('@@@@@@@@@@@@@@ unexpected name=%s, property=%s', name, node_property)
     else:
         answer = lookup('node_property')
         if not answer:
             self.debug('search equal_node name=%s, property=%s', name, node_property)
             answer = lookup('equal_node_property')
             self.debug('got equal_node=%s, answer=%s', name, json.dumps(answer))
         else:
             self.debug('got node=%s, answer=%s', name, json.dumps(answer))
     self.debug('>>> end search_node_info <<<')
     return answer
示例#8
0
def create_NT_hashed_password(passwd):
    """
    Create the NT hashed password.

    The password is converted with ``utils.str2unicode``, hashed with MD4
    and the digest is extended with five zero bytes.

    :param passwd: the password to hash
    :return: MD4 digest followed by the zero padding
    """
    unicode_passwd = utils.str2unicode(passwd)

    hasher = md4.new()
    hasher.update(unicode_passwd)

    # Zeros are appended to get a 21-byte string.
    return hasher.digest() + '\000\000\000\000\000'
示例#9
0
def create_NT_hashed_password(passwd):
    """Return the zero-padded MD4 digest of ``passwd`` (NT hashed password).

    ``passwd`` goes through ``utils.str2unicode`` before it is hashed.
    """
    # Hashing works on the converted (unicode) password.
    converted = utils.str2unicode(passwd)

    context = md4.new()
    context.update(converted)
    digest = context.digest()

    # Pad with zeros so that the result is a 21-byte string.
    padded = digest + '\000\000\000\000\000'
    return padded
示例#10
0
    def _match_predicate(self):
        """
        Match predicates for ``self.query`` using the stored templates.

        Every template returned by ``self.template_core.search_with_seg`` is
        compiled as a regular expression and matched against the query.  The
        pattern is expected to define a named group 'title'.

        :return: list of dicts (keys 'pattern', 'predicates', 'priority',
            'missing_tuple', 'title'), one per matching template.  As soon
            as a template with priority 1 matches, the list is reduced to
            that single template and the search stops.
        """
        self.debug('>>> start _match_predicate <<<')
        template_docs = self.template_core.search_with_seg(
            self.query, query_fields=['key_index'])
        match_template_docs = []
        if template_docs:
            for tmp_item in template_docs:
                pattern_str = tmp_item.get('pattern', '')  # regular expression of the template
                predicates = tmp_item.get('predicates', [])  # predicates of the template
                priority = tmp_item.get(
                    'priority', DEFAULT_TEMPLATE_PRIORITY)  # template priority (1 = exact match)
                missing_tuple = tmp_item.get('missing_tuple', '')  # missing part of the triple

                # u'%s' is equivalent to the former ur'%s' (no backslash in
                # the literal) and is also valid Python 3 syntax.
                pattern = re.compile(u'%s' % pattern_str)
                is_match = pattern.match(str2unicode(self.query))
                if is_match:  # template matched: collect it
                    doc = {
                        'pattern': pattern_str,
                        'predicates': predicates,
                        'priority': priority,
                        'missing_tuple': missing_tuple
                    }
                    self.debug("got match pattern=%s", pattern_str)
                    doc['title'] = is_match.group('title')
                    if priority != 1:  # not an exact template: keep collecting
                        match_template_docs.append(doc)
                    else:  # exact template: return only this one
                        # The pattern argument was missing here, so the
                        # placeholder was never filled in.
                        self.debug('got precise pattern=%s', pattern_str)
                        match_template_docs = [
                            doc,
                        ]
                        break
                else:  # template did not match
                    self.debug("don't match pattern=%s", pattern_str)
        else:
            self.warn(
                '@@@@@@@@@@@@@@@@@@@@@@ unexpected value, templates_docs=None')
        self.debug(">>> end _match_predicate <<<")
        return match_template_docs
示例#11
0
    def translate_to_basic(self, environment, connection, error_code):
        """
        Answer an NTLM challenge with Basic credentials sent by the client.

        Without credentials the server header is rewritten through
        ``self.replace_ntlm_with_basic`` and 0 is returned.  With
        credentials the environment receives the upper-cased user name and
        both hashed passwords, and 1 is returned.

        :param environment: NTLM environment dict, updated in place
        :param connection: connection object (``logger``, ``rserver_head_obj``)
        :param error_code: forwarded to the helper methods
        :return: 1 when credentials were found, otherwise 0
        """
        connection.logger.log('*** Translating NTLM to Basic...\n')
        credentials = self.get_credentials_from_basic(connection, error_code)
        user, password = credentials

        if not user:
            connection.logger.log(
                "*** There are no basic credentials in client's header.\n")
            connection.logger.log(
                "*** Replacing NTLM value with Basic in rserver's header...")
            self.replace_ntlm_with_basic(connection, error_code)
            connection.logger.log("Done.\n")

            header_dump = connection.rserver_head_obj.__repr__()
            connection.logger.log("*** New server's header:\n=====\n" + header_dump)
            return 0

        connection.logger.log(
            "*** Found Basic credentials in client's header.\n")

        # The user name is stored upper-cased, converted when UNICODE is set.
        if not environment['UNICODE']:
            environment['USER'] = string.upper(user)
        else:
            environment['USER'] = utils.str2unicode(string.upper(user))

        credentials_line = "*** Basic User/Password: %s/%s.\n" % (user, password)
        connection.logger.log(credentials_line)

        connection.logger.log(
            "*** Calculating hashed passwords (LM and NT)...")
        lm_hash = ntlm_procs.create_LM_hashed_password(password)
        environment['LM_HASHED_PW'] = lm_hash
        nt_hash = ntlm_procs.create_NT_hashed_password(password)
        environment['NT_HASHED_PW'] = nt_hash
        connection.logger.log("Done.\n")

        return 1
示例#12
0
def collect_features(browser):
    '''
    Read DOM attributes from the current page loaded by the browser, derive page features

    Args:
        browser (cef): Browser object with the page already loaded and ready

    Returns:
        tuple (header, attributes, dom, body html): the header and attributes
        are lists which together form a feature table; dom is the raw
        attributes extracted by JS code, in form of list of lists; the body
        html is the rendered html code of the <body> part of the page

    Data dictionary of collect_features() output:
      id          [int] seq num of node in JS dom tree
      parent      [int] id of parent node
      tagname     [str] HTML tag name
      depth       [int] node count to its deepest descendant in dom tree (etree-based)
      childcount  [int] num of children (etree-based)
      sourceline  [int] line num of source code (etree-based, i.e. start from <body> tag)
      sourcepct   [float] percentage position of source line in HTML (etree-based, within <body>)
      pospct      [float] percentage position of node in the DOM (depth-first search of JS DOM)
      xpct        [float] percentage position of element's left edge to window width
      x           [int] pixel coordinate of left edge of element's bounding box to the page
      y           [int] pixel coordinate of top edge of element's bounding box to the page
      width       [int] pixel width of element's bounding box
      height      [int] pixel height of element's bounding box
      fgcolor     [str] foreground color, in form of rgb(255,255,255) or rgba(255,255,255,1.0)
      bgcolor     [str] background color, in form of rgb(255,255,255) or rgba(255,255,255,1.0)
      textxws     [int] character length of text excluding whitespaces
      textlen     [int] character length of text
      htmllen     [int] character length of HTML code
      visible     [bool] visibility of this element
      fontsize    [float] font size
      xpath       [str] xpath of element
      textclip    [str] starting and ending snippet of text
    '''
    # synchronous get, every cell is passed through str2unicode
    dom = [[str2unicode(x) for x in row]
           for row in browser.getDOMdata(True)]
    winparam = browser.windowParams
    winwidth = winparam['innerWidth']
    logger.debug("%d web elements found" % len(dom))
    bodyhtml = next((x[-1] for x in dom if x[0]=='/html/body'),'')
    assert(bodyhtml) # we assumed there must be a body
    domtree = html2dom(bodyhtml) # need to pretty format source before use
    objectify.deannotate(domtree, cleanup_namespaces=True)
    linecount = len(bodyhtml.split("\n"))

    # populate DOM tree geometry data
    xpathHash = {attrs[0]:i for i,attrs in enumerate(dom)}
    depthHash = {} # actually "height", distance from node to deepest leaf, based on lxml etree
    def findElementDepth(e):
        "e: lxml etree element node, find its depth in dom tree"
        if e not in depthHash:
            if len(e): # e has children
                depthHash[e] = 1 + max(findElementDepth(x) for x in e.iterchildren())
            else: # e has no children, by definition depth=0
                depthHash[e] = 0
        return depthHash[e]

    # collect element attributes:
    attributes = []
    for i,attrs in enumerate(dom):
        if i and (i % 1000 == 0):
            logger.debug('...on element #%d' % i)
        xpath, display, visible, x, y, width, height, fgcolor, bgcolor, fontsize, textonly, htmlcode = attrs
        if not xpath or re.search(r'[^a-z0-9\[\]\/]',xpath) or re.search(r'(?<!\w)(script|head)(?!\w)',xpath):
            continue # skip these to avoid pollution by JS or HTML header
        etreenode  = domtree.xpath(xpath)
        if not etreenode:
            logger.error('JS reported XPath cannot be found in lxml: %s' % xpath)
            continue
        if len(etreenode) != 1:
            # ambiguous XPath: report it and carry on with the first match
            logger.error('XPath not unique for %s. %d elements found.' % (xpath, len(etreenode)))
        element    = etreenode[0]
        parent     = xpathHash.get(xpath.rsplit('/',1)[0])
        tagname    = xpath.rsplit('/',1)[-1].split('[',1)[0]
        depth      = findElementDepth(element)
        # len() of the element is its number of children (same convention as
        # findElementDepth).  The former code took len(etreenode), i.e. the
        # number of XPath matches, and its fallback branch was unreachable
        # and broken.
        childcount = len(element)
        sourceline = element.sourceline
        fgcolor    = fgcolor.replace(' ','')
        bgcolor    = bgcolor.replace(' ','')
        textonly   = condense_space(textonly) # text from JS retains word boundary by replacing tag with space while etree.tostring() just remove tags
        htmlcode   = condense_space(htmlcode)
        if not htmlcode: # JS cannot give out the HTML, use etree version instead
            htmlcode = condense_space(etree.tostring(element, encoding='utf8', method='html').decode('utf8'))
        # derived data
        textlen, htmllen = len(textonly), len(htmlcode)
        textxws = sum(1 for c in textonly if c and not c.isspace()) # text length excluding whitespaces
        if not htmllen:
            logger.error('empty HTML for tag %s on line %s at (%s,%s)+(%s,%s)' % (tagname, sourceline, x,y,width,height))
        textclip   = abbreviate(textonly)
        # float() keeps these true ratios even under Python 2 integer division
        sourcepct  = sourceline/float(linecount)
        xpct       = x/float(winwidth)
        pospct     = (i+1)/float(len(dom))
        # remember this
        attributes.append([i, parent, tagname, depth, childcount, sourceline, sourcepct, pospct, xpct, x, y,
            width, height, fgcolor, bgcolor, textxws, textlen, htmllen, min(visible,display), fontsize,
            xpath, textclip])

    header = ("id parent tagname depth childcount sourceline sourcepct pospct xpct x y width height "
              "fgcolor bgcolor textxws textlen htmllen visible fontsize xpath textclip").split()
    return header, attributes, dom, bodyhtml
示例#13
0
    def build_env_dict(self, connection):
        """
        Build the environment dictionary used for NTLM authentication.

        All values are read from ``connection.config`` (sections 'NTLM_AUTH'
        and 'GENERAL'); progress is reported through
        ``connection.logger.log``.

        :param connection: object providing ``config`` and ``logger``
        :return: dict with the keys 'FLAGS', 'LM', 'NT', 'UNICODE', 'DOMAIN',
            'HOST', 'USER', 'NTLM_MODE', 'NTLM_TO_BASIC' and, when both are
            configured, 'LM_HASHED_PW' and 'NT_HASHED_PW'
        """
        connection.logger.log('*** Building environment for NTLM.\n')

        ntlm_config = connection.config['NTLM_AUTH']
        env = {}

        if ntlm_config['NTLM_FLAGS']:
            env['FLAGS'] = ntlm_config['NTLM_FLAGS']

            connection.logger.log('*** Using custom NTLM flags: %s\n' % env['FLAGS'])

        else:
            # I have seen flag field '\005\202' as well (with NT response).
            # 0x8206 or 0x8207 or 0x8205
            env['FLAGS'] = "06820000"

            connection.logger.log('*** Using default NTLM flags: %s\n' % env['FLAGS'])

        env['LM'] = ntlm_config['LM_PART']
        env['NT'] = ntlm_config['NT_PART']

        # we must have at least LM part
        if not (env['LM'] or env['NT']):
            env['LM'] = 1

        if env['LM'] == 1 and env['NT'] == 0:
            connection.logger.log('*** NTLM version with LM response only.\n')

        elif env['LM'] == 1 and env['NT'] == 1:
            connection.logger.log('*** NTLM version with LM and NT responses.\n')

        elif env['LM'] == 0 and env['NT'] == 1:
            connection.logger.log('*** NTLM version with NT response only.\n')

        # UNICODE strings are used exactly when the NT part is enabled.
        if env['NT']:
            env['UNICODE'] = 1
        else:
            env['UNICODE'] = 0

        # str.upper() replaces string.upper(), which no longer exists in
        # Python 3; both give the same result on Python 2.
        env['DOMAIN'] = ntlm_config['NT_DOMAIN'].upper()

        # An explicit NT_HOSTNAME wins; otherwise fall back to the HOST
        # value of the GENERAL section.
        if ntlm_config['NT_HOSTNAME']:
            env['HOST'] = ntlm_config['NT_HOSTNAME'].upper()
        else:
            env['HOST'] = connection.config['GENERAL']['HOST'].upper()

        env['USER'] = ntlm_config['USER'].upper()

        connection.logger.log('*** NTLM Domain/Host/User: %s/%s/%s\n' % (env['DOMAIN'], env['HOST'], env['USER']))

        # have to use UNICODE strings
        if env['UNICODE']:
            env['DOMAIN'] = utils.str2unicode(env['DOMAIN'])
            env['HOST'] = utils.str2unicode(env['HOST'])
            env['USER'] = utils.str2unicode(env['USER'])

            connection.logger.log('*** Using UNICODE stings.\n')

        # Pre-hashed passwords are taken over only when both are configured.
        if ntlm_config['LM_HASHED_PW'] and ntlm_config['NT_HASHED_PW']:
            env['LM_HASHED_PW'] = ntlm_config['LM_HASHED_PW']
            env['NT_HASHED_PW'] = ntlm_config['NT_HASHED_PW']

            connection.logger.log('*** NTLM hashed passwords found.\n')

        # Test params.  The ``in`` operator replaces dict.has_key(), which
        # was removed in Python 3.
        if 'NTLM_MODE' in ntlm_config:
            env['NTLM_MODE'] = int(ntlm_config['NTLM_MODE'])
        else:
            env['NTLM_MODE'] = 0
        # End of test params

        env['NTLM_TO_BASIC'] = ntlm_config['NTLM_TO_BASIC']

        connection.logger.log('*** Environment has been built successfully.\n')

        return env
示例#14
0
    def build_env_dict(self, connection):
        """
        Collect the NTLM settings of a connection into an environment dict.

        The 'NTLM_AUTH' and 'GENERAL' sections of ``connection.config`` are
        read and every step is reported through ``connection.logger.log``.

        :param connection: object providing ``config`` and ``logger``
        :return: dict with 'FLAGS', 'LM', 'NT', 'UNICODE', 'DOMAIN', 'HOST',
            'USER', 'NTLM_MODE' (int), 'NTLM_TO_BASIC' (int) and, when both
            are configured, 'LM_HASHED_PW' and 'NT_HASHED_PW'
        """
        log = connection.logger.log
        auth = connection.config['NTLM_AUTH']

        log('*** Building environment for NTLM.\n')

        env = {}

        custom_flags = auth['NTLM_FLAGS']
        if custom_flags:
            env['FLAGS'] = custom_flags
            log('*** Using custom NTLM flags: %s\n' % env['FLAGS'])
        else:
            # I have seen flag field '\005\202' as well (with NT response).
            # 0x8206 or 0x8207 or 0x8205
            env['FLAGS'] = "06820000"
            log('*** Using default NTLM flags: %s\n' % env['FLAGS'])

        env['LM'] = auth['LM_PART']
        env['NT'] = auth['NT_PART']

        # At least the LM part must be enabled.
        if not (env['LM'] or env['NT']):
            env['LM'] = 1

        if env['LM'] == 1 and env['NT'] == 0:
            log('*** NTLM version with LM response only.\n')
        elif env['LM'] == 1 and env['NT'] == 1:
            log('*** NTLM version with LM and NT responses.\n')
        elif env['LM'] == 0 and env['NT'] == 1:
            log('*** NTLM version with NT response only.\n')

        # UNICODE strings are used exactly when the NT part is enabled.
        env['UNICODE'] = 1 if env['NT'] else 0

        # str.upper() is used instead of string.upper(): the module-level
        # function was removed in Python 3 and the method is equivalent.
        env['DOMAIN'] = auth['NT_DOMAIN'].upper()

        # Prefer the explicit NT_HOSTNAME; otherwise use GENERAL/HOST.
        if auth['NT_HOSTNAME']:
            env['HOST'] = auth['NT_HOSTNAME'].upper()
        else:
            env['HOST'] = connection.config['GENERAL']['HOST'].upper()

        env['USER'] = auth['USER'].upper()

        log('*** NTLM Domain/Host/User: %s/%s/%s\n' % (env['DOMAIN'], env['HOST'], env['USER']))

        if env['UNICODE']:
            for key in ('DOMAIN', 'HOST', 'USER'):
                env[key] = utils.str2unicode(env[key])
            log('*** Using UNICODE stings.\n')

        # Pre-hashed passwords are taken over only when both are configured.
        if auth['LM_HASHED_PW'] and auth['NT_HASHED_PW']:
            env['LM_HASHED_PW'] = auth['LM_HASHED_PW']
            env['NT_HASHED_PW'] = auth['NT_HASHED_PW']
            log('*** NTLM hashed passwords found.\n')

        # Test parameter.  ``in`` replaces dict.has_key(), removed in Python 3.
        env['NTLM_MODE'] = int(auth['NTLM_MODE']) if 'NTLM_MODE' in auth else 0

        env['NTLM_TO_BASIC'] = int(auth['NTLM_TO_BASIC'])

        log('*** Environment has been built successfully.\n')

        return env
示例#15
0
def nt_password_hash(passwd):
    """NtPasswordHash: MD4 digest of ``passwd`` after ``utils.str2unicode``."""
    unicode_passwd = utils.str2unicode(passwd)
    hasher = md4.new()
    hasher.update(unicode_passwd)
    digest = hasher.digest()
    return digest
示例#16
0
def nt_password_hash(passwd):
    """
    Compute the NtPasswordHash of a password.

    :param passwd: password; converted with ``utils.str2unicode`` first
    :return: the digest of the MD4 context fed with the converted password
    """
    # Convert first, then feed the result to a fresh MD4 context.
    converted = utils.str2unicode(passwd)
    context = md4.new()
    context.update(converted)
    return context.digest()