def preparePatterns(excludePatternsList):
    result = []
    if excludePatternsList:
        patternList = excludePatternsList.split(";")
        wildcardValidationPattern = Pattern.compile("[\d*?.]+")
        wildcardSubstitutions = [(Pattern.compile("\."), "\\\\."),
                                 (Pattern.compile("\*+"), ".*"),
                                 (Pattern.compile("\?"), ".")]
        for patternStr in patternList:
            if patternStr:
                patternStr = patternStr.strip()
                wildcardValidationMatcher = wildcardValidationPattern.matcher(String(patternStr))
                if wildcardValidationMatcher.matches():
                    for (rPattern, rStr) in wildcardSubstitutions:
                        rMatcher = rPattern.matcher(String(patternStr))
                        patternStr = rMatcher.replaceAll(rStr)
                    try:
                        pattern = Pattern.compile(patternStr)
                        result.append(pattern)
                    except:
                        logger.warn("Exception '%s' when compiling pattern '%s', pattern is ignored"
                                    % (sys.exc_info()[0], patternStr))
                else:
                    logger.warn("Ignoring invalid wildcard pattern '%s'" % patternStr)
    return result
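# Hedged usage sketch for preparePatterns (assumes the same imports as the
# function above: java.util.regex.Pattern, java.lang.String, logger, sys).
# A wildcard such as "10.1.*" is validated against "[\d*?.]+" and then
# rewritten to the regex "10\.1\..*" before compilation.
from java.lang import String

compiled = preparePatterns("10.1.*;192.168.?.1")
for p in compiled:
    print p.pattern(), p.matcher(String("10.1.2.3")).matches()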
class database(core):
    regexp_connection = Pattern.compile('(\w+)/(\w+)@(.+)')
    regexp_word = Pattern.compile('\s*request(\d+)\.(\w+)\s*=')
    regexp_comment = Pattern.compile('^\s*#')
    regexp_select = Pattern.compile('^\s*select', Pattern.CASE_INSENSITIVE)

    def __init__(self, _dataFilePath, _templateFilePath):
        core.__init__(self, _dataFilePath, _templateFilePath)
        self.lastStmt = 'OTHER'
        self.db_type = None
        self.connections = {}
        str_conn = self.properties.get('db_connection') or None
        logger.info('db_connection=%s' % (str_conn))
        self.connection = None
        if str_conn:
            self.connection = self.getConnection(str_conn)
            if self.connection:
                self.connections[str_conn] = self.connection
                self.cursor = self.connection.cursor()
        self.alias = None
        self.dictBind = {}

    def getConnection(self, str):
        if str in self.connections.keys():
            return self.connections[str]
        found = self.regexp_connection.matcher(str)
        if found.find():
            user = found.group(1)
            password = found.group(2)
            url = found.group(3)
            driver = None
            if url.find('oracle') > 0:
                # jdbc:oracle:thin:
                self.db_type = 'oracle'
                driver = "oracle.jdbc.driver.OracleDriver"
            if url.find('mysql') > 0:
                self.db_type = 'mysql'
                driver = "org.gjt.mm.mysql.Driver"
            try:
                logger.info('Trying to connect to %s,%s,%s,%s' % (url, user, password, driver))
                conn = zxJDBC().connect("%s" % url, user, password, driver)
                logger.info('Connected to %s,%s,%s,%s' % (url, user, password, driver))
            except DatabaseError, e:
                raise e
            self.connections[str] = conn
            return conn
        else:
            # the original snippet is truncated here; a minimal assumed fallback
            logger.error('Invalid connection string (expected user/password@url): %s' % str)
            return None
def __init__(self, fn, re, dict):
    # fn:
    if fn is not None:
        eval(fn)
    self.fn = fn
    self.evalfn = None
    # dict:
    if dict is not None:
        self.items = dict.items()
    else:
        self.items = None
    # re:
    if re is not None:
        Pattern.compile(re)
    self.re = re
    self.compiledRe = None
def initNewCRF(data, orderList, defaultLabel, gaussianPriorVariance,
               allowedPattern=".*", forbiddenPattern="\\s", connected=1):
    # some default things that most users won't want to deal with
    forbiddenPattern = Pattern.compile(forbiddenPattern)
    allowedPattern = Pattern.compile(allowedPattern)
    defaults = None
    orderArray = jarray.array(orderList, "i")
    crf = CRF4(data.getPipe(), None)
    startName = crf.addOrderNStates(data, orderArray, defaults, defaultLabel,
                                    forbiddenPattern, allowedPattern, connected)
    crf.setGaussianPriorVariance(gaussianPriorVariance)
    for i in range(0, crf.numStates()):
        crf.getState(i).setInitialCost(Double.POSITIVE_INFINITY)
    crf.getState(startName).setInitialCost(0.0)
    return crf
def LineGroupInstanceAdd(data, fileName, seperator="^\\s*$"):
    """ add some extra data to an instance list. """
    data.add(LineGroupIterator(FileReader(File(fileName)),
                               Pattern.compile(seperator), 1))
def get_constraints(sentence, pos):
    constraints = []
    length = len(sentence)
    if pos == 'n':
        constraints.append(ParserConstraint(0, length, Pattern.compile("NP.*")))
    return constraints
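# Hedged usage of get_constraints, assuming the parser bindings that provide
# ParserConstraint (an assumption here; only the call shape is shown).
# With pos='n' the entire span is constrained to an NP.* label.
sentence = ['the', 'quick', 'brown', 'fox']
constraints = get_constraints(sentence, 'n')
print len(constraints)  # 1 constraint covering [0, 4)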
def resolveProperties(self, inputStr):
    # "${...}"
    patternStr = "\\$\\{[^\\{]+\\}"
    # Compile regular expression
    pattern = Pattern.compile(patternStr)
    input = java.lang.String(inputStr)
    # Replace all occurrences of pattern in input
    matcher = pattern.matcher(input)
    sb = java.lang.StringBuffer()
    last = 0
    while matcher.find():
        fragment = java.lang.String(matcher.group())
        token = fragment.substring(2, fragment.length() - 1)
        replacementStr = self.resources.get(token)
        if replacementStr is None:
            print 'WARNING: the macro ' + str(fragment) + ' has no value!!!'
            self.error = true
        else:
            sb.append(input.substring(last, matcher.start()))
            sb.append(replacementStr)
            last = matcher.end()
    sb.append(input.substring(last, input.length()))
    result = sb.toString()
    if pattern.matcher(java.lang.String(result)).find():
        result = self.resolveProperties(result)
    return result
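# A hedged mini-demo of the "${...}" macro pattern used by resolveProperties.
# Because the method recurses while the substituted result still contains a
# macro, nested definitions such as url=jdbc://${host}/app resolve fully.
import java.lang
from java.util.regex import Pattern

p = Pattern.compile("\\$\\{[^\\{]+\\}")
m = p.matcher(java.lang.String("jdbc://${host}/app"))
while m.find():
    print m.group()  # ${host}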
def get_dataset_table_for_topic(self, topic: str, parser: BQParser) -> Optional[str]:
    if parser.version == "v2":
        dataset = parser.defaultDataset
        parts = topic.split(":")
        if len(parts) == 2:
            dataset = parts[0]
            table = parts[1]
        else:
            table = parts[0]
    else:
        dataset = self.get_dataset_for_topic_v1(topic, parser)
        if dataset is None:
            return None
        table = topic

    if parser.topicsToTables:
        topicregex_table_map: Dict[str, str] = dict(
            self.get_list(parser.topicsToTables)  # type: ignore
        )
        from java.util.regex import Pattern

        for pattern, tbl in topicregex_table_map.items():
            patternMatcher = Pattern.compile(pattern).matcher(topic)
            if patternMatcher.matches():
                table = tbl
                break

    if parser.sanitizeTopics:
        table = self.sanitize_table_name(table)
    return f"{dataset}.{table}"
def __init__(self, Framework):
    Netlinks_Service.NetlinksService.__init__(self, Framework)
    shouldIgnoreLocal = self.getParameterValue('ignoreLocalConnections')
    if shouldIgnoreLocal == None:
        shouldIgnoreLocal = 'false'
    self.ignoreLocalConnections = Boolean.parseBoolean(shouldIgnoreLocal)
    self.dependencyNameIsKey = modeling.checkIsKeyAttribute('dependency', 'dependency_name')
    self.dependencySourceIsKey = modeling.checkIsKeyAttribute('dependency', 'dependency_source')
    ignoredIpsList = self.getParameterValue('ignoredIps')
    self.ignoredIps = None
    if ignoredIpsList != None:
        ipPatterns = ignoredIpsList.split(',')
        if (len(ipPatterns) > 0) and (ipPatterns[0] != ''):
            for ipPattern in ipPatterns:
                pattern = String(ipPattern)
                pattern = String(pattern.replaceAll("\.", "\\\."))
                pattern = String(pattern.replaceAll("\*", "\\\d+"))
                try:
                    m = Pattern.compile(pattern)
                    if self.ignoredIps == None:
                        self.ignoredIps = ArrayList()
                    self.ignoredIps.add(m)
                except:
                    logger.debug('Failed to compile ip pattern:', ipPattern)
    self.initializeServices()
def get_dataset_for_topic_v1(self, topic: str, parser: BQParser) -> Optional[str]:
    topicregex_dataset_map: Dict[str, str] = dict(self.get_list(parser.datasets))  # type: ignore
    from java.util.regex import Pattern

    for pattern, dataset in topicregex_dataset_map.items():
        patternMatcher = Pattern.compile(pattern).matcher(topic)
        if patternMatcher.matches():
            return dataset
    return None
def parseCommandLine(argv):
    fullArgPairPattern = Pattern.compile("--\\w+=\\S*")
    justArgNamePattern = Pattern.compile("--\\w+")
    cmdParamProps = {}
    if (len(argv) > 0):
        for param in argv:
            cmdParam = String(param)
            fullMatcher = fullArgPairPattern.matcher(cmdParam)
            if (fullMatcher.matches()):
                (paramName, paramValue) = cmdParam.split("=")
                cmdParamProps[paramName] = paramValue
            else:
                nameMatcher = justArgNamePattern.matcher(cmdParam)
                if (nameMatcher.matches()):
                    cmdParamProps[param] = None
                else:
                    print("This " + param + " is not a Command Line parameter")
    return cmdParamProps
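# Hedged examples for parseCommandLine: "--name=value" pairs populate the
# dict, a bare "--flag" maps to None, and anything else is reported.
props = parseCommandLine(["--env=prod", "--verbose", "stray"])
print props  # {'--env': 'prod', '--verbose': None}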
def actionPerformed(self, event):
    messages = self.browser.getSelectedMessages()
    numMessages = messages.size()
    if numMessages == 0:
        self.browser.showInformationDialog("No messages selected")
        return
    if numMessages > 1:
        self.browser.showInformationDialog("%d messages selected, choose one" % numMessages)
        return
    message = messages.get(0)
    replyToId = message.getJMSMessageID()
    replyToQueue0 = message.getJMSReplyTo()
    if replyToQueue0 != None:
        replyToQueue0 = replyToQueue0.getQueueName()
        p = Pattern.compile("[^\\s:/]+://[^\\s:/]*/([^\\s:/?]+)\\??.*")
        m = p.matcher(replyToQueue0)
        if m.matches():
            replyToQueue0 = m.group(1)
        else:
            replyToQueue0 = None
    dNode = self.browser.getBrowserTree().getFirstSelectedDestinationNode()
    hNode = self.browser.getBrowserTree().getSelectedHermesNode()
    if dNode == None or hNode == None:
        self.browser.showInformationDialog("Unknown destination, select destination queue")
        return
    hermes = hNode.getHermes()
    replyToQueue1 = dNode.getDestinationName()
    replyToDomain = dNode.getDomain()
    if replyToQueue0 == None and replyToQueue1 == None:
        self.browser.showInformationDialog("Unknown destination, select destination queue")
        return
    # show menu
    if replyToQueue0 != None and replyToQueue1 != None and replyToQueue0 != replyToQueue1:
        menu = JPopupMenu()
        q0item = JMenuItem(replyToQueue0)
        q0item.addActionListener(MenuItemHandler(self, hermes, replyToId, replyToQueue0, replyToDomain))
        menu.add(q0item)
        q1item = JMenuItem(replyToQueue1)
        q1item.addActionListener(MenuItemHandler(self, hermes, replyToId, replyToQueue1, replyToDomain))
        menu.add(q1item)
        menu.show(self.button, 0, self.button.getHeight())
        return
    # show new message dialog
    else:
        if replyToQueue0 != None:
            replyToQueue = replyToQueue0
        else:
            replyToQueue = replyToQueue1
        self.replyTo(hermes, replyToId, replyToQueue, replyToDomain)
def get_agentId(hostID):
    '''
    Extract any number from a simple string to get a unique agent identifier
    from the grinder.hostID; return the string if not found (generally the hostname)

    :param hostID: the grinder.hostID string
    '''
    m = Pattern.compile('([0-9]+)').matcher(hostID)
    if m.find():
        return m.group(1)
    return hostID
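# Hedged examples for get_agentId: the first run of digits wins, and the
# hostID comes back unchanged when it contains no digits.
print get_agentId('loadgen-12')  # '12'
print get_agentId('localhost')   # 'localhost'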
def saveLastStoreAlias(self, command, meta_data):
    """
    Saves the name of the alias of the last store.
    Maybe better to replace it by PigServer.getPigContext().getLastAlias().
    """
    if command.upper().startswith("STORE"):
        outputFile = Pattern.compile("STORE +([^']+) INTO.*", Pattern.CASE_INSENSITIVE)
        matcher = outputFile.matcher(command)
        if matcher.matches():
            meta_data["LAST_STORE_ALIAS"] = matcher.group(1)
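# A hedged stand-alone demo of the alias-capturing regex used above:
# group(1) is the alias between STORE and INTO, matched case-insensitively.
from java.util.regex import Pattern

outputFile = Pattern.compile("STORE +([^']+) INTO.*", Pattern.CASE_INSENSITIVE)
matcher = outputFile.matcher("store results into 'out.txt';")
if matcher.matches():
    print matcher.group(1)  # results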
def match_regexp(result, regexp_str, opt=Pattern.CASE_INSENSITIVE, grp=1):
    """
    Look for an identifier in a text body, e.g. 'SID=(\w+)'.
    The regexp must contain one capturing group.
    """
    regExp = Pattern.compile(regexp_str, opt)
    body = manageGzipFormat(result)
    found = regExp.matcher(body)
    if found.find():
        return found.group(grp)
    else:
        raise Exception('Pattern [%s] NOT found in the text:\n[%s]' % (regexp_str, body))
def convert2Vars(str2use):
    """
    Optimized Java-regex conversion of an input string in the format
    ${XX.string} to @VARXX, where XX is up to 2 digits.
    Returns the converted string.
    """
    replPattern = Pattern.compile('(\$\{(\d{1,2})\..+?})')
    match = replPattern.matcher(str2use)
    while (match.find()):
        replStr = '@VAR%02d' % int(match.group(2))
        str2use = str2use.replace(match.group(1), replStr)
    return str2use
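# Hedged example for convert2Vars: the group(2) digits are zero-padded to two
# places, so "${4.field}" becomes "@VAR04".
print convert2Vars('select * from t where id=${4.field}')
# -> 'select * from t where id=@VAR04'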
def LineGroupInstanceList(pipe, fileName, seperator="^\\s*$"):
    """
    Takes a pipe and a file name and produces an instance list based on that
    pipe and a LineGroupIterator. The optional argument seperator specifies
    what separates instances from each other. For example, when doing part of
    speech tagging an instance is a sentence. Each word in the sentence would
    have a separate line, and a line matching the regular expression specified
    by seperator would terminate the current sentence.
    """
    data = InstanceList(pipe)
    data.add(LineGroupIterator(FileReader(File(fileName)),
                               Pattern.compile(seperator), 1))
    return data
def initNewCRF(data, orderList, defaultLabel, gaussianPriorVariance,
               allowedPattern=".*", forbiddenPattern="\\s", connected=1,
               defaults=None):
    """
    Create and initialize a CRF with states read from data, of order given by
    the sequence orderList (lower numbers are backoff levels), with a default
    label and the given gaussian prior variance.

    The allowedPattern, forbiddenPattern and connected arguments control which
    state transitions are permissible. Allowed transitions (from LABEL1 to
    LABEL2) are ones where "LABEL1,LABEL2" matches the allowedPattern but not
    the forbiddenPattern.

    For example:
    forbiddenPattern="O,I-.*" would disallow transitions from state "O" to
    state "I-NP", "I-PP" and so on.
    allowedPattern="B-(.*),I-\\1|I-(.*),I-\\2|.*,B-.*|.*,O" would allow
    e.g. B-NP,I-NP but not B-NP,I-VP.
    """
    # some default things that most users won't want to deal with
    forbiddenPattern = Pattern.compile(forbiddenPattern)
    allowedPattern = Pattern.compile(allowedPattern)
    orderArray = jarray.array(orderList, "i")
    crf = CRF4(data.getPipe(), None)
    startName = crf.addOrderNStates(data, orderArray, defaults, defaultLabel,
                                    forbiddenPattern, allowedPattern, connected)
    crf.setGaussianPriorVariance(gaussianPriorVariance)
    for i in range(0, crf.numStates()):
        crf.getState(i).setInitialCost(Double.POSITIVE_INFINITY)
    crf.getState(startName).setInitialCost(0.0)
    return crf
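# A hedged usage sketch for initNewCRF, assuming the MALLET setup used in the
# surrounding snippets ('data' is assumed to be an InstanceList built by
# LineGroupInstanceList with a suitable pipe). The pattern pair below permits
# only well-formed BIO transitions: B-NP,I-NP is allowed, B-NP,I-VP is not.
crf = initNewCRF(data, [0, 1], "O", 10.0,
                 allowedPattern="B-(.*),I-\\1|I-(.*),I-\\2|.*,B-.*|.*,O",
                 forbiddenPattern="\\s")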
def doPassiveScan(self, ihrr):
    response = self.helpers.bytesToString(ihrr.getResponse())
    p = Pattern.compile('.*integrity=\"(sha256|sha384|sha512)-[A-Za-z0-9+/=]+.*', Pattern.DOTALL)
    m = p.matcher(response)
    # Check match for html pages only
    # XXX: Java strings are automatically boxed into Python unicode objects,
    # therefore it is not possible to use the contains method anymore.
    # In order to check if a substring is present in a string, we need
    # to use the in operator.
    if "<html" in response and not m.matches():
        # The page does NOT contain any SRI attribute
        issues = ArrayList()
        issues.add(SRI(ihrr))
        return issues
    return None
def __manageDefaultValue(s, d):
    '''
    Add default values to templating.

    For example:
    ${grindertool.msisdn.min},${grindertool.msisdn.max},${grinder.threads},${grindertool.msisdn.padding,5},${grindertool.msisdn.random,1},${grindertool.msisdn.debug,1}
    ${grindertool.msisdn.padding,5} is replaced by 5 if the key grindertool.msisdn.padding is not defined.
    (...)

    :param s: the macros argument list with potential default template values
    :param d: the dictionary where the template keys are defined
    '''
    regexp = Pattern.compile('(\$\{([a-zA-Z0-9\.]+),([a-zA-Z0-9\.]+)}?)')
    m = regexp.matcher(s)
    while m.find():
        rep = str(d.get(m.group(2), m.group(3)))
        s = s.replace(m.group(1), rep)
    return s
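# Hedged example for __manageDefaultValue (assumed to be module-level, as its
# signature has no self): when the key is absent from the dictionary, the
# default after the comma is substituted.
print __manageDefaultValue('pad=${grindertool.msisdn.padding,5}', {})
# -> 'pad=5'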
class GlobalPattern:
    ''' Factorization of some common regexp patterns '''
    staticFieldPattern = Pattern.compile(r'(&&(\w+)\.(\w+)\(([^\)]*)\))')
    dynFieldPattern = Pattern.compile(r'&{0,1}([a-zA-Z]{1}[_a-zA-Z0-9]+)\.(\w+)\((.*)\)')
    templateDynamicPattern = Pattern.compile(r'(\w+)&(\w+)\.(\w+)\((.*)\)')
    memorizedVariablePattern = Pattern.compile(r'(@\w+@)')
    dynPlaceholderPattern = Pattern.compile(r'\$\{([\w\.]+)\}')
    evalPattern = Pattern.compile(r'\(([^\)]*)\)')
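# Hedged examples of the shared GlobalPattern regexes in use.
m = GlobalPattern.dynPlaceholderPattern.matcher('${session.token}')
if m.find():
    print m.group(1)  # session.token

m = GlobalPattern.memorizedVariablePattern.matcher('id=@SESSIONID@')
if m.find():
    print m.group(1)  # @SESSIONID@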
def __init__(self, patterns, ignore, input, scantype="standard"):
    """Initialize the scanner.

    Parameters:
      patterns : [(terminal, uncompiled regex), ...] or None
      ignore : [terminal,...]
      input : string

    If patterns is None, we assume that the subclass has defined
    self.patterns : [(terminal, compiled regex), ...]. Note that the
    patterns parameter expects uncompiled regexes, whereas the
    self.patterns field expects compiled regexes.
    """
    self.tokens = []  # [(begin char pos, end char pos, token name, matched text), ...]
    self.restrictions = []
    self.input = input
    self.pos = 0
    self.ignore = ignore
    self.scantype = scantype
    self.first_line_number = 1
    if self.scantype == "flex" and have_star_scan:
        StarScan.prepare(input)
        self.scan = self.compiled_scan
        self.token = self.compiled_token
        self.__del__ = StarScan.cleanup
    elif self.scantype == "flex":
        self.scantype = "standard"
    if self.scantype != "flex":
        self.scan = self.interp_scan
        self.token = self.interp_token

    if patterns is not None:
        # Compile the regex strings into regex objects
        self.patterns = []
        for terminal, regex in patterns:
            self.patterns.append((terminal, re.compile(regex)))
        if self.using_java_regex:
            # different access method for regexes
            self.patterns = map(lambda a: (a[0], a[1].matcher(self.input)), self.patterns)
def run(self, jeb):
    self.jeb = jeb
    self.jebUi = self.jeb.getUI()
    if not self.jeb.isFileLoaded():
        print "Please load a file"
        return
    # TODO: for dex: collapse all packages or third-party packages.
    #       for apk: collapse all packages first, then expand the current package only.
    instance_tree_view = self.jebUi.getView(View.Type.CLASS_HIERARCHY)
    field_real_view = View.getDeclaredField("v")
    field_real_view.setAccessible(True)
    instance_real_view = field_real_view.get(instance_tree_view)
    field_pattern = None
    for field in instance_real_view.getClass().getDeclaredFields():
        if field.getType().getName() == "java.util.regex.Pattern":
            field_pattern = field
            break
    if field_pattern is None:
        print "Pattern field is not found."
        return
    field_pattern.setAccessible(True)
    temp = "|".join(common_packages.strip().replace(".", "\.").splitlines())
    temp = "^({0}).*".format(temp)
    print temp
    new_pattern = Pattern.compile(temp)
    field_pattern.set(instance_real_view, new_pattern)
    instance_tree_view.refresh()
    print "Done..."
def search(metaclasses, regexp, options):
    print "Searching ..."
    rawResults = HashSet()
    session = Modelio.getInstance().getModelingSession()

    #--- (1) Add all instances of selected metaclasses
    for metaclass in metaclasses:
        print "  searching for instances of metaclass ", metaclass.getSimpleName(), " ... ",
        metaclassInstances = session.findByClass(metaclass)
        print unicode(len(metaclassInstances)), "elements found"
        rawResults.addAll(metaclassInstances)
    # remove predefined types
    predefTypes = Modelio.getInstance().getModelingSession().getModel().getUmlTypes().getBOOLEAN().getOwner()
    rawResults.remove(predefTypes)
    rawResults.remove(predefTypes.getOwner())
    print "  ==>", unicode(len(rawResults)), "elements found (primitive types excluded)"

    #--- (2) Check for name matching
    filteredResults = []
    try:
        if options[0] == 1:
            p = Pattern.compile(regexp)
            for result in rawResults:
                name = result.getName()
                if (p.matcher(name).matches()):
                    filteredResults.append(result)
        else:
            for result in rawResults:
                if result.getName().find(regexp) != -1:
                    filteredResults.append(result)
    except PatternSyntaxException:
        messageBox("The entered regular expression: '" + regexp + "' has a syntax error.")
    except IllegalArgumentException:
        messageBox("Illegal Argument Exception.")
    print "  " + unicode(len(filteredResults)) + " elements selected after name filtering"

    #--- (3) sort results by name
    filteredResults.sort(key=lambda x: x.getName())
    return filteredResults
def doPassiveScan(self, ihrr):
    # 1 - Convert byte[] response to String
    response = self.helpers.bytesToString(ihrr.getResponse())
    # 2 - Check if the page includes an 'integrity="(sha256|sha384|sha512)-..."'
    #     attribute (tip: use RegExp Pattern.compile and matcher)
    p = Pattern.compile('.*integrity=\"(sha256|sha384|sha512)-[A-Za-z0-9+/=]+.*', Pattern.DOTALL)
    m = p.matcher(response)
    # 3 - Based on the match and page type, determine whether the page is vulnerable or not
    # Check match for html pages only
    # XXX: Java strings are automatically boxed into Python unicode objects,
    # therefore it is not possible to use the contains method anymore.
    # In order to check if a substring is present in a string, we need
    # to use the in operator.
    if "<html" in response and not m.matches():
        # 4 - If vulnerable, create a new IScanIssue and return the List<IScanIssue>
        # TODO
        pass
    return None

def doActiveScan(self, ihrr, isip):
    return None  # Passive scanner check only

def consolidateDuplicateIssues(self, isb, isa):
    return -1
def setProperties(self, configuration):
    '''
    Overload of corelibs::setProperties() - it's just for readability,
    otherwise the SOAP text must be set on only one line.
    '''
    if logger.isDebugEnabled():
        foo = sys._getframe(0).f_code.co_name
        logger.debug('DEBUG. Function=%s.%s' % (self.__class__.__name__, foo))
    objFile = StringIO(configuration)
    reg_param = Pattern.compile(r'^\s*([A-Za-z0-9\.\_]+)\s*=\s*(.*)$')
    props = {}
    (value, param, addStr) = ('', '', '')
    for line in objFile.readlines():
        # ignore empty strings & comments
        if line.strip() == '#' or (not line.strip()):
            continue
        res = reg_param.matcher(line)
        if res.find():
            if value:
                props[param] = value
            param, value = res.group(1), res.group(2)
            addStr = '\n'
        else:
            value = value + addStr + line
            addStr = ''
    if value:
        props[param] = value
    objFile.close()
    return props
def replace(self, inputStr):
    self.error = false
    # "@...@"
    patternStr = "@[^@]+@"
    # Compile regular expression
    pattern = Pattern.compile(patternStr)
    input = java.lang.String(inputStr)
    # Replace all occurrences of pattern in input
    matcher = pattern.matcher(input)
    sb = java.lang.StringBuffer()
    last = 0
    while matcher.find():
        fragment = java.lang.String(matcher.group())
        token = fragment.substring(1, fragment.length() - 1)
        replacementStr = self.resources.get(token)
        if replacementStr is None:
            print 'WARNING: the macro ' + str(fragment) + ' has no value!!!'
            self.error = true
        else:
            sb.append(input.substring(last, matcher.start()))
            sb.append(replacementStr)
            last = matcher.end()
    sb.append(input.substring(last, input.length()))
    return sb.toString()
def processAntProperties(self):
    self.error = false
    # include OS environment properties
    self.addOSEnvProperties()
    # "${...}"
    patternStr = "\\$\\{[^\\{]+\\}"
    # Compile regular expression
    pattern = Pattern.compile(patternStr)
    # Get all property keys
    keys = self.resources.keys()
    while keys.hasMoreElements():
        key = keys.nextElement()
        inputStr = self.resources.get(key)
        input = java.lang.String(inputStr)
        sb = java.lang.StringBuffer()
        matcher = pattern.matcher(input)
        last = 0
        while matcher.find():
            fragment = java.lang.String(matcher.group())
            token = fragment.substring(2, fragment.length() - 1)
            replacementStr = self.resources.get(token)
            if replacementStr is None:
                print 'WARNING: ' + key + '=' + str(fragment) + ' has no value!!!'
                self.error = true
            else:
                sb.append(input.substring(last, matcher.start()))
                sb.append(self.resolveProperties(replacementStr))
                last = matcher.end()
        sb.append(input.substring(last, input.length()))
        # key value replacement
        self.resources.setProperty(key, sb.toString())
    return self.error
class CheckRule:
    """ a simple checker object for assertion management on response """
    KEYWORDS = ['matches', 'notmatches', 'contains', 'notcontains', 'equals',
                'notequals', 'exists', 'macro', 'eval', 'maxduration']

    def __init__(self, rule, deferred=False):
        # all keys must be lower case
        self.ruleDefinition = dict((k.lower(), v) for k, v in dict(rule).iteritems())
        # Only one keyword is authorized
        intersect = set(self.KEYWORDS).intersection(set(self.ruleDefinition.keys()))
        if len(intersect) > 1:
            logger.error('We cannot have more than one keyword "%s" in the same rule among "%s"'
                         % (intersect, self.KEYWORDS))
            raise MySyntaxError('We cannot have more than one keyword "%s" in the same rule among "%s"'
                                % (intersect, self.KEYWORDS))
        if len(intersect) == 0:
            logger.error('Invalid rule: missing mandatory comparison keywords. One of "%s"' % self.KEYWORDS)
            raise MySyntaxError('Invalid rule: missing mandatory comparison keywords. One of "%s"' % self.KEYWORDS)
        # the only keyword
        self.ruleType = list(intersect)[0]
        # For async assertions
        self.deferred = 'deferred' in self.ruleDefinition or deferred
        # ignoreCase for Xpath & Regexp
        self.ignoreCase = 'ignorecase' in self.ruleDefinition and \
            str(self.ruleDefinition['ignorecase']).lower() in ('y', 'yes', 'true')
        # For "not" expressions (notequals, notcontains ...)
        self.positiveCheck = True
        # if there is a ${VAR} template
        self.isPlaceholder = False
        # "equals" rules are special "contains" rules
        self.equalsRule = False
        # For Regexp rules
        self.isCompiledRule = False
        self.compiledRule = None
        self.regexpExpression = None
        # For Xpath rules
        self.hasXpath = False
        self.xpathExpression = None
        self.textQuery = False
        self.isXpathPlaceholder = False
        self.compiledXpathRule = None
        self.isXpathCompiledRule = False

        # duration rule
        if 'maxduration' in self.ruleDefinition:
            try:
                s_duration = self.ruleDefinition['maxduration'].strip()
                self.maxDuration = int(s_duration)
            except ValueError:
                # this means that we have a string; see if we have a unit suffix
                try:
                    if s_duration[-2:] == 'ms':
                        self.maxDuration = int(s_duration[:-2])
                    elif s_duration[-1:] == 's':
                        self.maxDuration = int(s_duration[:-1]) * 1000
                except Exception, e:
                    raise MySyntaxError('Invalid rule: maxDuration must be expressed in milliseconds (ms) or seconds (s), raised: %s' % str(e))
            return

        #
        # quoted string may be forced in both "equals" and ("regexp","contains") keywords
        # default behavior is:
        #    "equals"            : literal_usage=True
        #    "regexp","contains" : literal_usage=False
        # default behavior is superseded by the usage of the keywords "literal","quote_string","regexp"
        #
        # Change: for **equals** keyword, "quoted" is the default
        #
        # We force the literal usage (quoted string) in the case of equals
        self.equalsRule = self.ruleType.lower().find('equals') >= 0
        self.literal_usage = True if self.equalsRule else False
        if len(list(set(self.ruleDefinition) & set(['literal', 'quote_string', 'regex']))) > 1:
            logger.error('Only 1 of [literal, quote_string, regex] is accepted - please review test scenario - assertion %s'
                         % self.ruleDefinition)
            raise MySyntaxError('Only 1 of [literal, quote_string, regex] is accepted - please review test scenario - assertion %s'
                                % self.ruleDefinition)
        #
        # This is a special case where you have complex literal values to compare
        # (for instance a quoted Http response). Default is False.
        # *** Activated if "literal: True" or "quote_string: True" in the assertion rule ***
        #
        if 'literal' in self.ruleDefinition:
            self.literal_usage = self.ruleDefinition.get('literal')
        if 'quote_string' in self.ruleDefinition:
            self.literal_usage = self.ruleDefinition.get('quote_string')
        if 'regex' in self.ruleDefinition:
            # force literal_usage to False if regex is explicitly specified in the rule definition
            # for example: - { response_key: toto, equals: 't[a-z]+', regex: True }
            self.literal_usage = not self.ruleDefinition.get('regex')

        # -------------
        # macro keyword
        # -------------
        self.isMacroRule = False
        if self.ruleType == 'macro':
            self.isMacroRule = True
            self.macroExpression = self.ruleDefinition['macro']
            # We check the macro format
            if not GlobalPattern.dynFieldPattern.matcher(self.macroExpression).find():
                raise SyntaxError('The macro "%s" format is incorrect, it is not a macro of the form module.function(parameter)'
                                  % self.ruleDefinition['macro'])
            logger.trace('"macro" rule to evaluate: %s' % (self.ruleDefinition['macro']))
            return

        # -------------
        # eval keyword
        # -------------
        self.isEvalRule = False
        self.stringToEvaluate = ''
        if self.ruleType == 'eval':
            self.isEvalRule = True
            self.stringToEvaluate = self.ruleDefinition['eval']
            logger.trace('"eval" rule to evaluate: %s' % (self.ruleDefinition['eval']))
            return

        # Identify the response key to compare to
        self.responseKey = self.ruleDefinition['response_key'] if 'response_key' in self.ruleDefinition else \
            self.ruleDefinition['from'] if 'from' in self.ruleDefinition else 'responseText'
        # we remove any placeholder in the response key
        m = (GlobalPattern.dynPlaceholderPattern).matcher(self.responseKey)
        if m.find():
            self.responseKey = m.group(1)

        # --------------
        # exists keyword
        # allows checking that a rule.responseKey exists or not
        # --------------
        if self.ruleType == 'exists':
            return

        # -----------------
        # Xpath rule
        # -----------------
        if 'xpath' in self.ruleDefinition:
            self.hasXpath = True
            self.xpathExpression = self.ruleDefinition['xpath']
            self.textQuery = self.xpathExpression.find('/text()') >= 0
            # To avoid NOT_FOUND in scenario checkResponse
            self.xpathExpression = self.xpathExpression.replace('/text()', '')
            self.isXpathPlaceholder = GlobalPattern.dynPlaceholderPattern.matcher(self.xpathExpression).find()
            if not self.isXpathPlaceholder:
                self.isXpathCompiledRule = True
                try:
                    xpathFactory = XPathFactory.newInstance()
                    self.compiledXpathRule = xpathFactory.newXPath().compile(self.xpathExpression)
                    logger.trace('Compiled Xpath %s has id: %s'
                                 % (self.xpathExpression, hex(id(self.compiledXpathRule))))
                except:
                    logger.error('Unable to compile xpath rule %s' % (self.xpathExpression))
                    raise MySyntaxError('Unable to compile xpath rule %s' % (self.xpathExpression))

        # positive or not ?
        self.positiveCheck = not self.ruleType.find('not') >= 0
        # In case of "equals", we may have "space" characters, so we don't strip
        self.regexpExpression = self.ruleDefinition[self.ruleType] if self.equalsRule \
            else self.ruleDefinition[self.ruleType].strip()
        self.isPlaceholder = (GlobalPattern.dynPlaceholderPattern).matcher(
            self.ruleDefinition[self.ruleType]).find()

        # ---------------
        # JSON rule
        # ---------------
        self.jsonRule = self.ruleDefinition.get('json', False)
        self.jsonStrict = self.ruleDefinition.get('strict', False)
        if self.jsonRule:
            logger.trace('JSON Rule declared')
            # no compilation; force literal usage
            self.literal_usage = True
            return

        # ---------------
        # regexp rule
        # optimization: compile the rule if there is no placeholder
        # ---------------
        if not self.isPlaceholder:
            self.isCompiledRule = True
            logger.trace('[CheckRule][No placeholder=>compiling rule][equals=%s][literal=%s][value=%s][positiveCheck=%s]'
                         % (self.equalsRule, self.literal_usage, self.regexpExpression, str(self.positiveCheck)))
            tmp_regexpExpression = Pattern.quote(str(self.regexpExpression)) if self.literal_usage \
                else str(self.regexpExpression)
            if self.equalsRule:
                tmp_regexpExpression = '^%s$' % (tmp_regexpExpression)
            logger.trace('[CheckRule][equals=%s][literal=%s][tmp_regexp=%s]'
                         % (self.equalsRule, self.literal_usage, tmp_regexpExpression))
            self.compiledRule = Pattern.compile(tmp_regexpExpression, Pattern.CASE_INSENSITIVE | Pattern.DOTALL) \
                if self.ignoreCase else Pattern.compile(tmp_regexpExpression, Pattern.DOTALL)
try:
    from java.util.regex import Pattern
    p = Pattern.compile("xxx")
    m = p.split("ABCDEFG")
except ImportError, e:
    import support
    raise support.TestWarning("JVM version >= 1.4 needed to test PyString -> CharSequence")
'''
Test for regular expressions in java
(result of an interactive session)
'''
from java.util.regex import Pattern
from java.lang import String

p = Pattern.compile("coding[:=]+[\\s]*[\\w[\\-]]+[\\s]*")
assert p.matcher(String('coding:foo')).find()
assert p.matcher(String('coding: foo ')).find()
assert p.matcher(String('coding:foo_1')).find()
assert p.matcher(String('coding:foo-1')).find()
assert p.matcher(String('coding:foo_1')).find()
assert not p.matcher(String('coding foo')).find()
assert not p.matcher(String('encoding foo')).find()
assert not p.matcher(String('coding')).find()
assert not p.matcher(String('coding')).find()
from java.lang import String
from java.util import HashMap
from java.util.regex import Pattern
from com.mercury.topaz.cmdb.shared.model.object.id import CmdbObjectID
from appilog.common.system.types import ObjectStateHolder
from appilog.common.system.types.vectors import ObjectStateHolderVector
from com.hp.ucmdb.discovery.library.clients import ClientsConsts
from com.hp.ucmdb.discovery.probe.agents.probemgr.workflow.state import WorkflowStepStatus
from com.hp.ucmdb.discovery.probe.agents.probemgr.accuratedependencies.processing import DependenciesDiscoveryConsts
import shellutils

PROVIDER_IP = 'PROVIDER_IP'
PROVIDER_PORT = 'PROVIDER_PORT'
PORT_GROUP_FROM_PATTERN = 1
PORT_PATTERN = Pattern.compile('\s*(\d+)\s*')


def StepMain(Framework):
    consumers = Framework.getProperty(DependenciesDiscoveryConsts.NEXT_HOP_PROVIDERS_RESULT_PROPERTY)
    OSHVResult = ObjectStateHolderVector()
    if (consumers is not None) and (consumers.size() > 0):
        ipPortconcepts = HashMap()
        localShell = None
        try:
            dnsServers = Framework.getParameter('dnsServers') or None
            if dnsServers:
                dnsServers = [dnsServer for dnsServer in dnsServers.split(',')
                              if dnsServer and dnsServer.strip()] or None
            localShell = shellutils.ShellUtils(Framework.createClient(ClientsConsts.LOCAL_SHELL_PROTOCOL_NAME))
                                     Color(0.85, 0.95, 0.85))
_frontPageNoteStyle = StyleSheet.style(Primitive.foreground(Color(0.0, 0.5, 0.0)),
                                       Primitive.fontSize(10))
_startupPageNoteBorder = SolidBorder(1.0, 1.0, 3.0, 3.0, Color(0.75, 0.5, 1.0),
                                     Color(0.925, 0.9, 0.95))
_startupPageNoteStyle = StyleSheet.style(Primitive.foreground(Color(0.25, 0.0, 0.5)),
                                         Primitive.fontSize(10))
_notesRowStyle = StyleSheet.style(Primitive.rowSpacing(10.0))

_notesGap = 15.0
_packageContentsIndentation = 20.0

_packageIcon = Image(Image.getResource('/LarchCore/Project/images/Package.png'))

_nameRegex = Pattern.compile('[a-zA-Z_][a-zA-Z0-9_]*')
_pythonPackageNameRegex = Pattern.compile('([a-zA-Z_][a-zA-Z0-9_]*(\\.[a-zA-Z_][a-zA-Z0-9_]*)*)?')


def _buildProjectJar(element, document):
    component = element.getRootElement().getComponent()

    larchJarURL = app_in_jar.getLarchJarURL()
    chosenJarURL = None
    if larchJarURL is None:
        openDialog = JFileChooser()
        openDialog.setFileFilter(FileNameExtensionFilter('Larch executable JAR (*.jar)', ['jar']))
        response = openDialog.showDialog(component, 'Choose Larch JAR')
        if response == JFileChooser.APPROVE_OPTION:
# AUTOSCRIPT NAME: EX2POTERMRICHTOPLAIN
# CREATEDDATE: 2015-05-21 05:48:28
# CREATEDBY: UVX3
# CHANGEDATE: 2017-01-29 23:04:06
# CHANGEBY: U03V
# SCRIPTLANGUAGE: jython
# STATUS: Draft
from psdi.util import HTML
from java.util.regex import Pattern

# for POTERM.DESCRIPTION
potermdesc = mbo.getString("DESCRIPTION")
potermdesc = HTML.toPlainText(potermdesc)
replace = Pattern.compile("[^\\p{ASCII}]")
matcher2 = replace.matcher(potermdesc.strip())
potermdesc = matcher2.replaceAll("")
replace = Pattern.compile("[&`~!?#?Y???[??<?-?}\\\\??]?{????????>????\\[\\]]")
matcher2 = replace.matcher(potermdesc.strip())
potermdesc = matcher2.replaceAll("")

# for POTERM.DESCRIPTION_LONGDESCRIPTION
ldtext = mbo.getString("DESCRIPTION_LONGDESCRIPTION")
ldtext = HTML.toPlainText(ldtext)
replace = Pattern.compile("[^\\p{ASCII}]")
matcher2 = replace.matcher(ldtext.strip())
ldtext = matcher2.replaceAll("")
replace = Pattern.compile("[&`~!?#?Y???[??<?-?}\\\\??]?{????????>????\\[\\]]")
matcher2 = replace.matcher(ldtext.strip())
ldtext = matcher2.replaceAll("")
def filterRe(self, key, value):
    if self.compiledRe is None:
        self.compiledRe = Pattern.compile(self.re)
    if self.compiledRe.matcher(value).find():
        yield key, value
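# Hedged usage of filterRe: the regex in self.re is compiled lazily on first
# call and cached in self.compiledRe; matching (key, value) pairs are yielded.
# _Holder is a hypothetical stand-in for the object built by the __init__ above.
class _Holder(object):
    def __init__(self, re):
        self.re = re
        self.compiledRe = None
    filterRe = filterRe

h = _Holder('err(or)?')
for k, v in h.filterRe('line7', 'disk error'):
    print k, v  # line7 disk error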
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software
# and associated documentation files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
from java.util.regex import Pattern

from xlr.github.GithubClient import GithubClient

g_client = GithubClient(server)
g = g_client.get_github_client(locals())
repo = g_client.get_repo(g, organization, repositoryName)
file_contents = repo.get_file_contents(filePath, ref="refs/heads/%s" % branch)
current_contents = file_contents.content

flags = Pattern.MULTILINE | Pattern.DOTALL
matcher = Pattern.compile(regex, flags).matcher(current_contents)
if matcher.find():
    new_contents = matcher.replaceAll(replacement)
    print "Replacing contents of %s/%s (%s) from:\n%s\nto:\n%s" % (
        repositoryName, filePath, branch, current_contents, new_contents)
    result = repo.update_file(filePath, commitMessage, new_contents, file_contents.sha, branch)
    commitId = result['commit'].sha
else:
    print "Did not find any occurrences of pattern [%s] in content:\n%s" % (
        regex, current_contents)
    commitId = None
class Command:
    memoryPattern = Pattern.compile("@.+@")
    # old format is: [request|response]<indice>.<keyword>=<value>
    oldFormatPattern = Pattern.compile("(\w+\.)\w+")

    def __init__(self, templateFilePath, templateShortName, templateType):
        self.templateFilePath = templateFilePath
        self.templateShortName = templateShortName
        self.templateType = templateType
        # Yaml template file
        self.isYamlTemplate = False
        # The template in Yaml format
        self.yamlTemplate = None
        if templateType in ['string', 'text']:
            self.templateAsString = templateShortName
            return
        # the final template file name
        file2read = ''
        if templateType == 'yaml_inline':
            self.yamlTemplate = templateShortName
            self.isYamlTemplate = True
        else:
            # template in file
            file2read = '%s/%s.%s' % (self.templateFilePath, self.templateShortName, 'template')
            self._readTemplate(file2read)
        #----------------------
        # Here we should have a Yaml file loaded in the variable self.yamlTemplate
        #----------------------
        # Store the template under the old string format
        self.templateAsString = self._convertTemplateOldFormat(file2read)

    def _convertOldFormat(self, index, typeStr, dic):
        '''
        Factorization for the Yaml conversion to the old format.

        yaml format is:
          - request:
              uri: <an uri>
              body: <a body>
              headers:
                key1: value1
                key2: value2
            response:
              statuscode: 200
              delay_ms: 50
          - request:
            (...)

        :param index: the index sequence of the yaml; the sequence separator is
                      the "-" character for a list in yaml
        :param typeStr: request or response key
        :param dic: the sub-dictionary for the key request or the key response
        '''
        # request={uri=/context/create, headers={Content-Type=application/json;charset=utf-8},
        #          body={"contextKey" : "msisdn", "value" : 334, "host" : "10.10.153.126",
        #                "port" : 3000, "expirationtime": 1456789}},
        # response={statuscode=200, delay_ms=20}
        try:
            dataDict = dict(dic[typeStr])
        except:
            logger.error('while loading "%s" - yaml key "%s" has to be at level0 a dictionary !'
                         % (self.templateShortName, typeStr))
            raise MySyntaxError('while loading "%s" - yaml key "%s" has to be at level0 a dictionary !'
                                % (self.templateShortName, typeStr))
        # Look if we have a json content
        isJson = False
        if 'headers' in dataDict:
            for name, value in dict(dataDict['headers']).iteritems():
                if name.lower() == 'content-type':
                    isJson = value.lower().find('application/json') >= 0
                    if logger.isDebugEnabled():
                        logger.debug('[name=%s][value=%s][isJson=%s]' % (name, value, isJson))
        # Solve the problem of empty lines in a SOAP http request
        if typeStr == 'request':
            if 'body' in dataDict:
                dataDict['body'] = '\n'.join([line for line in dataDict['body'].split('\n')
                                              if line.strip()])
        retStr = ''
        # Old format header separator (pipe) could be overloaded
        delimiter = dataDict.get('header_separator', '|')
        for name, value in dataDict.iteritems():
            if logger.isTraceEnabled():
                logger.trace('[%s_convertOldFormat] - [name=%s][value=%s][type value=%s]'
                             % (self.__class__.__name__, name, value, type(value)))
            # very unlikely: avoid misunderstanding with the yaml format
            # (if people use the previous syntax request0.xxx keyword)
            m = self.oldFormatPattern.matcher(name)
            if m.find():
                name = name.replace(m.group(1), '')
            if name in ('uri'):
                value = str(value)
            if isinstance(value, LinkedHashMap):
                # Yaml does a strange interpretation of strings for Json if you
                # don't have a single quote around your string
                if name == 'body':
                    value = JSONSerializer.toJSON(value) if isJson else str(value)
                elif name in 'headers':
                    s = ''
                    for name1, value1 in dict(value).iteritems():
                        s += '%s:%s%s' % (name1, value1, delimiter)
                    value = s
            retStr += '%s%d.%s=%s\n' % (typeStr, index, name, value)
        return retStr

    def _convertTemplateOldFormat(self, file2read):
        # now conversion to the old format
        strTemplate = ''
        # first we have a list of templates with 2 keys: request and response
        for i, templateLine in enumerate(self.yamlTemplate):
            dictTemplateLine = dict(templateLine)
            if not all(name in ('request', 'response') for name in dictTemplateLine.keys()):
                logger.error('Incorrect format - each level 0 list has to be a dictionary with the keys "request" and "response" while reading "%s"!'
                             % (file2read))
                raise MySyntaxError('Incorrect format - each level 0 list has to be a dictionary with the keys "request" and "response" while reading "%s"!'
                                    % (file2read))
            strTemplate += self._convertOldFormat(i, 'request', dictTemplateLine)
            strTemplate += self._convertOldFormat(i, 'response', dictTemplateLine)
        logger.trace('_readTemplate[YAML]="%s"' % (strTemplate))
        return strTemplate

    def _readTemplate(self, file2read):
        ''' evaluate if the template has a yaml format '''
        if not self.isYamlTemplate:
            initialFile = file2read
            logger.debug('[%s._readTemplate] Looking for file: "%s"' % (self.__class__.__name__, initialFile))
            if not os.path.exists(file2read):
                # check both .yaml & .yml extensions
                file2read = '%s.yaml' % (file2read)
                logger.debug('[%s._readTemplate] Looking for file: "%s"' % (self.__class__.__name__, file2read))
                if os.path.exists(file2read):
                    self.isYamlTemplate = True
                else:
                    file2read = '%s.yml' % (file2read)
                    logger.debug('[%s._readTemplate] Looking for file: "%s"' % (self.__class__.__name__, file2read))
                    if os.path.exists(file2read):
                        self.isYamlTemplate = True
                    else:
                        logger.error('[%s._readTemplate] Template File "%s" doesn\'t exist (even with yaml extension)'
                                     % (self.__class__.__name__, initialFile))
                        raise MySyntaxError('[%s._readTemplate] Template File "%s" doesn\'t exist (even with yaml extension)'
                                            % (self.__class__.__name__, initialFile))
        # So the template file exists
        try:
            logger.debug('[%s._readTemplate] Reading template file "%s"' % (self.__class__.__name__, file2read))
            lines = open(file2read, 'r').readlines()
        except:
            logger.error('[%s._readTemplate] failure opening template file "%s"' % (self.__class__.__name__, file2read))
            raise MySyntaxError('[%s._readTemplate] failure opening template file "%s"' % (self.__class__.__name__, file2read))
        # Shebang testing for Yaml format
        if not self.isYamlTemplate and (lines[0].find('#!yaml') >= 0 or lines[0].find('#!yml') >= 0):
            self.isYamlTemplate = True
        if not self.isYamlTemplate:
            logger.error('[%s._readTemplate] compatibility issue ! template must be YAML data',
                         (self.__class__.__name__))
            raise SyntaxError('[%s._readTemplate] compatibility issue ! template must be YAML data',
                              (self.__class__.__name__))
        # Yaml format: load the string to Yaml if we don't have it already
        if not self.yamlTemplate:
            yaml = Yaml(Constructor(), Representer(), DumperOptions(), CustomResolver())
            try:
                self.yamlTemplate = yaml.load(''.join(lines).strip())
            except (MarkedYAMLException, YAMLException, ParserException,
                    ReaderException, ScannerException), e:
                logger.error('Error while parsing YAML-file "%s":\n%s' % (file2read, e))
                raise MySyntaxError('Error while parsing YAML-file "%s":\n%s' % (file2read, e))
            logger.trace("_readTemplate - Loaded Yaml : '''%s'''" % (self.yamlTemplate))
        # Templates are list objects
        if not isinstance(list(self.yamlTemplate), list):
            logger.error('Yaml template must be a list of dictionaries while reading "%s"!' % file2read)
            raise MySyntaxError('Yaml template must be a list of dictionaries while reading "%s"!' % file2read)
def getPrefix(self, keyString):
    p = Pattern.compile("(.+)\\.([0-9]+)\\..+")
    m = p.matcher(keyString)
    if m.matches():
        return m.group(1)
    return None
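# A hedged stand-alone demo of the key-prefix regex behind getPrefix:
# group(1) is everything before the numeric middle segment.
from java.util.regex import Pattern

p = Pattern.compile("(.+)\\.([0-9]+)\\..+")
m = p.matcher("queue.12.depth")
if m.matches():
    print m.group(1)  # queue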
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS
# FOR A PARTICULAR PURPOSE. THIS CODE AND INFORMATION ARE NOT SUPPORTED BY XEBIALABS.
#
from org.eclipse.egit.github.core import RepositoryId
from org.eclipse.egit.github.core.client import GitHubClient
from com.xebialabs.xlrelease.plugin.github import ContentsService
from org.eclipse.jgit.util import Base64
from java.lang import String
from java.util.regex import Pattern

github_client = GitHubClient().setCredentials(gitRepository['username'], gitRepository['password'])
repository_id = RepositoryId.createFromUrl(gitRepository['url'].replace('.git', ''))
contents_service = ContentsService(github_client)

contents_object = contents_service.getContents(repository_id, filePath, branch).get(0)
current_contents_bytes = Base64.decode(contents_object.getContent())
current_contents = String(current_contents_bytes, "UTF-8")

flags = Pattern.MULTILINE | Pattern.DOTALL
matcher = Pattern.compile(regex, flags).matcher(current_contents)
if matcher.find():
    new_contents = matcher.replaceAll(replacement)
    print "Replacing contents of %s/%s (%s) from:\n%s\nto:\n%s" % (
        repository_id, filePath, branch, current_contents, new_contents)
    contents_object.setContent(Base64.encodeBytes(String(new_contents).getBytes("UTF-8")))
    commit = contents_service.updateContents(repository_id, contents_object, commitMessage, branch)
    commitId = commit.getSha()
else:
    print "Did not find any occurrences of pattern [%s] in content:\n%s" % (regex, current_contents)
    commitId = None
import java.util.regex.Pattern as Pattern

queries = [
    "this is a test of an email message that specifies the vm name of $(vm.name) with a metric value of $(metric_value[metric_key_id=4].metric_value) with some other text.",
    "this is a test of an email message that specifies the vm name of $(vm.name) with a metric value of $(metric_value[metric_key_id=4].metric_value) with some other text.",
    "this is a test of an email message that specifies the vm name of $(vm.name with a metric value of $(metric_value[metric_key_id=4].metric_value) with some other text.",
    "this is a test of an email message that specifies the vm name of $(vm.name) with a metric value of $(metric_value[metric_key_id=].metric_value) with some other text.",
    "this is a test of an email message that specifies the vm name of $(vm.name) with a metric value of $(metric_value[metric_key_id].metric_value) with some other text.",
]

pattern = Pattern.compile(r"(\$\([\w.\[\]=]+\))")
variablePattern = Pattern.compile(r"([\w]+)(\[[\w=]+\]){0,1}\.(\w+)")

for query in queries:
    matcher = pattern.matcher(str(query))
    lastMatch = 0
    print '------------------------------------------------'
    while matcher.find():
        variable = query[matcher.start() + 2:matcher.end() - 1]
        print 'variable', variable
        lastMatch = matcher.end()
        varMatcher = variablePattern.matcher(str(variable))
        if varMatcher.find():
            obj = varMatcher.group(1)
            matchSpec = varMatcher.group(2)
            value = varMatcher.group(3)
            print '\tobject type', obj
            if matchSpec is not None:
                print '\tmatch spec', matchSpec[1:-1]
            print '\tfield', value
        else:
            print '\tinvalid match spec'
queries = [
    'vm.tags = \'a\' | project vnic',
    'vm.tags contains a',
    'vm.tags contains \'a\' project vnic',
    'vm.tags contains \'a\' | project vnic',
    'host.tags contains \'a\' | project vnic',
    'cluster.name = \'yomama\' project vm | vm.tags = a',
    'cluster.name = \'yomama\' project vm | vm.tags = \'a\' project vnic',
    'cluster.name = \'yomama\' project vm | vm.tags = \'a\' | project vnic',
    'cluster.name = \'yomama\' project vm | vm.tags contains a',
    'cluster.name = \'yomama\' project vm | vm.tags contains \'a\' project vnic',
    'cluster.name = \'yomama\' project vm | vm.tags contains \'a\' | project vnic',
    'cluster.name = \'yomama\' project host | host.tags contains \'a\' | project vnic',
    'type = vm',
]

pattern = Pattern.compile("(\\w+)\.tags\s*(=|\\w+)\s*(['\"]{0,1}\w+['\"]{0,1})(\s*\||\s*project){0,1}")

for query in queries:
    matcher = pattern.matcher(str(query))
    newStr = ''
    lastFind = 0
    needsSet = False
    print '------------------------------------------------'
    print 'old:', query
    while matcher.find():
        if matcher.start() > lastFind:
            newStr = '{' + newStr
        # if we are constructing a set, we might need to close it off correctly
        # because of trailing time context. Find where it closes (by a |) and
        # close off the set
        if needsSet:
            nextTerm = query.find('|', lastFind)
            if nextTerm < matcher.start() and nextTerm != -1:
def _extract_lineages(self):
    lineages: List[KafkaConnectLineage] = list()
    parser = self.get_parser(self.connector_manifest)
    source_platform = parser.source_platform
    database_name = parser.database_name
    query = parser.query
    topic_prefix = parser.topic_prefix
    transforms = parser.transforms
    self.connector_manifest.flow_property_bag = self.connector_manifest.config

    instance_name = get_instance_name(self.config, self.connector_manifest.name, source_platform)

    # Mask/Remove properties that may reveal credentials
    self.connector_manifest.flow_property_bag["connection.url"] = parser.db_connection_url
    if "connection.password" in self.connector_manifest.flow_property_bag:
        del self.connector_manifest.flow_property_bag["connection.password"]
    if "connection.user" in self.connector_manifest.flow_property_bag:
        del self.connector_manifest.flow_property_bag["connection.user"]

    logging.debug(
        f"Extracting source platform: {source_platform} and database name: {database_name} from connection url "
    )

    if not self.connector_manifest.topic_names:
        self.connector_manifest.lineages = lineages
        return

    if query:
        # Lineage source_table can be extracted by parsing query
        for topic in self.connector_manifest.topic_names:
            # default method - as per earlier implementation
            dataset_name: str = get_dataset_name(database_name, instance_name, topic)
            lineage = KafkaConnectLineage(
                source_dataset=None,
                source_platform=source_platform,
                target_dataset=topic,
                target_platform="kafka",
            )
            lineages.append(lineage)
        self.report_warning(
            self.connector_manifest.name,
            "could not find input dataset, the connector has query configuration set",
        )
        self.connector_manifest.lineages = lineages
        return

    SINGLE_TRANSFORM = len(transforms) == 1
    NO_TRANSFORM = len(transforms) == 0
    UNKNOWN_TRANSFORM = any(
        [
            transform["type"]
            not in self.KNOWN_TOPICROUTING_TRANSFORMS + self.KNOWN_NONTOPICROUTING_TRANSFORMS
            for transform in transforms
        ]
    )
    ALL_TRANSFORMS_NON_TOPICROUTING = all(
        [
            transform["type"] in self.KNOWN_NONTOPICROUTING_TRANSFORMS
            for transform in transforms
        ]
    )

    if NO_TRANSFORM or ALL_TRANSFORMS_NON_TOPICROUTING:
        self.connector_manifest.lineages = self.default_get_lineages(
            database_name=database_name,
            source_platform=source_platform,
            topic_prefix=topic_prefix,
            instance_name=instance_name,
        )
        return

    if SINGLE_TRANSFORM and transforms[0]["type"] == self.REGEXROUTER:
        tables = self.get_table_names()
        topic_names = list(self.connector_manifest.topic_names)

        from java.util.regex import Pattern

        for table in tables:
            source_table: str = table[-1]
            topic = topic_prefix + source_table if topic_prefix else source_table

            transform_regex = Pattern.compile(transforms[0]["regex"])
            transform_replacement = transforms[0]["replacement"]

            matcher = transform_regex.matcher(topic)
            if matcher.matches():
                topic = matcher.replaceFirst(transform_replacement)

            # Additional check to confirm that the topic present
            # in connector topics
            if topic in self.connector_manifest.topic_names:
                # include schema name for three-level hierarchies
                if has_three_level_hierarchy(source_platform) and len(table) > 1:
                    source_table = f"{table[-2]}.{table[-1]}"

                dataset_name = get_dataset_name(database_name, instance_name, source_table)

                lineage = KafkaConnectLineage(
                    source_dataset=dataset_name,
                    source_platform=source_platform,
                    target_dataset=topic,
                    target_platform="kafka",
                )
                topic_names.remove(topic)
                lineages.append(lineage)

        if topic_names:
            lineages.extend(
                self.default_get_lineages(
                    database_name=database_name,
                    source_platform=source_platform,
                    topic_prefix=topic_prefix,
                    topic_names=topic_names,
                    include_source_dataset=False,
                )
            )
            self.report_warning(
                self.connector_manifest.name,
                f"could not find input dataset, for connector topics {topic_names}",
            )
        self.connector_manifest.lineages = lineages
        return
    else:
        include_source_dataset = True
        if SINGLE_TRANSFORM and UNKNOWN_TRANSFORM:
            self.report_warning(
                self.connector_manifest.name,
                f"could not find input dataset, connector has unknown transform - {transforms[0]['type']}",
            )
            include_source_dataset = False
        if not SINGLE_TRANSFORM and UNKNOWN_TRANSFORM:
            self.report_warning(
                self.connector_manifest.name,
                "could not find input dataset, connector has one or more unknown transforms",
            )
            include_source_dataset = False
        lineages = self.default_get_lineages(
            database_name=database_name,
            source_platform=source_platform,
            topic_prefix=topic_prefix,
            include_source_dataset=include_source_dataset,
            instance_name=instance_name,
        )
        self.connector_manifest.lineages = lineages
        return
            return sub;
        }
    }
    return "";
}
```

---

**Method 3: Regular Expression.**

Start from the whole of `str1` and keep decreasing the right bound until we find the gcd.

Note for regex:

1. `()` makes the string inside it a group;
2. `+` is a quantifier, which means 1 or more of the group ahead of the `+`.

```
import java.util.regex.Pattern;

public String gcdOfStrings(String str1, String str2) {
    for (int i = str1.length(); i > 0; --i) {
        String gcd = str1.substring(0, i), ptn = "(" + gcd + ")+";
        if (Pattern.compile(ptn).matcher(str1).matches()
                && Pattern.compile(ptn).matcher(str2).matches()) {
            return gcd;
        }
    }
    return "";
}
```

**Analysis:**

Time: O(n ^ 2), space: O(n), where n = max(str1.length(), str2.length()).