Python VerEx示例，verbalexpressions.VerEx Python示例

示例#1

0

显示文件

 def eh_linha_vazia(self, linha):
     """Tell whether ``linha`` counts as an empty line.

     The line is considered empty when any of these holds:

     * the start-of-line/end-of-line pattern matches it as given;
     * the same pattern matches the result of replacing the ``' '``
       matches in it with an empty string;
     * it is exactly the empty string.

     Returns a ``bool``.
     """
     padrao_vazio = VerEx().start_of_line().end_of_line()
     casa_original = padrao_vazio.match(linha) is not None
     sem_espacos = VerEx().find(' ').replace(linha, '')
     casa_sem_espacos = padrao_vazio.match(sem_espacos) is not None
     eh_string_vazia = linha == ''
     return casa_original or casa_sem_espacos or eh_string_vazia

示例#2

0

显示文件

文件： preprocess.py 项目： ramyabuva/ML4UVA-Business-Closures

    def sentiment_words(self, filename):
        """
        Build a word -> sentiment score lookup from a sentiment lexicon file.

        The file is read as a table with ``pd.read_table`` (skipping its
        first 26 lines) and must provide ``PosScore``, ``NegScore`` and
        ``SynsetTerms`` columns.  Each ``SynsetTerms`` cell is split on
        single spaces into terms; a term contributes only when the number
        following its ``#`` is exactly ``1``.

        Parameters
        ----------
        filename : str
            File path for sentiment scores.

        Returns
        -------
        dict
            Maps each sense-1 word (reduced to its letters a-z / A-Z) to
            ``PosScore - NegScore``.  Rows whose score is zero are left
            out.  When a word occurs in several rows, the last row wins.

        """

        df = pd.read_table(filename, skiprows=26)
        df['score'] = df['PosScore'] - df['NegScore']
        df = df[['SynsetTerms', 'score']]
        df.columns = ['words', 'score']

        # remove neutral words
        df = df[df['score'] != 0]

        # Sense number after '#'.  The whole run of digits is captured so
        # that senses 10-19 are not mistaken for sense 1.
        rx_sense = re.compile('#([0-9]+)')

        # Single letters; joined back together they give the bare word.
        rx_letter = re.compile(VerEx().range('a', 'z', 'A', 'Z').source())

        sent_dict = {}
        for terms, score in zip(df['words'], df['score']):
            # Handle every term on its own: a row holding one term goes
            # through the same path as a row holding several, and a word
            # always stays paired with its own sense number.
            for term in terms.split(' '):
                sense = rx_sense.search(term)
                # only sentiment for the most common meaning of the word
                if sense is None or sense.group(1) != '1':
                    continue
                word = ''.join(rx_letter.findall(term))
                if word:
                    sent_dict[word] = score

        return sent_dict

示例#3

0

显示文件

文件： Block_Chain.py 项目： satori-koishi/Spider_All

# mylist = []
# if mylist:
#     print('zzzz')
# zzz = ['3'][0]
# zzz = str(zzz)
# print(zzz)
# website = '222222222222222222222'
# #
# # print(website.startswith('www'))
# # if website.startswith('www'):
# #     project_performance
# #     print('AAAAAAAAAAAAAAAAA')

from verbalexpressions import VerEx
# Stand-alone builder instance; the expression below starts from a new one
verbal_expression = VerEx()

# Input sentence for the substitution demo
replace_me = "Replace bird with a duck"

# Pattern that finds the literal word "bird"
expression = VerEx().find('bird')

# First approach: let the VerEx object perform the substitution itself
result_VerEx = expression.replace(replace_me, 'duck')
print(result_VerEx)

# Second approach: take the compiled pattern and hand it to the re module
import re
regexp = expression.compile()
result_re = re.sub(regexp, 'duck', replace_me)

示例#4

0

显示文件

class VerExTest(unittest.TestCase):
    '''
        Tests for verbal_expressions.py

        Every test builds a pattern on the VerEx instance created in
        setUp, turns it into a regular expression with regex() and checks
        it against a sample string.

        The non-deprecated assertion names (assertEqual, assertRegex,
        assertNotRegex) are used; the older aliases were removed from
        unittest in Python 3.12.

        Tests are described with comments rather than docstrings so that
        unittest keeps reporting them by method name.
    '''

    def setUp(self):
        self.v = VerEx()

    def tearDown(self):
        self.v = None
        self.exp = None

    def test_should_render_verex_as_string(self):
        self.assertEqual(str(self.v.add('^$')), '^$')

    # --- character ranges ------------------------------------------------

    def test_should_match_characters_in_range(self):
        self.exp = self.v.start_of_line().range('a', 'c').regex()
        for character in ['a', 'b', 'c']:
            self.assertRegex(character, self.exp)

    def test_should_not_match_characters_outside_of_range(self):
        self.exp = self.v.start_of_line().range('a', 'c').regex()
        self.assertNotRegex('d', self.exp)

    def test_should_match_characters_in_extended_range(self):
        self.exp = self.v.start_of_line().range('a', 'b', 'X', 'Z').regex()
        for character in ['a', 'b']:
            self.assertRegex(character, self.exp)
        for character in ['X', 'Y', 'Z']:
            self.assertRegex(character, self.exp)

    def test_should_not_match_characters_outside_of_extended_range(self):
        self.exp = self.v.start_of_line().range('a', 'b', 'X', 'Z').regex()
        self.assertNotRegex('c', self.exp)
        self.assertNotRegex('W', self.exp)

    # --- anchors and wildcards -------------------------------------------

    def test_should_match_start_of_line(self):
        self.exp = self.v.start_of_line().regex()
        self.assertRegex('text  ', self.exp, 'Not started :(')

    def test_should_match_end_of_line(self):
        self.exp = self.v.start_of_line().end_of_line().regex()
        self.assertRegex('', self.exp, 'It\'s not the end!')

    def test_should_match_anything(self):
        self.exp = self.v.start_of_line().anything().end_of_line().regex()
        self.assertRegex('!@#$%¨&*()__+{}', self.exp, 'Not so anything...')

    def test_should_match_anything_but_specified_element_when_element_is_not_found(self):
        self.exp = self.v.start_of_line().anything_but('X').end_of_line().regex()
        self.assertRegex('Y Files', self.exp, 'Found the X!')

    def test_should_not_match_anything_but_specified_element_when_specified_element_is_found(self):
        self.exp = self.v.start_of_line().anything_but('X').end_of_line().regex()
        self.assertNotRegex('VerEX', self.exp, 'Didn\'t found the X :(')

    # --- literals and optional parts -------------------------------------

    def test_should_find_element(self):
        self.exp = self.v.start_of_line().find('Wally').end_of_line().regex()
        self.assertRegex('Wally', self.exp, '404! Wally not Found!')

    def test_should_not_find_missing_element(self):
        self.exp = self.v.start_of_line().find('Wally').end_of_line().regex()
        self.assertNotRegex('Wall-e', self.exp, 'DAFUQ is Wall-e?')

    def test_should_match_when_maybe_element_is_present(self):
        self.exp = self.v.start_of_line().find('Python2.').maybe('7').end_of_line().regex()
        self.assertRegex('Python2.7', self.exp, 'Version doesn\'t match!')

    def test_should_match_when_maybe_element_is_missing(self):
        self.exp = self.v.start_of_line().find('Python2.').maybe('7').end_of_line().regex()
        self.assertRegex('Python2.', self.exp, 'Version doesn\'t match!')

    def test_should_match_on_any_when_element_is_found(self):
        self.exp = self.v.start_of_line().any('Q').anything().end_of_line().regex()
        self.assertRegex('Query', self.exp, 'No match found!')

    def test_should_not_match_on_any_when_element_is_not_found(self):
        self.exp = self.v.start_of_line().any('Q').anything().end_of_line().regex()
        self.assertNotRegex('W', self.exp, 'I\'ve found it!')

    # --- line breaks, tabs and words -------------------------------------

    def test_should_match_when_line_break_present(self):
        self.exp = self.v.start_of_line().anything().line_break().anything().end_of_line().regex()
        self.assertRegex('Marco \n Polo', self.exp, 'Give me a break!!')

    def test_should_match_when_line_break_and_carriage_return_present(self):
        self.exp = self.v.start_of_line().anything().line_break().anything().end_of_line().regex()
        self.assertRegex('Marco \r\n Polo', self.exp, 'Give me a break!!')

    def test_should_not_match_when_line_break_is_missing(self):
        self.exp = self.v.start_of_line().anything().line_break().anything().end_of_line().regex()
        self.assertNotRegex('Marco Polo', self.exp, 'There\'s a break here!')

    def test_should_match_when_tab_present(self):
        self.exp = self.v.start_of_line().anything().tab().end_of_line().regex()
        # The trailing tab is written as an escape so it stays visible and
        # survives editors that convert tabs to spaces.
        self.assertRegex('One tab only\t', self.exp, 'No tab here!')

    def test_should_not_match_when_tab_is_missing(self):
        self.exp = self.v.start_of_line().anything().tab().end_of_line().regex()
        self.assertFalse(re.match(self.exp, 'No tab here'), 'There\'s a tab here!')

    def test_should_match_when_word_present(self):
        self.exp = self.v.start_of_line().anything().word().end_of_line().regex()
        self.assertRegex('Oneword', self.exp, 'Not just a word!')

    def test_not_match_when_two_words_are_present_instead_of_one(self):
        # NOTE(review): this builds a tab() pattern although the test name
        # talks about words - confirm whether that is intended.
        self.exp = self.v.start_of_line().anything().tab().end_of_line().regex()
        self.assertFalse(re.match(self.exp, 'Two words'), 'I\'ve found two of them')

    # --- alternation and modifiers ---------------------------------------

    def test_should_match_when_or_condition_fulfilled(self):
        self.exp = self.v.start_of_line().anything().find('G').OR().find('h').end_of_line().regex()
        self.assertRegex('Github', self.exp, 'Octocat not found')

    def test_should_not_match_when_or_condition_not_fulfilled(self):
        self.exp = self.v.start_of_line().anything().find('G').OR().find('h').end_of_line().regex()
        self.assertFalse(re.match(self.exp, 'Bitbucket'), 'Bucket not found')

    def test_should_match_on_upper_case_when_lower_case_is_given_and_any_case_is_true(self):
        self.exp = self.v.start_of_line().find('THOR').end_of_line().with_any_case(True).regex()
        self.assertRegex('thor', self.exp, 'Upper case Thor, please!')

    def test_should_match_multiple_lines(self):
        self.exp = self.v.start_of_line().anything().find('Pong').anything().end_of_line().search_one_line(True).regex()
        self.assertRegex('Ping \n Pong \n Ping', self.exp, 'Pong didn\'t answer')

    # --- composed patterns -----------------------------------------------

    def test_should_match_email_address(self):
        self.exp = self.v.start_of_line().word().then('@').word().then('.').word().end_of_line().regex()
        # A concrete address is needed here: a placeholder masked with '*'
        # has nothing for the word() steps to match.
        self.assertRegex('mail@mail.com', self.exp, 'Not a valid email')

    def test_should_match_url(self):
        self.exp = self.v.start_of_line().then('http').maybe('s').then('://').maybe('www.').word().then('.').word().maybe('/').end_of_line().regex()
        self.assertRegex('https://www.google.com/', self.exp, 'Not a valid URL')

    def test_should_find_number(self):
        self.exp = self.v.start_of_line().number().end_of_line().regex()
        self.assertRegex('123', self.exp, 'Number not found')

    # --- named groups ----------------------------------------------------

    def test_word_should_find_named_groups(self):
        name = "Linus Torvalds"
        self.exp = self.v.start_of_line().word(name='first_name').then(' ').word(name='last_name').end_of_line().regex()
        match = self.exp.match(name)
        self.assertIsNotNone(match)
        self.assertEqual(match.group('first_name'), 'Linus')
        self.assertEqual(match.group('last_name'), 'Torvalds')

    def test_number_should_find_named_groups(self):
        self.exp = self.v.start_of_line().number('number').end_of_line().regex()
        match = self.exp.match('123')
        # On failure the generated pattern is reported as the message.
        self.assertIsNotNone(match, self.exp.pattern)
        self.assertEqual(match.group('number'), '123')

示例#5

0

显示文件

 def setUp(self):
     # Store a newly constructed VerEx instance on the test object.
     # NOTE(review): presumably the unittest per-test fixture hook; the
     # enclosing class is not part of this excerpt.
     self.v = VerEx()

示例#6

0

显示文件

#!/usr/bin/env python
# coding: utf-8

# In[198]:

from verbalexpressions import VerEx

# In[199]:

# Builder instance the expression below is assembled on
verEx = VerEx()

# In[200]:

# Sample inputs: names surrounded by digits (not used again in this excerpt)
strings = ['123Abdul233', '233Raja434', '223Ethan Hunt444']

# In[201]:

# Character range over a-z and A-Z plus a trailing ' ' argument.
# NOTE(review): the ' ' has no partner argument - confirm how VerEx.range
# treats an odd number of arguments (presumably it adds the space itself).
expression = verEx.range('a', 'z', 'A', 'Z', ' ')

# In[202]:

# Bare expression statement: the returned pattern text is not stored
# (presumably shown as cell output when run in a notebook)
expression.source()

# In[204]:

import re

# Compiled form of the expression built above
re_exp = expression.compile()

# In[205]:

示例#7

0

显示文件

文件： document.py 项目： dooteeen/QuickFlick

import os
import subprocess
import sys
from mdx_gfm import GithubFlavoredMarkdownExtension

# Debug switch read from the first command-line argument.  The text is
# lower-cased and parsed as JSON, so "True"/"False" become booleans.
# Raises IndexError when the script is started without an argument.
DEBUG_MODE = json.loads(sys.argv[1].lower())


def log(s: str):
    """Print ``s`` to stdout prefixed with ``" >> "``.

    The value is wrapped in a one-element tuple before formatting, so a
    tuple passed by mistake is printed as-is instead of being unpacked as
    the ``%`` format arguments (which raises ``TypeError``).
    """
    print(" >> %s" % (s,))


# Base URL inserted in front of rewritten link targets (see to_fix_linkpath)
url = 'file:///android_asset/'

# NOTE(review): the \N back-references below assume that VerEx wraps every
# find/anything_but/maybe step in its own capture group, numbered in call
# order - confirm against the verbalexpressions version in use.

# pre hyperlink fixer: markdown links whose target starts with "http",
# i.e. "[text](http...)"; the replacement keeps only group 2 (the text)
re_disable_link = (VerEx().find("[").anything_but("]").find(
    "](http").anything_but(")").find(")").regex())
to_disable_link = r'\2'

# pre hyperlink fixer: in-page links of the form "[text](#anchor)"
re_fix_innerlink = (VerEx().find("[").anything_but("]").find(
    "](#").anything_but(")").find(")").regex())
# %s will be self markdown name (left unfilled here)
to_fix_innerlink = r'[\2](%s#\4)'

# fix hyperlink: "[text](path)" or "[text](path#anchor)"; the replacement
# puts `url` in front of the path and ".html" right after it
re_fix_linkpath = (VerEx().find("[").anything_but("]").find("](").anything_but(
    '#').maybe("#").anything_but(")").find(")").regex())
to_fix_linkpath = r'[\2](%s\4.html\5\6)' % url

# img-tag to a-tag
re_img2a = (VerEx().find("<img").anything().find(' src="').anything().find(

示例#8

0

显示文件

# -*- coding:utf-8 -*-
import re
from verbalexpressions import VerEx

# Create an example of how to test for correctly formed URLs

verbal_expression = VerEx()
# Earlier experiment, kept for reference:
# word = (verbal_expression.start_of_line('#').anything_but("#").end_of_line("#"))
# print(word.source())

# Start again from a fresh builder and describe a URL one step at a time
verbal_expression = VerEx()
tester = (
    verbal_expression
    .start_of_line()
    .find('http')
    .maybe('s')
    .find('://')
    .maybe('www.')
    .anything_but(' ')
    .end_of_line()
)

# Usage sketch (disabled): validate a sample URL with the tester
# test_url = "https://www.google.com"
# if tester.match(test_url):
#     print("Valid URL")

# Show the regular expression text the builder produced
print(tester.source())  # => ^(http)(s)?(\:\/\/)(www\.)?([^\ ]*)$

示例#9

0

显示文件

        parser.add_argument("--mode",
                            choices=['basic', 'website'],
                            default='basic',
                            help="Set the fetch mode to be used.")
        parser.add_argument(dest="uri", help="Target URI to be fetched")

        # Process arguments
        args = parser.parse_args()

        path = args.path
        if not os.path.isdir(path):
            raise Exception("Invalid Path: " + str(path))

        ua = str(args.ua)

        verbal_expression = VerEx()
        tester = (verbal_expression.start_of_line().find('http').maybe(
            's').find('://').maybe('www.').anything_but(' ').end_of_line())

        uri = args.uri
        if not tester.match(uri):
            raise Exception("Invalid URI: " + str(uri) +
                            " \n Hint - matching on: " + str(tester.source()))
        verbose = args.verbose

        user = getpass.getuser()

        # Logging
        logger = logging.getLogger('BoomLog')
        logger.setLevel(LOG_LEVEL)
        log_handler = RotatingFileHandler(LOGGING_PATH, maxBytes=100000)

示例#10

0

显示文件

import markdown
import os
import subprocess
import sys

# Debug switch read from the first command-line argument.  The text is
# lower-cased and parsed as JSON, so "True"/"False" become booleans.
# Raises IndexError when the script is started without an argument.
DEBUG_MODE = json.loads(sys.argv[1].lower())


def log(s: str):
    """Print ``s`` to stdout prefixed with ``" >> "``.

    The value is wrapped in a one-element tuple before formatting, so a
    tuple passed by mistake is printed as-is instead of being unpacked as
    the ``%`` format arguments (which raises ``TypeError``).
    """
    print(" >> %s" % (s,))


# Base URL inserted in front of rewritten link targets (see to_fix_linkpath)
url = 'file:///android_asset/'

# NOTE(review): the \N back-references below assume that VerEx wraps every
# find/anything_but/maybe step in its own capture group, numbered in call
# order - confirm against the verbalexpressions version in use.

# pre hyperlink fixer: markdown links whose target starts with "http",
# i.e. "[text](http...)"; the replacement keeps only group 2 (the text)
re_disable_link = (VerEx().find("[").anything_but("]").find(
    "](http").anything_but(")").find(")").regex())
to_disable_link = r'\2'

# pre hyperlink fixer: in-page links of the form "[text](#anchor)"
re_fix_innerlink = (VerEx().find("[").anything_but("]").find(
    "](#").anything_but(")").find(")").regex())
# %s will be self markdown name (left unfilled here)
to_fix_innerlink = r'[\2](%s#\4)'

# fix hyperlink: "[text](path)" or "[text](path#anchor)"; the replacement
# puts `url` in front of the path and ".html" right after it
re_fix_linkpath = (VerEx().find("[").anything_but("]").find("](").anything_but(
    '#').maybe("#").anything_but(")").find(")").regex())
to_fix_linkpath = r'[\2](%s\4.html\5\6)' % url

# img-tag to a-tag
re_img2a = (VerEx().find("<img").anything().find(' src="').anything().find(