def eh_linha_vazia(self, linha):
    """Tell whether ``linha`` counts as an empty line.

    Three checks are performed and the line is reported as empty when any
    of them succeeds:

    1. the anchors-only expression ``start_of_line().end_of_line()``
       matches ``linha`` as given;
    2. the same expression matches ``linha`` after every ``' '`` found by
       ``VerEx().find(' ')`` has been replaced with the empty string;
    3. ``linha`` is exactly ``''``.

    Parameters
    ----------
    linha : str
        Line of text to inspect.

    Returns
    -------
    bool
        True if at least one check succeeds, False otherwise.
    """
    empty_pattern = VerEx().start_of_line().end_of_line()

    matches_as_is = empty_pattern.match(linha) is not None

    without_spaces = VerEx().find(' ').replace(linha, '')
    matches_without_spaces = empty_pattern.match(without_spaces) is not None

    is_empty_string = linha == ''
    return matches_as_is or matches_without_spaces or is_empty_string
def sentiment_words(self, filename):
    """Build a word -> sentiment score lookup from a tab-separated file.

    The first 26 lines of the file are skipped and the next line is used
    as the header. The table must provide ``PosScore``, ``NegScore`` and
    ``SynsetTerms`` columns (presumably a SentiWordNet dump -- confirm
    against the data actually used). Each row's score is
    ``PosScore - NegScore``; rows with a score of zero, or with a missing
    score or missing terms, are ignored.

    ``SynsetTerms`` holds space-separated entries of the form
    ``word#sense``. Only entries whose sense number is exactly 1 (the most
    common meaning of the word) contribute to the result.

    Parameters
    ----------
    filename : str
        File path for sentiment scores.

    Returns
    -------
    dict
        Maps each sense-1 word, reduced to its ASCII letters, to the score
        of its row. When the same word appears in several rows, the last
        row wins.
    """
    df = pd.read_table(filename, skiprows=26)
    df['score'] = df['PosScore'] - df['NegScore']
    df = df[['SynsetTerms', 'score']]
    df.columns = ['words', 'score']

    # Rows lacking terms or a computable score cannot be parsed below.
    df = df.dropna()

    # remove neutral words
    df = df[df['score'] != 0]

    # Capture the whole sense number so that e.g. "#10" is not read as "#1".
    sense_rx = re.compile(r'#([0-9]+)')
    # Keys keep ASCII letters only; digits, '#', '_' and '-' are dropped.
    letter_rx = re.compile(r'[a-zA-Z]')

    sent_dict = {}
    for terms, score in zip(df['words'], df['score']):
        # Each term is parsed on its own, so a word is always paired with
        # its own sense number -- including when the row has a single term.
        for term in str(terms).split(' '):
            sense = sense_rx.search(term)
            # only sentiment for the most common meaning of the word
            if sense is None or int(sense.group(1)) != 1:
                continue
            word = ''.join(letter_rx.findall(term))
            if word:
                sent_dict[word] = score
    return sent_dict
import re

from verbalexpressions import VerEx

verbal_expression = VerEx()

# Sentence in which the word "bird" is going to be swapped out.
replace_me = "Replace bird with a duck"

# Expression that looks for the literal word "bird".
expression = VerEx().find('bird')

# First route: let the VerEx object perform the substitution itself.
result_VerEx = expression.replace(replace_me, 'duck')
print(result_VerEx)

# Second route: compile the expression and call ``sub`` on the result.
regexp = expression.compile()
result_re = regexp.sub('duck', replace_me)
class VerExTest(unittest.TestCase):
    """Tests for verbal_expressions.py.

    Every test starts from a fresh ``VerEx`` builder in ``self.v``, chains
    builder calls, compiles the result with ``regex()`` into ``self.exp``
    and asserts on how that pattern behaves against a sample string.

    The assertions use ``assertEqual`` / ``assertRegex`` /
    ``assertNotRegex``; the older ``assertEquals`` /
    ``assertRegexpMatches`` / ``assertNotRegexpMatches`` aliases no longer
    exist in current Python releases.
    """

    def setUp(self):
        self.v = VerEx()

    def tearDown(self):
        self.v = None
        self.exp = None

    def test_should_render_verex_as_string(self):
        self.assertEqual(str(self.v.add('^$')), '^$')

    def test_should_match_characters_in_range(self):
        self.exp = self.v.start_of_line().range('a', 'c').regex()
        for character in ['a', 'b', 'c']:
            self.assertRegex(character, self.exp)

    def test_should_not_match_characters_outside_of_range(self):
        self.exp = self.v.start_of_line().range('a', 'c').regex()
        self.assertNotRegex('d', self.exp)

    def test_should_match_characters_in_extended_range(self):
        self.exp = self.v.start_of_line().range('a', 'b', 'X', 'Z').regex()
        for character in ['a', 'b']:
            self.assertRegex(character, self.exp)
        for character in ['X', 'Y', 'Z']:
            self.assertRegex(character, self.exp)

    def test_should_not_match_characters_outside_of_extended_range(self):
        self.exp = self.v.start_of_line().range('a', 'b', 'X', 'Z').regex()
        self.assertNotRegex('c', self.exp)
        self.assertNotRegex('W', self.exp)

    def test_should_match_start_of_line(self):
        self.exp = self.v.start_of_line().regex()
        self.assertRegex('text ', self.exp, 'Not started :(')

    def test_should_match_end_of_line(self):
        self.exp = self.v.start_of_line().end_of_line().regex()
        self.assertRegex('', self.exp, 'It\'s not the end!')

    def test_should_match_anything(self):
        self.exp = self.v.start_of_line().anything().end_of_line().regex()
        self.assertRegex('!@#$%¨&*()__+{}', self.exp, 'Not so anything...')

    def test_should_match_anything_but_specified_element_when_element_is_not_found(self):
        self.exp = self.v.start_of_line().anything_but('X').end_of_line().regex()
        self.assertRegex('Y Files', self.exp, 'Found the X!')

    def test_should_not_match_anything_but_specified_element_when_specified_element_is_found(self):
        self.exp = self.v.start_of_line().anything_but('X').end_of_line().regex()
        self.assertNotRegex('VerEX', self.exp, 'Didn\'t found the X :(')

    def test_should_find_element(self):
        self.exp = self.v.start_of_line().find('Wally').end_of_line().regex()
        self.assertRegex('Wally', self.exp, '404! Wally not Found!')

    def test_should_not_find_missing_element(self):
        self.exp = self.v.start_of_line().find('Wally').end_of_line().regex()
        self.assertNotRegex('Wall-e', self.exp, 'DAFUQ is Wall-e?')

    def test_should_match_when_maybe_element_is_present(self):
        self.exp = self.v.start_of_line().find('Python2.').maybe('7').end_of_line().regex()
        self.assertRegex('Python2.7', self.exp, 'Version doesn\'t match!')

    def test_should_match_when_maybe_element_is_missing(self):
        self.exp = self.v.start_of_line().find('Python2.').maybe('7').end_of_line().regex()
        self.assertRegex('Python2.', self.exp, 'Version doesn\'t match!')

    def test_should_match_on_any_when_element_is_found(self):
        self.exp = self.v.start_of_line().any('Q').anything().end_of_line().regex()
        self.assertRegex('Query', self.exp, 'No match found!')

    def test_should_not_match_on_any_when_element_is_not_found(self):
        self.exp = self.v.start_of_line().any('Q').anything().end_of_line().regex()
        self.assertNotRegex('W', self.exp, 'I\'ve found it!')

    def test_should_match_when_line_break_present(self):
        self.exp = (
            self.v.start_of_line().anything().line_break().anything().end_of_line().regex()
        )
        self.assertRegex('Marco \n Polo', self.exp, 'Give me a break!!')

    def test_should_match_when_line_break_and_carriage_return_present(self):
        self.exp = (
            self.v.start_of_line().anything().line_break().anything().end_of_line().regex()
        )
        self.assertRegex('Marco \r\n Polo', self.exp, 'Give me a break!!')

    def test_should_not_match_when_line_break_is_missing(self):
        self.exp = (
            self.v.start_of_line().anything().line_break().anything().end_of_line().regex()
        )
        self.assertNotRegex('Marco Polo', self.exp, 'There\'s a break here!')

    def test_should_match_when_tab_present(self):
        self.exp = self.v.start_of_line().anything().tab().end_of_line().regex()
        # Explicit \t escape: the fixture must really end with a tab
        # character, which a literal whitespace character in the source
        # cannot guarantee.
        self.assertRegex('One tab only\t', self.exp, 'No tab here!')

    def test_should_not_match_when_tab_is_missing(self):
        self.exp = self.v.start_of_line().anything().tab().end_of_line().regex()
        self.assertFalse(re.match(self.exp, 'No tab here'), 'There\'s a tab here!')

    def test_should_match_when_word_present(self):
        self.exp = self.v.start_of_line().anything().word().end_of_line().regex()
        self.assertRegex('Oneword', self.exp, 'Not just a word!')

    def test_not_match_when_two_words_are_present_instead_of_one(self):
        # A single word() between the anchors, so the space separating the
        # two words is what makes the match fail. No anything() here: it
        # would absorb the first word and the space.
        self.exp = self.v.start_of_line().word().end_of_line().regex()
        self.assertFalse(re.match(self.exp, 'Two words'), 'I\'ve found two of them')

    def test_should_match_when_or_condition_fulfilled(self):
        self.exp = (
            self.v.start_of_line().anything().find('G').OR().find('h').end_of_line().regex()
        )
        self.assertRegex('Github', self.exp, 'Octocat not found')

    def test_should_not_match_when_or_condition_not_fulfilled(self):
        self.exp = (
            self.v.start_of_line().anything().find('G').OR().find('h').end_of_line().regex()
        )
        self.assertFalse(re.match(self.exp, 'Bitbucket'), 'Bucket not found')

    def test_should_match_on_upper_case_when_lower_case_is_given_and_any_case_is_true(self):
        self.exp = (
            self.v.start_of_line().find('THOR').end_of_line().with_any_case(True).regex()
        )
        self.assertRegex('thor', self.exp, 'Upper case Thor, please!')

    def test_should_match_multiple_lines(self):
        self.exp = (
            self.v.start_of_line()
            .anything()
            .find('Pong')
            .anything()
            .end_of_line()
            .search_one_line(True)
            .regex()
        )
        self.assertRegex('Ping \n Pong \n Ping', self.exp, 'Pong didn\'t answer')

    def test_should_match_email_address(self):
        self.exp = (
            self.v.start_of_line()
            .word()
            .then('@')
            .word()
            .then('.')
            .word()
            .end_of_line()
            .regex()
        )
        # The sample must consist of word characters around '@' and '.',
        # otherwise word() can never match it.
        self.assertRegex('user@example.com', self.exp, 'Not a valid email')

    def test_should_match_url(self):
        self.exp = (
            self.v.start_of_line()
            .then('http')
            .maybe('s')
            .then('://')
            .maybe('www.')
            .word()
            .then('.')
            .word()
            .maybe('/')
            .end_of_line()
            .regex()
        )
        self.assertRegex('https://www.google.com/', self.exp, 'Not a valid URL')

    def test_should_find_number(self):
        self.exp = self.v.start_of_line().number().end_of_line().regex()
        self.assertRegex('123', self.exp, 'Number not found')

    def test_word_should_find_named_groups(self):
        name = "Linus Torvalds"
        self.exp = (
            self.v.start_of_line()
            .word(name='first_name')
            .then(' ')
            .word(name='last_name')
            .end_of_line()
            .regex()
        )
        match = self.exp.match(name)
        self.assertIsNotNone(match)
        self.assertEqual(match.group('first_name'), 'Linus')
        self.assertEqual(match.group('last_name'), 'Torvalds')

    def test_number_should_find_named_groups(self):
        self.exp = self.v.start_of_line().number('number').end_of_line().regex()
        match = self.exp.match('123')
        # The pattern text is the failure message, to ease debugging.
        self.assertIsNotNone(match, self.exp.pattern)
        self.assertEqual(match.group('number'), '123')
def setUp(self):
    """Store a newly constructed ``VerEx`` instance on ``self.v``."""
    fresh_builder = VerEx()
    self.v = fresh_builder
#!/usr/bin/env python
# coding: utf-8

# In[198]:
from verbalexpressions import VerEx


# In[199]:
# Single builder instance reused by the cells below.
verEx = VerEx()


# In[200]:
# Sample inputs: text surrounded by digits.
strings = [
    '123Abdul233',
    '233Raja434',
    '223Ethan Hunt444',
]


# In[201]:
# Ranges a-z and A-Z plus an unpaired trailing ' ' argument.
# NOTE(review): presumably the lone ' ' adds the space character to the
# class -- confirm against VerEx.range.
expression = verEx.range('a', 'z', 'A', 'Z', ' ')


# In[202]:
# Bare expression: shows the pattern text in a notebook, no effect in a script.
expression.source()


# In[204]:
import re

re_exp = expression.compile()


# In[205]:
import os import subprocess import sys from mdx_gfm import GithubFlavoredMarkdownExtension DEBUG_MODE = json.loads(sys.argv[1].lower()) def log(s: str): print(" >> %s" % s) url = 'file:///android_asset/' # pre hyperlink fixer re_disable_link = (VerEx().find("[").anything_but("]").find( "](http").anything_but(")").find(")").regex()) to_disable_link = r'\2' # pre hyperlink fixer re_fix_innerlink = (VerEx().find("[").anything_but("]").find( "](#").anything_but(")").find(")").regex()) # %s will be self markdown name to_fix_innerlink = r'[\2](%s#\4)' # fix hyperlink re_fix_linkpath = (VerEx().find("[").anything_but("]").find("](").anything_but( '#').maybe("#").anything_but(")").find(")").regex()) to_fix_linkpath = r'[\2](%s\4.html\5\6)' % url # img-tag to a-tag re_img2a = (VerEx().find("<img").anything().find(' src="').anything().find(
# -*- coding:utf-8 -*-
import re

from verbalexpressions import VerEx

# Example of how to test for correctly formed URLs: optional "s" after
# "http", the literal "://", an optional "www." and then any run of
# characters that are not spaces, anchored at both ends of the line.
verbal_expression = VerEx()
tester = (
    verbal_expression
    .start_of_line()
    .find('http')
    .maybe('s')
    .find('://')
    .maybe('www.')
    .anything_but(' ')
    .end_of_line()
)

# Print the generated regex.
# NOTE(review): how much of the text comes out backslash-escaped is
# presumably decided by re.escape and so varies with the Python version,
# e.g. ^(http)(s)?(\:\/\/)(www\.)?([^\ ]*)$ on interpreters that escape
# every non-alphanumeric character, with ':' and '/' left bare on newer ones.
print(tester.source())
parser.add_argument(
    "--mode",
    choices=['basic', 'website'],
    default='basic',
    help="Set the fetch mode to be used.",
)
parser.add_argument(dest="uri", help="Target URI to be fetched")

# Process arguments
args = parser.parse_args()

# The target path must be an existing directory.
path = args.path
if not os.path.isdir(path):
    raise Exception("Invalid Path: " + str(path))

ua = str(args.ua)

# The URI must look like http(s)://[www.]<no spaces> from start to end.
verbal_expression = VerEx()
tester = (
    verbal_expression
    .start_of_line()
    .find('http')
    .maybe('s')
    .find('://')
    .maybe('www.')
    .anything_but(' ')
    .end_of_line()
)

uri = args.uri
if not tester.match(uri):
    # Include the pattern text so the user can see what was expected.
    raise Exception(
        "Invalid URI: " + str(uri)
        + " \n Hint - matching on: " + str(tester.source())
    )

verbose = args.verbose
user = getpass.getuser()

# Logging
logger = logging.getLogger('BoomLog')
logger.setLevel(LOG_LEVEL)
log_handler = RotatingFileHandler(LOGGING_PATH, maxBytes=100000)
import markdown import os import subprocess import sys DEBUG_MODE = json.loads(sys.argv[1].lower()) def log(s: str): print(" >> %s" % s) url = 'file:///android_asset/' # pre hyperlink fixer re_disable_link = (VerEx().find("[").anything_but("]").find( "](http").anything_but(")").find(")").regex()) to_disable_link = r'\2' # pre hyperlink fixer re_fix_innerlink = (VerEx().find("[").anything_but("]").find( "](#").anything_but(")").find(")").regex()) # %s will be self markdown name to_fix_innerlink = r'[\2](%s#\4)' # fix hyperlink re_fix_linkpath = (VerEx().find("[").anything_but("]").find("](").anything_but( '#').maybe("#").anything_but(")").find(")").regex()) to_fix_linkpath = r'[\2](%s\4.html\5\6)' % url # img-tag to a-tag re_img2a = (VerEx().find("<img").anything().find(' src="').anything().find(