Пример #1
0
 def filter(self, tree, col, descending):
     """Print the row and column of ``tree`` located under the mouse pointer.

     The pointer's screen coordinates are made widget-relative by subtracting
     the widget's root offset.  The identifiers returned by ``identify_row``
     and ``identify_column`` are printed after removing every ``I00`` from
     the row identifier and every ``#`` from the column identifier.

     Args:
         tree: Object providing ``winfo_pointerxy``, ``winfo_rootx``,
             ``winfo_rooty``, ``identify_row`` and ``identify_column``
             (presumably a ``ttk.Treeview`` -- confirm against the caller).
         col: Not used by this method.
         descending: Not used by this method.
     """
     # Row: vertical pointer position relative to the top of the widget.
     row_offset = tree.winfo_pointerxy()[1] - tree.winfo_rooty()
     row_label = re.sub('I00', '', str(tree.identify_row(row_offset)))

     # Column: horizontal pointer position relative to the widget's left edge.
     col_offset = tree.winfo_pointerxy()[0] - tree.winfo_rootx()
     col_label = re.sub(r'#', '', str(tree.identify_column(col_offset)))

     print('Row: {} & Column: {} '.format(row_label, col_label))
def clean_tweet(tweet):
    """Return ``tweet`` with mentions, links and special characters removed.

    Three kinds of text are replaced by a space:

    * ``@`` followed by ASCII letters/digits (a mention),
    * any character that is not an ASCII letter, a digit, a space or a tab,
    * ``scheme://...`` links, up to the next whitespace.

    The result is then split on whitespace and re-joined with single spaces,
    so runs of blanks collapse and leading/trailing whitespace disappears.

    Args:
        tweet: The raw tweet text.

    Returns:
        The cleaned text as a single space-separated string.
    """
    # Raw string: "\w" and "\S" are regex escapes, not Python string escapes.
    # In a plain string literal they trigger an invalid-escape warning.
    pattern = r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)"
    return ' '.join(re.sub(pattern, " ", tweet).split())
Пример #3
0
 def process(self, article):
     """Apply the configured search/replace to one page and save the result.

     The page is looked up via ``self.client.pages[article]``.  When
     ``self.regex`` is truthy, ``self.search`` is treated as a regular
     expression; otherwise it is replaced as a literal substring.  The new
     text is saved with ``self.reason`` passed as the second argument to
     ``page.edit`` (presumably the edit summary -- confirm against the
     client library in use).

     Args:
         article: Key used to look the page up in ``self.client.pages``.
     """
     page = self.client.pages[article]
     original = page.text()

     if not self.regex:
         # Literal substring replacement.
         updated = original.replace(self.search, self.replace)
     else:
         # Regular-expression replacement.
         updated = re.sub(self.search, self.replace, original)

     page.edit(updated, self.reason)
Пример #4
0
    def mostCommonWordCounter(self, paragraph: str, banned: List[str]) -> str:
        """Return the most frequent non-banned word of ``paragraph``.

        Every non-word character is replaced by a space, the text is
        lowercased and split on whitespace, and the words listed in
        ``banned`` are discarded before counting.  When several words share
        the highest count, the one that appears first in the paragraph wins.

        Stats recorded by the original author: Runtime 36 ms,
        Memory Usage 14.2 MB.

        Args:
            paragraph: Text to analyse.
            banned: Words to ignore; compared as given against the
                lowercased words of the paragraph.

        Returns:
            The most common remaining word, in lowercase.

        Raises:
            IndexError: If no word remains after filtering.
        """
        tokens = re.sub(r'[^\w]', ' ', paragraph).lower().split()

        # Build the set once so each membership test is O(1) rather than a
        # scan of the whole banned list for every token.
        blocked = set(banned)

        tally = collections.Counter(
            token for token in tokens if token not in blocked)

        return tally.most_common(1)[0][0]
Пример #5
0
    def mostCommonWord(self, paragraph: str, banned: List[str]) -> str:
        """Return the most frequent non-banned word of ``paragraph``.

        Every non-word character is replaced by a space, the text is
        lowercased and split on whitespace, and the words listed in
        ``banned`` are skipped while counting.  When several words share the
        highest count, the one whose first occurrence comes last wins.

        Stats recorded by the original author: Runtime 28 ms,
        Memory Usage 14.3 MB.

        Args:
            paragraph: Text to analyse.
            banned: Words to ignore; compared as given against the
                lowercased words of the paragraph.

        Returns:
            The most common remaining word, in lowercase.

        Raises:
            IndexError: If no word remains after filtering.
        """
        tokens = re.sub(r'[^\w]', ' ', paragraph).lower().split()

        # Build the set once so each membership test is O(1) rather than a
        # scan of the whole banned list for every token.
        blocked = set(banned)

        tally = {}
        for token in tokens:
            if token not in blocked:
                tally[token] = tally.get(token, 0) + 1

        # Stable ascending sort by count: the last entry has the highest
        # count, and equal counts keep their first-seen order.
        ranked = sorted(tally.items(), key=lambda item: item[1])

        return ranked[-1][0]
Пример #6
0
# defaultdict: a dictionary ({"key": value}) that creates missing keys on
# first access instead of raising KeyError.
counts = collections.defaultdict(int)  # missing keys start at int() == 0
for letter in "banana":
    counts[letter] += 1  # first sight of a letter starts from 0
# Key holding the largest value.  max() raises ValueError on an empty
# mapping, so the counts are filled in before asking for the maximum.
max(counts, key=counts.get)  # 'a'

anagrams = collections.defaultdict(list)
# Grouping words by their sorted letters yields, for example:
# defaultdict(<class 'list'>, {'aet': ['eat', 'tea', 'ate'], 'ant': ['tan', 'nat'], 'abt': ['bat']})
anagrams[''.join(sorted("ate"))].append(
    "ate")  # the sorted letters are the key; words with the same key share a list

# Use a regular expression to keep only word characters.
paragraph = "Bob hit a ball, the hit BALL flew far after it was hit."
banned = ["hit"]

# Replace every non-word character with a space.
words = [
    word for word in re.sub(r'[\W]', ' ', paragraph).lower().split()
    if word not in banned
]  # banned words are excluded

# Counter converts a list into {value: number of occurrences}.
counts = collections.Counter(words)
print(
    counts
)  # Counter({'ball': 2, 'bob': 1, 'a': 1, 'the': 1, 'flew': 1, 'far': 1, 'after': 1, 'it': 1, 'was': 1})
# most_common(1) returns the single most frequent entry: [('ball', 2)]
counts.most_common(1)[0][0]  # ball

# Iterate over a list as (index, value) pairs.
param_list = ["a", "b", "c"]  # sample data so the loop below can run
for i, n in enumerate(param_list):
    pass
Пример #7
0
def extract_words(sentence):
    """Split ``sentence`` into lowercase words, dropping filler words.

    Punctuation, the newline character and the digits 1-9 are replaced by
    spaces, the text is split on whitespace, each word is lowercased, and
    the filler words ``a``, ``is``, ``the`` and ``an`` are removed.

    Args:
        sentence: Text to tokenize.

    Returns:
        A list of lowercase words in their original order.
    """
    useless = {'a', 'is', 'the', 'an'}
    # The hyphen is escaped so it is a literal character; written as "+-/"
    # it forms a range that happens to cover the same characters (+ , - . /).
    # NOTE(review): the digit 0 is not stripped -- confirm whether intended.
    stripped = re.sub(r"[.,:()!?~=\'@#$%^&*_+\-/\\`<>\"\n123456789]", " ",
                      sentence)
    # Lowercase before filtering so capitalised filler words such as "The"
    # are removed as well.
    lowered = (word.lower() for word in stripped.split())
    return [word for word in lowered if word not in useless]
Пример #8
0
    def isPalindrome(self, s: str) -> bool:
        """Return True if ``s`` is a palindrome, ignoring case and symbols.

        Only the characters a-z and 0-9 of the lowercased string take part
        in the comparison; everything else is removed first.
        """
        normalized = re.sub('[^a-z0-9]', '', s.lower())
        return normalized[::-1] == normalized
def isPalindrome(self, s: str) -> bool:
    """Return True if ``s`` is a palindrome, ignoring case and symbols.

    The string is lowercased, every character other than a-z and 0-9 is
    removed, and the remainder is compared with its own reverse.
    """
    lowered = s.lower()
    # Filter out the unneeded characters with a regular expression.
    kept = re.sub('[^a-z0-9]', '', lowered)
    mirrored = kept[::-1]  # reversed copy via slicing
    return kept == mirrored