Exemplo n.º 1
0
---------
"""


# Stdlib:
import re
import unicodedata


# Internal:
from munin.provider import Provider
import munin.stopwords

# External
import guess_language
guess_language.use_enchant(True)


def normalize_unicode_glyphs(string):
    """Return *string* converted to the Unicode NFKC normal form.

    NFKC replaces compatibility characters (ligatures, circled digits,
    full-width letters, ...) with their plain equivalents and then
    recomposes the result canonically.
    """
    normalization_form = 'NFKC'
    normalized = unicodedata.normalize(normalization_form, string)
    return normalized


def strip_stopwords(words):
    text = ' '.join(words)
    language_code = guess_language.guess_language(text)
    if language_code == 'UNKNOWN':
        return words

    stopwords = munin.stopwords.load_stopwords(language_code)
    if not stopwords:
        return words
Exemplo n.º 2
0
Reference
---------
"""

# Stdlib:
import re
import unicodedata

# Internal:
from munin.provider import Provider
import munin.stopwords

# External
import guess_language

guess_language.use_enchant(True)


def normalize_unicode_glyphs(string):
    """Return *string* converted to the Unicode NFKC normal form.

    NFKC replaces compatibility characters (ligatures, circled digits,
    full-width letters, ...) with their plain equivalents and then
    recomposes the result canonically.
    """
    normalization_form = 'NFKC'
    normalized = unicodedata.normalize(normalization_form, string)
    return normalized


def strip_stopwords(words):
    text = ' '.join(words)
    language_code = guess_language.guess_language(text)
    if language_code == 'UNKNOWN':
        return words

    stopwords = munin.stopwords.load_stopwords(language_code)
    if not stopwords:
        return words
Exemplo n.º 3
0
import glob
import itertools
import os
import subprocess
import re
import sys
import time
import winreg

import pywintypes
import win32api
import win32gui
import psutil

import guess_language
guess_language.use_enchant(False)

import sublib
import common

from util.update_text import decompose_opcodes, undo_space_changes
from util.detect_encoding import detect_encoding


def get_name(path):
    """Return the last component of *path* without its final extension."""
    filename = os.path.basename(path)
    stem, _extension = os.path.splitext(filename)
    return stem


def poll_window(path, app_name, timeout=30):
    class Checker:
        def __init__(self, filename, app_name):
Exemplo n.º 4
0
def find_title(ltit, otit, ltop, otop):
    """
    Build a new headline by swapping one topic for another.

    Steps:
      1. Pick a random category and a topic in it whose name is not in
         ``ltop``; retry with a new random category up to 100 times.
      2. Drop that category and pick a second random category.
      3. Filter the second category's topics: discard any whose name also
         appears among the topics left in the first category, the chosen
         first topic itself, and any name listed in ``otop``.
      4. Pick a random second topic and fetch a headline for it.
      5. Replace the first occurrence of the second topic's name in the
         headline with the first topic's name.
      6. If the result is not in ``ltit``, record the headline and both
         topic names and persist the three history lists as JSON.

    Args:
        ltit: Collection of recently generated titles; a new headline found
            in it is rejected.
        otit: List of original headlines; appended to on success and passed
            to ``get_headline``.
        ltop: List of names recently used as first topic; appended to on
            success.
        otop: List of names recently used as second (original) topic;
            appended to on success.

    Returns:
        The new headline string, or ``False`` when no usable topic or
        headline could be produced.
    """
    use_enchant(True)
    categories = ['w', 'n', 'b', 't', 'e', 's']
    max_attempts = 100

    topic_1 = None
    for _ in range(max_attempts):
        category = random.choice(categories)
        topics_1 = get_topics(category)
        # The last remaining topic is never drawn (same as the original
        # ``len(topics_1) > 1`` guard).
        while len(topics_1) > 1 and topic_1 is None:
            candidate = random.choice(topics_1)
            topics_1.remove(candidate)
            if candidate['name'] not in ltop:
                topic_1 = candidate
            else:
                log.info('Skipping recently used topic: %s',
                         candidate['name'])
        if topic_1 is not None:
            break
    else:
        # Previously this only logged and kept looping forever; bail out
        # the same way every other failure path does.
        log.warning('Exited script with no new topics found')
        return False

    categories.remove(category)
    log.info('Category 1:  %s', category)
    log.info('Topic 1: %s', topic_1['name'])

    category = random.choice(categories)
    log.info('Category 2:  %s', category)

    # Filter into a new list instead of removing while iterating, which
    # silently skipped the element following each removed one.
    remaining_names_1 = [top_1['name'] for top_1 in topics_1]
    topics_2 = []
    for top_2 in get_topics(category):
        if top_2['name'] in remaining_names_1:
            log.info('Remove duplicate topic: %s', top_2['name'])
        elif top_2['name'] == topic_1['name']:
            log.info('Removed chosen topic 1 from topic list 2: %s',
                     topic_1['name'])
        elif top_2['name'] in otop:
            log.info('Removed recently used topic as original: %s',
                     top_2['name'])
        else:
            topics_2.append(top_2)

    if not topics_2:
        log.info('No topics left after filtering')
        return False
    topic_2 = random.choice(topics_2)
    log.info('Topic 2: %s', topic_2['name'])

    headline = get_headline(topic_2, otit, topic_1['name'])
    if headline is False:
        log.info('Invalid headline discarded')
        return False

    log.info('Original title: %s', headline)
    # Escape the topic name so regex metacharacters in it are matched
    # literally, and use a callable replacement so backslashes or group
    # references in the new topic name are inserted verbatim.
    regex = re.compile(r'\b{0}\b'.format(re.escape(topic_2['name'])),
                       re.IGNORECASE)
    new_headline = regex.sub(lambda _match: topic_1['name'], headline,
                             count=1)
    if new_headline in ltit:
        log.info('Generated headline tweeted recently: %s', headline)
        return False

    otit.append(headline)
    ltop.append(topic_1['name'])
    otop.append(topic_2['name'])
    # Persist only the most recent entries of each history list.
    with open('original_titles.txt', 'w') as original_titles:
        json.dump(otit[-240:], original_titles, ensure_ascii=False)
    with open('last_topics.txt', 'w') as last_topics:
        json.dump(ltop[-40:], last_topics, ensure_ascii=False)
    with open('original_topics.txt', 'w') as original_topics:
        json.dump(otop[-30:], original_topics, ensure_ascii=False)
    return new_headline
Exemplo n.º 5
0
def find_title(ltit, otit, ltop, otop):
    """
    Build a new headline by swapping one topic for another.

    Steps:
      1. Pick a random category and a topic in it whose name is not in
         ``ltop``; retry with a new random category up to 100 times.
      2. Drop that category and pick a second random category.
      3. Filter the second category's topics: discard any whose name also
         appears among the topics left in the first category, the chosen
         first topic itself, and any name listed in ``otop``.
      4. Pick a random second topic and fetch a headline for it.
      5. Replace the first occurrence of the second topic's name in the
         headline with the first topic's name.
      6. If the result is not in ``ltit``, record the headline and both
         topic names and persist the three history lists as JSON.

    Args:
        ltit: Collection of recently generated titles; a new headline found
            in it is rejected.
        otit: List of original headlines; appended to on success and passed
            to ``get_headline``.
        ltop: List of names recently used as first topic; appended to on
            success.
        otop: List of names recently used as second (original) topic;
            appended to on success.

    Returns:
        The new headline string, or ``False`` when no usable topic or
        headline could be produced.
    """
    use_enchant(True)
    categories = ['w', 'n', 'b', 't', 'e', 's']
    max_attempts = 100

    topic_1 = None
    for _ in range(max_attempts):
        category = random.choice(categories)
        topics_1 = get_topics(category)
        # The last remaining topic is never drawn (same as the original
        # ``len(topics_1) > 1`` guard).
        while len(topics_1) > 1 and topic_1 is None:
            candidate = random.choice(topics_1)
            topics_1.remove(candidate)
            if candidate['name'] not in ltop:
                topic_1 = candidate
            else:
                log.info('Skipping recently used topic: %s',
                         candidate['name'])
        if topic_1 is not None:
            break
    else:
        # Previously this only logged and kept looping forever; bail out
        # the same way every other failure path does.
        log.warning('Exited script with no new topics found')
        return False

    categories.remove(category)
    log.info('Category 1:  %s', category)
    log.info('Topic 1: %s', topic_1['name'])

    category = random.choice(categories)
    log.info('Category 2:  %s', category)

    # Filter into a new list instead of removing while iterating, which
    # silently skipped the element following each removed one.
    remaining_names_1 = [top_1['name'] for top_1 in topics_1]
    topics_2 = []
    for top_2 in get_topics(category):
        if top_2['name'] in remaining_names_1:
            log.info('Remove duplicate topic: %s', top_2['name'])
        elif top_2['name'] == topic_1['name']:
            log.info('Removed chosen topic 1 from topic list 2: %s',
                     topic_1['name'])
        elif top_2['name'] in otop:
            log.info('Removed recently used topic as original: %s',
                     top_2['name'])
        else:
            topics_2.append(top_2)

    if not topics_2:
        log.info('No topics left after filtering')
        return False
    topic_2 = random.choice(topics_2)
    log.info('Topic 2: %s', topic_2['name'])

    headline = get_headline(topic_2, otit, topic_1['name'])
    if headline is False:
        log.info('Invalid headline discarded')
        return False

    log.info('Original title: %s', headline)
    # Escape the topic name so regex metacharacters in it are matched
    # literally, and use a callable replacement so backslashes or group
    # references in the new topic name are inserted verbatim.
    regex = re.compile(r'\b{0}\b'.format(re.escape(topic_2['name'])),
                       re.IGNORECASE)
    new_headline = regex.sub(lambda _match: topic_1['name'], headline,
                             count=1)
    if new_headline in ltit:
        log.info('Generated headline tweeted recently: %s', headline)
        return False

    otit.append(headline)
    ltop.append(topic_1['name'])
    otop.append(topic_2['name'])
    # Persist only the most recent entries of each history list.
    with open('original_titles.txt', 'w') as original_titles:
        json.dump(otit[-240:], original_titles, ensure_ascii=False)
    with open('last_topics.txt', 'w') as last_topics:
        json.dump(ltop[-40:], last_topics, ensure_ascii=False)
    with open('original_topics.txt', 'w') as original_topics:
        json.dump(otop[-30:], original_topics, ensure_ascii=False)
    return new_headline