Example #1
def main(components=None):
    initials, vowels, finals, repeat_cnt, total_cnt = (
        components or gibberish_components())
    pf = ProfanityFilter()
    cnt = 0
    profane_cnt = 0
    with alive_bar(total_cnt) as bar:
        for i in initials:
            for v in vowels:
                for f in finals:
                    prefix = ''.join([i, v, f])
                    if pf.is_profane(prefix):
                        print(
                            cnt, 'All %s words beginning with "%s..."' %
                            (repeat_cnt, prefix))
                        cnt += repeat_cnt
                        profane_cnt += repeat_cnt
                        bar(incr=repeat_cnt)
                        continue
                    for v2 in vowels:
                        for f2 in finals:
                            cnt += 1
                            word = ''.join([prefix, v2, f2])
                            if pf.is_profane(word):
                                profane_cnt += 1
                                print(cnt, word)
                            bar()
    print('Done! Found %s profane words in %s total' % (profane_cnt, cnt))
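This example relies on a gibberish_components() helper from the surrounding project. A hypothetical stub consistent with how main() consumes the returned tuple (the character sets and names below are assumptions):

def gibberish_components():
    # Hypothetical stub: three character sets plus the two counts the loops expect.
    initials = ['b', 'd', 'f']
    vowels = ['a', 'e', 'i']
    finals = ['g', 'k', 't']
    repeat_cnt = len(vowels) * len(finals)  # 5-letter words sharing one 3-letter prefix
    total_cnt = len(initials) * len(vowels) * len(finals) * repeat_cnt
    return initials, vowels, finals, repeat_cnt, total_cnt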
Example #2
def main():
    parser = argparse.ArgumentParser(
        description='Profanity filter console utility')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-t',
                       '--text',
                       dest='text',
                       help='Test the given text for profanity')
    group.add_argument('-f',
                       '--file',
                       dest='path',
                       help='Test the given file for profanity')
    parser.add_argument(
        '-l',
        '--languages',
        dest='languages',
        default='en',
        help='Test for profanity using specified languages (comma separated)')
    parser.add_argument('-o',
                        '--output',
                        dest='output_file',
                        help='Write the censored output to a file')
    parser.add_argument('--show',
                        action='store_true',
                        help='Print the censored text')

    args = parser.parse_args()

    if args.text and args.path:
        parser.print_help()
        exit()

    if args.text:
        text = args.text
    elif args.path:
        with open(args.path) as f:
            text = f.read()
    else:
        text = ''

    pf = ProfanityFilter(languages=args.languages.split(','))
    censored_text = pf.censor(text)

    if args.output_file:
        with open(args.output_file, 'w') as f:
            f.write(censored_text)
        print("Censored text written to output file at: " + args.output_file)

    if args.show:
        print("Censored text:\n")
        print(censored_text)

    if args.show or args.output_file:
        return

    if pf.is_clean(text):
        print("This text is clean.")
    else:
        print("This text is not clean!")
Example #3
def main():
    start_time = time()

    print("Running Basic Setup Steps....")
    config_loader = ConfigLoader()
    output_directory_path = (config_loader.get_base_path() +
                             config_loader.get_output_directory_name())
    if not os.path.exists(output_directory_path):
        os.makedirs(output_directory_path)
    parser = Parser(config_loader)
    profanity_filter = ProfanityFilter(config_loader, parser)
    de_duplicator = DeDuplicator(parser)
    keyword_dictionary_builder = KeywordDictionaryBuilder(parser)
    sym_spell_checker = SymSpellChecker(config_loader, parser)

    print("Running Parser....")
    parser.parse(config_loader.get_query_logs_file_path(),
                 config_loader.get_frequency_file_path(),
                 config_loader.get_max_total_queries())

    print("Running De-duplicator....")
    de_duplicator.remove_duplicates(
        config_loader.get_frequency_file_path(),
        config_loader.get_frequency_file_path(),
        config_loader.get_de_duplicated_keyword_ordered_1_file_path(),
        config_loader.get_de_duplicated_missing_space_1_file_path(),
        config_loader.get_de_duplicated_synonyms_1_file_path())

    print("Running Profanity Filter....")
    profanity_filter.remove_profane_queries(
        config_loader.get_frequency_file_path(),
        config_loader.get_frequency_file_path(),
        config_loader.get_filtered_profane_queries_file_path())

    print("Running Keyword Dictionary Builder....")
    keyword_dictionary_builder.build_dictionary_file_from_frequency_file(
        config_loader.get_frequency_file_path(),
        config_loader.get_dictionary_file_path())

    print("Running SymSpell Checker....")
    sym_spell_checker.run_sym_spell(config_loader.get_sym_spell_iterations(),
                                    config_loader.get_frequency_file_path(),
                                    config_loader.get_dictionary_file_path(),
                                    config_loader.get_dictionary_file_path())

    print("Running De-duplicator....")
    de_duplicator.remove_duplicates(
        config_loader.get_dictionary_file_path(),
        config_loader.get_dictionary_file_path(),
        config_loader.get_de_duplicated_keyword_ordered_2_file_path(),
        config_loader.get_de_duplicated_missing_space_2_file_path(),
        config_loader.get_de_duplicated_synonyms_2_file_path())

    print("Completed!!!")

    print("Total time taken: ", (time() - start_time) / 60, " minutes")
Example #4
def get_profanities(words, custom_profanities=None):
    pf = ProfanityFilter()
    if custom_profanities is not None:
        pf.custom_profane_word_dictionaries = {'en': custom_profanities}
    swears = []
    for w in words:
        cw = pf.censor_word(w)
        if cw.is_profane:
            swears.append(cw.original_profane_word)
    return swears
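A minimal usage sketch (the word lists are illustrative; custom_profanities is used in place of the built-in English dictionary):

words = ['hello', 'world', 'darn']
print(get_profanities(words))
print(get_profanities(words, custom_profanities={'darn'}))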
Example #5
    def __init__(self, profane_words_filepath: str):
        words = []
        with open(profane_words_filepath, encoding='utf8') as f:
            for line in f:
                word = line.strip()
                words.append(word)
                # Also register the spelling variant with 'ё' replaced by 'е',
                # so both forms of a Russian word are caught.
                if 'ё' in word:
                    words.append(word.replace('ё', 'е'))
        self._ru_words = words
        self._ru_pf = ProfanityFilter()
        # The Russian word list is registered under the 'en' key, so the
        # filter's default English pipeline is the one that matches it.
        self._ru_pf.custom_profane_word_dictionaries = {'en': words}
        self._r = sr.Recognizer()
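Example #6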
def applyProfanityFilter():
    pf = ProfanityFilter()
    pf.censor_char = '@'

    # Read all rows first: opening the file in 'w+' mode would truncate it
    # before the reader could see any data.
    with open('media/recording1/transcript.csv', newline='') as csv_file:
        rows = list(csv.DictReader(csv_file))

    # Rewrite the transcript, keeping a placeholder row for each profane sentence.
    with open('media/recording1/transcript.csv', mode='w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        for row in rows:
            if pf.is_clean(row['sentence']):
                continue
            csv_writer.writerow(['***', '****', '****', '*****', '*****'])
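Example #7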
def Predict(texts):
    pf = ProfanityFilter()
    sid = SentimentIntensityAnalyzer()
    labels = []
    for text in texts:
        if pf.is_profane(text):
            labels.append(0)
        else:
            ss = sid.polarity_scores(text)
            # VADER convention: a compound score <= -0.05 counts as negative.
            if ss['compound'] <= -0.05:
                labels.append(0)
            else:
                labels.append(1)
    return labels
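A usage sketch (requires NLTK's vader_lexicon data; 0 marks profane or negative texts, 1 everything else):

print(Predict(["have a wonderful day", "this movie was terrible"]))  # e.g. [1, 0]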
Example #8
def test2(channel):
    import time
    start = time.time()
    from profanity_filter import ProfanityFilter
    pf = ProfanityFilter()
    end = time.time()
    #import json
    #j = json.load(open("/srv/CARL/channels/"+channel+".json",'r'))
    #s = ""
    #n = 0
    #for phrase in j["phrases"]:
    #clean = pf.is_clean(phrase)
    #s += str(clean) + " " + phrase + "<br/>\n"
    #if not clean: n += 1
    #return s + str(n) + "<br/>\n" + str(end-start)
    return str(end - start)
Example #9
def is_profane(url):

    if len(url) < 3:
        return False

    if getattr(settings, "ENABLE_FAST_PROFANITY_CHECKING", True):
        parts = urlparse(get_decodedurl(url))
        partslist = []
        if not (parts.path or parts.netloc):
            raise InvalidURLError(
                "Badly formatted URL passed to is_url_profane")
        splitters = r"\.|\/|\_|\-|\~|\$|\+|\!|\*|\(|\)|\,"  # all the URL-safe characters, escaped
        if parts.netloc:
            partslist = partslist + re.split(splitters, parts.netloc)
        if parts.path:
            partslist = partslist + re.split(splitters, parts.path)
        if parts.query:
            partslist = partslist + re.split(splitters, parts.query)

        # speed optimization
        check4btlw = True
        stringlist = []
        for item in partslist:
            if len(item) > 0:
                if len(item) > 5:
                    check4btlw = False
                for substring in get_all_substrings(item, 2):
                    if len(substring) > 0:
                        stringlist.append(substring)
        partslist = list(dict.fromkeys(stringlist))  # removes dupes

        if check4btlw:
            for part in partslist:
                if part in BAD_THREE_LETTER_WORDS:
                    return True

        score = PredictProfanity(partslist)
        if score.any():
            return True

        if getattr(settings, "ENABLE_DEEP_PROFANITY_CHECKING", True):
            pf = ProfanityFilter()
            for part in partslist:
                if pf.is_profane(part):
                    return True

    return False
Example #10
    def list(self, request):
        if all(k in request.query_params
               for k in ('comment', 'deep_flag', 'lang')):
            comment = request.query_params['comment']
            deep_flag = util.strtobool(request.query_params['deep_flag'])
            lang = request.query_params['lang']

            pf = ProfanityFilter(censor_whole_words=False,
                                 deep_analysis=deep_flag,
                                 languages=[lang])
            return Response({
                'comment': pf.censor(comment),
                'approved': pf.is_clean(comment)
            })
        else:
            return Response({'error_message': 'All params are required'},
                            status=status.HTTP_400_BAD_REQUEST)
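A sample request against this viewset might look like the following (the route prefix is an assumption):

    GET /profanity/?comment=what%20the%20hell&deep_flag=false&lang=en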
Example #11
    def check_profanity_filter_text():

        pf = ProfanityFilter()

        # Open the text file at the given location (use a relative path such
        # as 'profanity.txt' if the file sits next to the program).
        with open('/yourfilelocation/filename.txt') as file_location:
            content_of_file = file_location.read()

        # censor() replaces profane words with the censor character.
        text = pf.censor(content_of_file)

        # Print the file contents with offensive words masked by '*'.
        print(text)
Example #12
def chat():
    incoming_msg = request.values.get('Body', '')
    resp = MessagingResponse()
    msg = resp.message()
    msg.body("")
    if "!start" in incoming_msg:
        msg.body(
            "Greetings! I am ModBot, here to watch over this chat. \n\nNow that you're all here, feel free to introduce yourselves. To break the ice, answer the following question: "
            + choose_icebreaker())
    elif "!icebreaker" in incoming_msg:
        msg.body("Answer the question: " + choose_icebreaker())
    else:
        pf = ProfanityFilter()
        if not pf.is_clean(incoming_msg):
            msg.body(
                "Please refrain from using inappropriate language. This is meant to be a safe space."
            )
    return str(resp)
Example #13
    def process(self, message, **kwargs):

        pf = ProfanityFilter()

        text = message.text
        # e.g. pf.is_profane("This is shit") returns True
        value = 'na'
        confidence = 0
        if pf.is_profane(text):
            # Report the specific profane token as the entity value.
            tokens = text.split(" ")
            for token in tokens:
                if pf.is_profane(token):
                    value = token
                    confidence = 100
        if value != 'na':
            entity = self.convert_to_rasa(value, confidence)
            message.set("entities", [entity], add_to_output=True)
Example #14
    def process(self, message, **kwargs):
        # Define whatever custom processing we want here.
        pf = ProfanityFilter()
        text = message.text
        value = "na"
        confidence = 0
        # For example, text = "This is shit."
        # If a word is profane, assign it a confidence score of 100.
        if pf.is_profane(text):
            tokens = text.split(" ")
            for token in tokens:
                if pf.is_profane(token):
                    value = token
                    confidence = 100

        if value != 'na':
            entity = self.convert_to_rasa(value, confidence)
            message.set("entities", [entity], add_to_output=True)
Example #15
import re
from profanity_filter import ProfanityFilter
from . import config
from . import lib

config = config.config
pr_config = config['profanity_rater']

pf = ProfanityFilter(analyses=pr_config['languages'],
                     languages=pr_config['languages'])
pf.censor_char = '*'
pf.extra_profane_word_dictionaries = pr_config['extra_profane_word_dictionary']


def profanity_severity_rating(content_text, turn_on=None):
    severity = 0
    if turn_on is None:
        turn_on = pr_config['turn_on']

    if not turn_on:
        return severity

    censored_text = pf.censor(content_text)
    # Runs of the censor character mark the words that were censored.
    profane_words = re.findall(re.escape(pf.censor_char) + '+', censored_text)
    total_words = lib.word_count(content_text)
    percent_profane = (len(profane_words) / total_words) * 100 if total_words else 0

    if percent_profane > pr_config['safe_profanity_percent']:
        severity = pr_config['severity_start']
        # Increase severity as the percentage goes up by the rate set in the config.
        # Max severity will be 10 (set in the config file)
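        # The original snippet ends here. A hedged sketch of the remaining
        # logic described by the comments above; 'severity_rate' and
        # 'max_severity' are assumed config keys, not confirmed by the source.
        over_limit = percent_profane - pr_config['safe_profanity_percent']
        severity = min(severity + over_limit * pr_config['severity_rate'],
                       pr_config['max_severity'])

    return severity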
Example #16
def test():
    from profanity_filter import ProfanityFilter
    pf = ProfanityFilter()
    return pf.censor("That's bullshit!")
Example #17
import logging
import configparser
import sys
import json
import os.path

from telegram import Update
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackContext
from profanity_filter import ProfanityFilter
from forex_python.converter import CurrencyRates
from forex_python.bitcoin import BtcConverter

config = configparser.ConfigParser()
config.read('bot_config.ini')

pf = ProfanityFilter(languages=['ru', 'en'])

token = config['DEFAULT']['BotToken']
updater = Updater(token=token, use_context=True)

dispatcher = updater.dispatcher
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO)


class ChatMemberCensorRepository:
    repository_file = 'cencored_users'
    censored_users = set()

    def __init__(self):
Example #18
    def __init__(self, messageIN, censorChar='•'):

        self.message = messageIN
        self.filter = ProfanityFilter()
        self.filter.censor_char = censorChar
Example #19
    def post(self, request):
        nlp = en_core_web_sm.load()
        pf = ProfanityFilter(nlps={'en': nlp})
        # pf.custom_profane_word_dictionaries = {'en': {'sold down the river', 'dog'}}
        # pf.extra_profane_word_dictionaries = {'en': {'sold', 'orange'}}
        wordlist = []
        context = {}

        # FILE UPLOADED
        if 'doc' in request.FILES:

            doc = request.FILES['doc']

            if doc.name.endswith(".docx"):
                docx = docx2python(doc, extract_image=False)
                context['doc'] = docx.text

            elif doc.name.endswith(".txt"):
                print("This is a test")

                # Decode the uploaded bytes; str() would render them as "b'...'".
                mytext = doc.read().decode('utf-8')
                context['doc'] = mytext

            return render(request, 'index.html', context=context)

        # RETRIEVE WORDS AND SPLIT
        document = request.POST['document']
        word_lines = document.splitlines()

        # CHECK EACH WORD IF PROFANITY
        for line in word_lines:
            if line == '':
                wordlist.append(r'\n')

            # NO LINE BREAK CONTINUE HERE
            else:
                words = line.split()
                temp_list = []
                original_list = []

                # LOOP THROUGH EACH WORD.
                for word in words:

                    clean_word = clear_punctuation(word).lower()

                    in_db = Words.objects.filter(word__icontains=clean_word)

                    # WORD IS IN DATABASE
                    if in_db:
                        temp_list.append(clean_word)

                        temp_word = " ".join(temp_list)

                        starting_phrase = Words.objects.filter(
                            word__istartswith=temp_word)

                        # CURRENT WORD IS THE START OF THE PHRASE
                        if starting_phrase:

                            original_list.append(word)

                            completed = Words.objects.filter(
                                word__iexact=temp_word)

                            # CURRENT PHRASE IS COMPLETED
                            if completed:
                                original = " ".join(original_list)
                                original_list.clear()

                                new_word = format_word(original)
                                wordlist.append(new_word)

                                temp_list.clear()

                            # # TEMP WORD DID NOT COMPLETE THE PHRASE
                            # else:
                            #     print('now we here bish')
                            #     original = " ".join(original_list)
                            #     original_list.clear()

                            #     wordlist.append(original)

                            #     temp_list.clear()

                        # NOT START OF PHRASE KEEP GOING
                        else:
                            wordlist.append(word)
                            temp_list.clear()
                            original_list.clear()

                    # WORD IS A PROFANITY
                    elif pf._is_profane_word('en', clean_word):

                        temp_word = " ".join(temp_list)
                        wordlist.append(temp_word)

                        new_word = format_word(word)
                        wordlist.append(new_word)
                        temp_list.clear()

                    # JUST A REGULAR WORD
                    else:
                        temp_word = " ".join(temp_list)
                        wordlist.append(temp_word)

                        wordlist.append(word)

                        temp_list.clear()

        context["results"] = " ".join(wordlist)
        context['document'] = document

        return render(request, 'index.html', context=context)
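Example #20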
def profanity_filter():
    return ProfanityFilter()
Example #21
import re
import nltk
import spacy
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from textblob import Word, TextBlob
from profanity_check import predict, predict_prob
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pickle
from profanity_filter import ProfanityFilter
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.sequence import pad_sequences
import time
#----------------------------------------------------------
# Config ....
sid = SentimentIntensityAnalyzer()
nlp = spacy.load("en_core_web_sm")
pf = ProfanityFilter(nlps={'en': nlp})
nlp.add_pipe(pf.spacy_component, last=True)
stop = stopwords.words('english')
special_char = [
    '~', '@', '$', '#', '%', '^', '&', '*', '(', ')', '-', '_', ',', ';', '/',
    '\\', '>', '<', '|', '[', ']', '}', '{', '"', '\'', '`', '?', '!', '...'
]
path_dir = '/home/bassem/DataDriven_HatfulMemes/data/'
print('----------------------------------------------------------------------')
# Get features from text :


def getsentiment(text):
    # Strip punctuation, then remove stop words.
    # (str.replace does not interpret regexes, so use re.sub here.)
    text = re.sub(r'[^\w\s]', '', text)
    text = " ".join(x for x in text.split() if x not in stop)
Example #22
import os
from profanity_filter import ProfanityFilter
from flask import Flask, abort, make_response, request
from flask_httpauth import HTTPTokenAuth
from functools import lru_cache


app = Flask("pictario-profanity-check")
auth = HTTPTokenAuth(scheme="Bearer")
AUTH_TOKEN = os.environ["TOKEN"]
CACHE_SIZE = int(os.environ.get("CACHE_SIZE", 128))  # env values arrive as strings
pfilter = ProfanityFilter(languages=["en_core_web_sm"])


@auth.verify_token
def verify_token(token):
    return AUTH_TOKEN is not None and token == AUTH_TOKEN


@app.route("/")
def index():
    return {"api_version": "v1"}


@app.route("/v1/censor/", methods=["POST"])
@app.route("/v1/is-profane/", methods=["POST"])
@auth.login_required
def endpoint_handle():
    if request.content_type != "text/plain":
        abort(400)
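    # The original snippet ends here. A hedged sketch of how the handler might
    # continue; dispatching on request.path is an assumption, not source code.
    text = request.get_data(as_text=True)
    if request.path.endswith("/censor/"):
        return make_response(pfilter.censor(text), 200)
    return {"is_profane": pfilter.is_profane(text)}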
Example #23
app.register_blueprint(app_funcs)

mail = Mail(app)

toolbar = DebugToolbarExtension(app)
csrf = CSRFProtect(app)
bcrypt = Bcrypt()
connect_db(app)
# db.drop_all()
db.create_all()

login_manager = LoginManager()
login_manager.init_app(app)
login_manager.login_view = 'login'

pf = ProfanityFilter()
nlp = spacy.load('en_core_web_sm')
profanity_filter = ProfanityFilter(nlps={'en': nlp})
nlp.add_pipe(profanity_filter.spacy_component, last=True)


@login_manager.user_loader
def load_user(user_id):
    return User.query.get(user_id)


@app.route('/')
def homepage():
    recent_picks = List_Entry.query.order_by(
        List_Entry.id.desc()).limit(10).all()
    comments = Comment.query.order_by(Comment.id.desc()).limit(5).all()
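Example #24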
import os
import praw
from pycorenlp.corenlp import StanfordCoreNLP
from profanity_filter import ProfanityFilter
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

## INIT GLOBAL VARS
chunk_size = 5000
num_chunks = 0

## INIT REDDIT INSTANCE
reddit = praw.Reddit()

## INIT PROFANITY FILTER
pf = ProfanityFilter()
pf.censor_whole_words = False

## INIT VADER SENTIMENT
vader = SentimentIntensityAnalyzer()

## INIT TEXTCLEAN R
textclean = importr('textclean',
                    lib_loc="C:/Users/Ben/Documents/R/win-library/3.6")
importr('stringi', lib_loc="C:/Users/Ben/Documents/R/win-library/3.6")


## coreNLP sent. analysis
def getSentiment(text):
    ## connect to CoreNLP server
    host = "http://localhost"
Example #25
def profanity(text):
    # The value returned by this function is what gets substituted
    # into our template.
    pf_filter = ProfanityFilter(languages=['ru', 'en'])
    return pf_filter.censor(text)
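Example #26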
def profanity_filter_ru_en():
    return ProfanityFilter(languages=['ru', 'en'])
Example #27
def organizeFile(filename: str,
                 sort_sections: bool = False,
                 skip_profanity_check: bool = None) -> None:
    global PF
    if skip_profanity_check is None:
        skip_profanity_check = not PROFANITY_CHECK
    if PF is None:
        PF = ProfanityFilter()

    phrases: Dict[str, List[Phrase]] = collections.OrderedDict({
        #EPhraseFlags.OLD_VOX.name: [],
        #EPhraseFlags.NOT_VOX: [],
        #EPhraseFlags.SFX.name:     [],
    })
    phrasesByID = {}
    for p in ParsePhraseListFrom(filename):
        if p.id.lower() in phrasesByID:
            log.warning('Skipping duplicate %s...', p.id)
            continue
        assignTo = ''
        if p.hasFlag(EPhraseFlags.SFX):
            assignTo = EPhraseFlags.SFX.name
        elif p.hasFlag(EPhraseFlags.OLD_VOX):
            assignTo = EPhraseFlags.OLD_VOX.name
        else:
            assignTo = p.category
        phrasesByID[p.id.lower()] = p
        if assignTo not in phrases:
            phrases[assignTo] = []
        phrases[assignTo] += [p]

    if sort_sections:
        newPhOD = collections.OrderedDict()
        for k in sorted(phrases.keys()):
            newPhOD[k] = phrases[k]
        phrases = newPhOD
    with open(filename + '.sorted', 'w') as w:
        divider_len = max([len(x) for x in phrases.keys()]) + 4
        divider = '#' * divider_len
        for section, sectionPhrases in phrases.items():
            if section != '':
                w.write(f'\n{divider}\n## {section}\n{divider}\n\n')
            for phrase in sorted(sectionPhrases, key=lambda x: x.id):
                for comm in phrase.comments_before:
                    comm = comm.rstrip()
                    w.write(f'#{comm}\n')
                key = newkey = phrase.id
                if '/' not in key:
                    newkey = key.lower()
                value = phrase.phrase
                if phrase.hasFlag(EPhraseFlags.SFX):
                    w.write(f'{newkey} = @{value}\n')
                else:
                    if not skip_profanity_check and PF.is_profane(value):
                        log.warning(
                            f'{filename}: Phrase {phrase.id} contains profanity.'
                        )
                    if key != value:
                        w.write(f'{newkey} = {value}\n')
                    else:
                        w.write(f'{newkey}\n')
Example #28
MAX_INITIAL_TEXT_LEN = 128

MAX_RETRY_DELAY_S = 10

CONTENT_FILTER_ENGINE = "content-filter-alpha-c4"

# False positive threshold for content filtering
TOXIC_THRESHOLD = -0.355
CONTENT_SAFE = "0"
CONTENT_SENSITIVE = "1"
CONTENT_HARMFUL = "2"

PREFIX_TEXT = "Looking for a mathNEWS article idea? How about:"
MODEL_ID = os.environ.get("OPENAI_FINETUNED_MODEL")

PROFANITY_FILTER = ProfanityFilter()


class RequestType(enum.Enum):
    GENERATE = "generate"
    STOP = "stop"


class ResponseType(enum.Enum):
    GENERATE = "generate"


class GenerateRequest:
    def __init__(self, initial_text, channel_id, message_id, user_id):
        self.type = RequestType.GENERATE
        self.initial_text = initial_text
Example #29
def answer(carlAsked, userAnswered, allowProfanity):
    if allowProfanity:
        channel = "E2"
    else:
        channel = "default"
        from profanity_filter import ProfanityFilter
        pf = ProfanityFilter()

    storageFile = ROOT_DIR + "/channels/" + channel + ".json"

    if os.path.isfile(storageFile):
        storage = json.load(open(storageFile, 'r'))
    else:
        storage = {
            'phrases': [],
            'links': [],
        }

    illegalChars = ('{', '}', '[', ']', '(', ')', '|', '\\', '<', '>', '/')

    for illegalChar in illegalChars:
        carlAsked = carlAsked.replace(illegalChar, "")
        userAnswered = userAnswered.replace(illegalChar, "")

    phrases = storage['phrases']  # a list of phrases
    links = storage['links']  # links to other phrases from each phrase

    if len(userAnswered) == 0 or userAnswered[-1] not in ('.', '!', '?', '"',
                                                          "'"):
        userAnswered += '.'

    if len(userAnswered) > 250: userAnswered = userAnswered[:250]

    if carlAsked in phrases:
        askIdx = phrases.index(carlAsked)
    else:
        askIdx = -1

    futureAskIdx = -1

    if userAnswered in phrases:
        answerIdx = phrases.index(userAnswered)
        if len(links[answerIdx]) > 0:
            futureAskIdx = random.choice(links[answerIdx])
        else:
            futureAskIdx = getLeastUsed(links, answerIdx)  #exclude answerIdx
        if askIdx != -1:
            links[askIdx].append(answerIdx)
    else:
        bestIdx, best = spellcheckPhrase(userAnswered, phrases)
        if best > 0.6:
            if len(links[bestIdx]) > 0:
                futureAskIdx = random.choice(links[bestIdx])
            else:
                futureAskIdx = getLeastUsed(links, bestIdx)  #exclude answerIdx
            if askIdx != -1:
                links[askIdx].append(bestIdx)
        else:
            futureAskIdx = getLeastUsed(links, bestIdx)  #exclude answerIdx
        if allowProfanity or pf.is_clean(userAnswered):
            if askIdx != -1:
                links[askIdx].append(len(phrases))
            links.append([])
            phrases.append(userAnswered)
    json.dump(storage, open(storageFile, 'w'))
    return phrases[futureAskIdx]
Example #30
    def __init__(self, *args, **kwargs):
        self.pf = ProfanityFilter()
        super(CommentForm, self).__init__(*args, **kwargs)
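A form like this would typically apply the filter during validation. A minimal sketch, assuming a Django form with a single 'text' field (the field name and surrounding class body are assumptions):

from django import forms
from profanity_filter import ProfanityFilter


class CommentForm(forms.Form):
    text = forms.CharField(widget=forms.Textarea)

    def __init__(self, *args, **kwargs):
        self.pf = ProfanityFilter()
        super(CommentForm, self).__init__(*args, **kwargs)

    def clean_text(self):
        # clean_<field> hooks run during form validation.
        text = self.cleaned_data['text']
        if not self.pf.is_clean(text):
            raise forms.ValidationError('Please rephrase without profanity.')
        return text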