Пример #1
0
class CommentForm(forms.Form):
    """Comment form with name, email and content fields.

    Field-level cleaning rejects names longer than 80 characters, content
    that the profanity filter flags, and content in which the html parser
    finds any tag.
    """

    name = forms.CharField(label='Your Name', max_length=100)
    email = forms.EmailField(label='Your Email')
    content = forms.CharField(label='Comment', widget=forms.Textarea)

    def __init__(self, *args, **kwargs):
        """Create the per-form profanity filter, then run the base initializer."""
        self.pf = ProfanityFilter()
        super().__init__(*args, **kwargs)

    def clean_name(self):
        """Return the cleaned name, or raise if it exceeds 80 characters."""
        submitted_name = self.cleaned_data['name']

        # The name can only have a certain size
        if len(submitted_name) <= 80:
            return submitted_name

        raise ValidationError('The name cannot be longer than 80 characters')

    def clean_content(self):
        """Return the cleaned comment text, or raise on profanity or html markup."""
        text = self.cleaned_data['content']

        # Profanity is not allowed
        text_is_clean = self.pf.is_clean(text)
        if not text_is_clean:
            raise ValidationError('Profanity is not allowed in the comments!')

        # TODO: Alternative is to use a html sanitizer
        # No html markup is allowed: any tag found by the parser is rejected
        first_tag = BeautifulSoup(text, 'html.parser').find()
        if first_tag:
            raise ValidationError(
                'No html markup allowed in the content of a comment! Please understand that '
                'permitting html markup in comments is risky and vulnerable to attacks.'
            )

        return text
Пример #2
0
def main():
    """Run the profanity filter console utility.

    Command-line options:
      -t/--text       text to test (mutually exclusive with -f)
      -f/--file       path of a file whose contents are tested
      -l/--languages  comma separated language codes (default ``en``)
      -o/--output     file to which the censored text is written
      --show          print the censored text

    When ``--show`` or ``--output`` is given the censored text is emitted and
    the function returns; otherwise it prints whether the text is clean.
    With neither ``-t`` nor ``-f`` the empty string is tested.
    """
    parser = argparse.ArgumentParser(
        description='Profanity filter console utility')
    # argparse itself rejects a command line that supplies both -t and -f,
    # so no manual "both given" check is needed after parsing.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-t',
                       '--text',
                       dest='text',
                       help='Test the given text for profanity')
    group.add_argument('-f',
                       '--file',
                       dest='path',
                       help='Test the given file for profanity')
    parser.add_argument(
        '-l',
        '--languages',
        dest='languages',
        default='en',
        help='Test for profanity using specified languages (comma separated)')
    parser.add_argument('-o',
                        '--output',
                        dest='output_file',
                        help='Write the censored output to a file')
    parser.add_argument('--show',
                        action='store_true',
                        help='Print the censored text')

    args = parser.parse_args()

    if args.text:
        text = args.text
    elif args.path:
        with open(args.path) as f:
            text = f.read()
    else:
        text = ''

    pf = ProfanityFilter(languages=args.languages.split(','))

    if args.show or args.output_file:
        # Censor only when the censored text is actually going to be used.
        censored_text = pf.censor(text)

        if args.output_file:
            with open(args.output_file, 'w') as f:
                f.write(censored_text)
            print("Censored text written to output file at: " + args.output_file)

        if args.show:
            print("Censored text:\n")
            print(censored_text)

        return

    if pf.is_clean(text):
        print("This text is clean.")
    else:
        print("This text is not clean!")
def applyProfanityFilter():
    """Mask profane rows of ``media/recording1/transcript.csv`` in place.

    The file is read completely first and then rewritten: the header row and
    every row whose ``sentence`` column is clean are written back unchanged,
    while rows with a profane sentence are replaced by a fixed row of
    asterisks. An empty file is left untouched.

    Raises:
        KeyError: if the header has no ``sentence`` column.
    """
    pf = ProfanityFilter()
    pf.censor_char = '@'

    transcript_path = 'media/recording1/transcript.csv'
    masked_row = ['***', '****', '****', '*****', '*****']

    # Read before writing: opening with mode 'w+' truncates the file, so a
    # reader created on that same handle never sees any row.
    with open(transcript_path, mode='r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        header = next(csv_reader, None)
        rows = list(csv_reader)

    if header is None:
        return
    if 'sentence' not in header:
        raise KeyError('sentence')
    sentence_idx = header.index('sentence')

    with open(transcript_path, mode='w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(header)
        for row in rows:
            # Rows too short to contain the sentence column are kept as they are.
            if len(row) <= sentence_idx or pf.is_clean(row[sentence_idx]):
                csv_writer.writerow(row)
            else:
                csv_writer.writerow(masked_row)
Пример #4
0
class Administration(commands.Cog):
    """Commands for server admins."""

    def __init__(self, bot: commands.Bot):
        """Keep a reference to the bot, call ``check_actions.start()`` and create the profanity filter."""
        self.bot = bot
        # NOTE(review): check_actions is defined outside this view; presumably
        # a background task — confirm it does not need self.pf before it is set below.
        self.check_actions.start()

        self.pf = ProfanityFilter()

    @property
    def db(self):
        """Return whatever the bot has registered as the "Database" cog."""
        database_cog = self.bot.get_cog("Database")
        return database_cog

    async def check_message(self, ctx: commands.Context):
        """Return an ``(ignore, delete)`` pair of booleans for the message in ``ctx``.

        Both flags are True when the profanity filter flags the message
        content and the message was not sent in the one exempt channel;
        otherwise both are False.
        """
        message = ctx.message

        # The filter runs first; the channel exemption is only consulted for
        # messages the filter has flagged.
        flagged = (
            not self.pf.is_clean(message.content)
            and message.channel.id != 728830756071276665
        )

        return flagged, flagged

    async def mute_member(self, member: discord.Member, duration: timedelta = None):
        """Give ``member`` the guild's "Muted" role and record it in the database.

        When ``duration`` is provided, a temporary "mute" action is created
        in the database before the member is marked as muted.

        Raises:
            AlreadyDoneError: if the member already has the "Muted" role.
        """
        muted_role = next(r for r in member.guild.roles if r.name == "Muted")

        if muted_role in member.roles:
            raise AlreadyDoneError()

        # A timed mute additionally registers a temp action; the remaining
        # steps are the same for timed and permanent mutes.
        if duration is not None:
            self.db.create_temp_action(member, "mute", duration)

        self.db.update_member(member, muted=True)
        await member.add_roles(muted_role)

    async def unmute_member(self, member: discord.Member):
        if (
            role := next(filter(lambda x: x.name == "Muted", member.guild.roles))
        ) in member.roles:
            models.TempAction.objects(member=self.db.fetch_member(member)).delete()
            self.db.update_member(member, muted=False)
            await member.remove_roles(role)
        else:
Пример #5
0
    def list(self, request):
        """Censor a comment and report whether it is clean.

        Expects the query parameters ``comment``, ``deep_flag`` and ``lang``.
        Responds with the censored comment and an ``approved`` flag, or with
        HTTP 400 when a parameter is missing or ``deep_flag`` cannot be
        parsed as a boolean.
        """
        params = request.query_params

        if not all(k in params for k in ('comment', 'deep_flag', 'lang')):
            return Response({'error_message': 'All params are required'},
                            status=status.HTTP_400_BAD_REQUEST)

        comment = params['comment']
        lang = params['lang']

        # deep_flag comes straight from the client; a value strtobool cannot
        # parse is a bad request, not a server error.
        try:
            deep_flag = util.strtobool(params['deep_flag'])
        except ValueError:
            return Response(
                {'error_message': 'deep_flag must be a boolean value'},
                status=status.HTTP_400_BAD_REQUEST)

        pf = ProfanityFilter(censor_whole_words=False,
                             deep_analysis=deep_flag,
                             languages=[lang])
        return Response({
            'comment': pf.censor(comment),
            'approved': pf.is_clean(comment)
        })
Пример #6
0
def chat():
    """Build the messaging response for an incoming chat message.

    ``!start`` triggers a greeting with an icebreaker question,
    ``!icebreaker`` triggers just an icebreaker question, and any other
    message is checked for profanity and answered with a warning when it is
    not clean. The response is returned as a string.
    """
    incoming_msg = request.values.get('Body', '')
    resp = MessagingResponse()
    msg = resp.message()
    msg.body("")

    # Work out the reply text first; None means nothing further is added.
    reply = None
    if "!start" in incoming_msg:
        reply = (
            "Greetings! I am ModBot, here to watch over this chat. \n\nNow that you're all here, feel free to introduce yourselves. To break the ice, answer the following question: "
            + choose_icebreaker())
    elif "!icebreaker" in incoming_msg:
        reply = "Answer the question: " + choose_icebreaker()
    elif not ProfanityFilter().is_clean(incoming_msg):
        reply = "Please refrain from using inappropriate language. This is meant to be a safe space."

    if reply is not None:
        msg.body(reply)

    return str(resp)
Пример #7
0
def answer(carlAsked, userAnswered, allowProfanity):
    """Record the user's answer in the channel storage and pick the next phrase.

    Args:
        carlAsked: the phrase that was asked previously.
        userAnswered: the user's reply to it.
        allowProfanity: selects the "E2" channel when true; otherwise the
            "default" channel is used and new phrases must pass the
            profanity filter before they are stored.

    Returns:
        The phrase at the chosen follow-up index. The index starts at -1, so
        when no other index is chosen the last stored phrase is returned.

    The channel's JSON file is rewritten on every call.
    """
    if allowProfanity:
        channel = "E2"
    else:
        channel = "default"
        from profanity_filter import ProfanityFilter
        pf = ProfanityFilter()

    storageFile = ROOT_DIR + "/channels/" + channel + ".json"

    if os.path.isfile(storageFile):
        # Context manager so the handle is closed right after loading.
        with open(storageFile, 'r') as storageIn:
            storage = json.load(storageIn)
    else:
        storage = {
            'phrases': [],
            'links': [],
        }

    # Delete every illegal character in a single pass.
    stripIllegal = str.maketrans('', '', '{}[]()|\\<>/')
    carlAsked = carlAsked.translate(stripIllegal)
    userAnswered = userAnswered.translate(stripIllegal)

    phrases = storage['phrases']  #a list of phrases
    links = storage[
        'links']  #a list of links to other phrases from each phrase

    # An empty answer also gets a '.', since it ends with none of these.
    if not userAnswered.endswith(('.', '!', '?', '"', "'")):
        userAnswered += '.'

    if len(userAnswered) > 250:
        userAnswered = userAnswered[:250]

    if carlAsked in phrases:
        askIdx = phrases.index(carlAsked)
    else:
        askIdx = -1

    futureAskIdx = -1

    if userAnswered in phrases:
        answerIdx = phrases.index(userAnswered)
        if len(links[answerIdx]) > 0:
            futureAskIdx = random.choice(links[answerIdx])
        else:
            futureAskIdx = getLeastUsed(links, answerIdx)  #exclude answerIdx
        if askIdx != -1:
            links[askIdx].append(answerIdx)
    else:
        bestIdx, best = spellcheckPhrase(userAnswered, phrases)
        if best > 0.6:
            if len(links[bestIdx]) > 0:
                futureAskIdx = random.choice(links[bestIdx])
            else:
                futureAskIdx = getLeastUsed(links, bestIdx)  #exclude answerIdx
            if askIdx != -1:
                links[askIdx].append(bestIdx)
        else:
            futureAskIdx = getLeastUsed(links, bestIdx)  #exclude answerIdx
        # pf only exists when profanity is disallowed; the short-circuit
        # keeps it from being touched otherwise.
        if allowProfanity or pf.is_clean(userAnswered):
            if askIdx != -1:
                links[askIdx].append(len(phrases))
            links.append([])
            phrases.append(userAnswered)

    # Context manager so the data is flushed and the handle closed before
    # returning.
    with open(storageFile, 'w') as storageOut:
        json.dump(storage, storageOut)
    return phrases[futureAskIdx]
Пример #8
0
# Build (or reload) all_sents_clean: a dict mapping each style to its list of
# filtered sentences. The result is cached as a pickle under OUTPUT_DIR.
if os.path.exists(OUTPUT_DIR + "/all_styles_clean.pkl"):
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # this is only safe while the cache is written by this script itself.
    with open(OUTPUT_DIR + "/all_styles_clean.pkl", "rb") as f:
        all_sents_clean = pickle.load(f)
else:
    print("Loading datasets for random cases...")
    # Read one sample file per style, one sentence per line.
    all_sents = {}
    for style, data in data_style_mapping.items():
        with open("../samples/data_samples/{}.txt".format(data["data_file"]),
                  "r") as f:
            all_sents[style] = f.read().strip().split("\n")

    # Keep only sentences the profanity filter accepts and that have fewer
    # than 25 whitespace-separated tokens.
    all_sents_clean = {}
    for style, sents in all_sents.items():
        all_sents_clean[style] = [
            x for x in tqdm.tqdm(sents)
            if pf.is_clean(x) and len(x.split()) < 25
        ]

    # Write the cache so later runs take the fast path above.
    with open(OUTPUT_DIR + "/all_styles_clean.pkl", "wb") as f:
        pickle.dump(all_sents_clean, f)

# Seed the random module from the command-line arguments.
random.seed(args.seed)


# class EventHandler(FileSystemEventHandler):
#     def on_any_event(self, event):
def generation_service():
    while True:
        with open(OUTPUT_DIR + "/generated_outputs/queue/queue.txt", "r") as f:
            queue = f.read().strip()
        if len(queue) == 0:
# Names of the available data styles and model styles, in mapping order.
data_style_list = list(data_style_mapping.keys())
model_style_list = list(style_mapping.keys())

# Build (or reload) all_sents_clean: a dict mapping each style to its list of
# filtered sentences. The result is cached as a pickle under OUTPUT_DIR.
if os.path.exists(OUTPUT_DIR + "/all_styles_clean.pkl"):
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # this is only safe while the cache is written by this script itself.
    with open(OUTPUT_DIR + "/all_styles_clean.pkl", "rb") as f:
        all_sents_clean = pickle.load(f)
else:
    print("Loading datasets for random cases...")
    # Read one sample file per style, one sentence per line.
    all_sents = {}
    for style, data in data_style_mapping.items():
        with open("../samples/data_samples/{}.txt".format(data["data_file"]), "r") as f:
            all_sents[style] = f.read().strip().split("\n")

    # Keep only sentences the profanity filter accepts and that have fewer
    # than 25 whitespace-separated tokens; surrounding whitespace and quote
    # characters are stripped from the kept sentences.
    all_sents_clean = {}
    for style, sents in all_sents.items():
        all_sents_clean[style] = [x.strip().strip("\"").strip("\'").strip() for x in tqdm.tqdm(sents) if pf.is_clean(x) and len(x.split()) < 25]

    # Write the cache so later runs take the fast path above.
    with open(OUTPUT_DIR + "/all_styles_clean.pkl", "wb") as f:
        pickle.dump(all_sents_clean, f)

# Seed the random module from the command-line arguments.
random.seed(args.seed)


# class EventHandler(FileSystemEventHandler):
#     def on_any_event(self, event):
def generation_service():
    while True:
        with open(OUTPUT_DIR + "/generated_outputs/queue/queue.txt", "r") as f:
            queue = f.read().strip()
        if len(queue) == 0:
            time.sleep(0.2)