Example #1
import binascii
import string
from nostril import nonsense

def c3():
    x = '1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736'
    tmp = binascii.unhexlify(x)
    for i in string.printable:
        # Python 3: iterating bytes yields ints, so only the key char needs ord().
        y = ''.join(chr(a ^ ord(b)) for (a, b) in zip(tmp, i * len(tmp)))
        if all(c in string.printable for c in y) and not nonsense(y):
            print(i, y)
Example #2
from typing import Any, Dict, List

from nostril import nonsense
from whatthelang import WhatTheLang

def lang_features(story_sentences: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    lang_list = []

    wtl = WhatTheLang()

    for sent_dict in story_sentences:

        text = sent_dict["text"]

        try:
            lang = wtl.predict_lang(text)

            if not isinstance(lang, str):
                lang = "UKN"

        except Exception:
            lang = "UKN"
        try:
            if len(text) <= 10:
                is_nonsense = False
            else:
                is_nonsense = nonsense(text)
        except Exception:
            is_nonsense = True

        is_eng = isAscii(text)

        lang_dict = dict(sentence_id=sent_dict["id"], lang=lang, nonsense=is_nonsense, ascii_chars=is_eng)

        lang_list.append(lang_dict)

    return lang_list
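The excerpt calls an isAscii helper that is not shown. A minimal driver sketch, assuming whatthelang and nostril are installed and a trivial ASCII check stands in for the missing helper:

# Hypothetical driver; isAscii is not defined in the excerpt above, so a
# trivial stand-in is sketched here.
def isAscii(s: str) -> bool:
    return all(ord(c) < 128 for c in s)

sentences = [{"id": 1, "text": "The quick brown fox jumps over the lazy dog."}]
print(lang_features(sentences))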
Example #3
def c3(x):
    tmp = binascii.unhexlify(x)
    res = []
    for i in string.printable:
        y = ''.join(chr(a ^ ord(b)) for (a, b) in zip(tmp, i * len(tmp)))
        if all(c in string.printable for c in y) and not nonsense(y):
            res.append((i, y.strip()))
    return res
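Unlike Example #1, this variant takes the hex string as an argument and returns the candidates instead of printing them. A hypothetical driver, reusing the ciphertext from Example #1 and the same imports:

# Hypothetical driver; ct reuses the hex ciphertext from Example #1.
ct = '1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736'
for key, plaintext in c3(ct):
    print(key, plaintext)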
Example #4
def is_meaningful(sentence):
    # Uses an external library (nostril) to determine whether a sentence is nonsense/gibberish.
    if len(sentence.replace(" ", "")) < 6:
        return False
    try:
        if nonsense(sentence):
            return False
    except Exception:
        return False
    return True
Example #5
def _test():
    print(offensiveness("F**k you"))
    print(offensiveness("Little bitch"))
    print(offensiveness("You can find a coffee machine on the second floor"))
    print(
        offensiveness(
            "You're so dumb you can't even find a stupid coffee machine"))
    print(offensiveness("Can I fire that bitch, Charles?"))
    print(offensiveness("Where can I find a coffee machine?"))
    print(offensiveness("How can I file a complaint?"))
    # Models aren't perfect, this model does not cover all dialects
    print(offensiveness("Ray is a c**t"))

    # The nonsense() function returns True if a string has no meaning.
    print(nonsense("This should return false."))
    print(nonsense("ZkIOMJSIOJEKLMZKJMELLKS"))
    # Even when concatenating words or using more complicated codes, the model can usually recognize meaningful strings.
    print(nonsense("ioFlXFndrInfo"))
    # According to the documentation, the accuracy is 99%.
    # The text has to be long enough; otherwise a ValueError is raised.
    print(nonsense("t2shrt"))
Example #6
def ratings(text):
    table = []
    for word in text.split():
        if not word:
            continue
        if ignore(word):
            row = [word, '--', '--']
        else:
            row = [
                word, 'y' if in_dictionary(word) else 'n',
                'n' if nonsense(word) else 'y'
            ]
        table.append(row)
    return table
Example #7
def is_nonsense(sentence):
    """
    Checks if a sentence is nonsense or not.

    Args:
        sentence: The string that is to be checked.

    Returns: True if the string is nonsense; False if it is meaningful or too
    short for nostril to score (nostril raises ValueError, which is caught).

    """
    try:
        return nonsense(sentence)
    except ValueError:
        return False
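A quick sanity check, reusing the sample strings from Example #5 (expected results follow nostril's documented behaviour):

print(is_nonsense("ZkIOMJSIOJEKLMZKJMELLKS"))  # expected True: random-looking text
print(is_nonsense("t2shrt"))  # expected False: too short, ValueError is caught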
Example #8
import json
import os

from nostril import nonsense

def main():
    res = {}

    cdump_path = "/media/tong/Elements/obfs/class-dump"
    file_gen = "res.json"
    cnt = 0

    for dirpath, dirnames, ifilenames in os.walk(cdump_path):
        for ifilename in ifilenames:
            file_path = os.path.abspath(os.path.join(dirpath, ifilename))
            try:
                with open(file_path) as frh:
                    c_nonsense = 0
                    c_real = 0
                    for s in frh:
                        s = s.strip()
                        # Only score Objective-C method lines ("-"/"+" prefixes).
                        if not s.startswith(("-", "+")):
                            continue
                        try:
                            if nonsense(s):
                                c_nonsense += 1
                            else:
                                c_real += 1
                        except ValueError:
                            # line too short or non-alphabetic for nostril
                            pass
                    if c_nonsense + c_real == 0:
                        continue
                    # Fraction of meaningful method names in this class dump.
                    res[ifilename] = c_real / (c_nonsense + c_real)
                    cnt += 1
                    print("{}: {} {}".format(cnt, ifilename, res[ifilename]))
            except (OSError, UnicodeDecodeError):
                continue

    with open(file_gen, 'w', encoding='utf-8') as js_fwh:
        json.dump(res, js_fwh)
Example #9
        def lookup_tokens(token_text):

            add_token = False
            if self._py_dictionary.meaning(token_text) is not None:
                print("Py Dictionary", token_text)
                add_token = True
            else:
                try:
                    if not nonsense(token_text):
                        print("Not Nonsense", token_text)
                        add_token = True
                except Exception:
                    pass

            if add_token:
                self._allowed_tokens.add(token_text)
                self._allowed_to_insert.append({"token": token_text})

            self._tried_tokens.add(token_text)
            self._tried_to_insert.append({"token": token_text})
Example #10
    async def on_message(self, message):
        """
        Event message

        Args:
            message (discord.Message): Message content
        """

        # * Vars
        now = datetime.now()

        if message.author.id == self.bot.user.id:
            return

        if message.channel.type is discord.ChannelType.private:
            try:
                if nonsense(message.content):
                    await message.channel.send(
                        '<:Denied:807575178561191976> - Please provide a good text!'
                    )
                else:
                    jmail_channel = self.bot.get_channel(
                        Guild.mailling_channel)

                    mail_embed = discord.Embed(
                        title=f'New mail from {message.author.name}',
                        color=0x2c2f33)
                    mail_embed.add_field(name='Content:',
                                         value=f'```{message.content}```',
                                         inline=False)
                    mail_embed.set_footer(
                        text=now.strftime("%m/%d/%Y, %H:%M:%S"),
                        icon_url=message.author.avatar_url)
                    await jmail_channel.send(embed=mail_embed)
            except ValueError:
                # nostril raises ValueError when the text is too short to score
                await message.channel.send(
                    '<:Denied:807575178561191976> - Error, your text is too smart.'
                )
Example #11
def check_if_obfuscated(tmp_prog_arg):
    print("start to check obfuscated")
    extract_header_file(tmp_prog_arg)
    tmp_nonsense = 0
    tmp_real = 0
    with open(tmp_prog_arg.extract_header_file_path) as tmp_header_file:
        for tmp_s in tmp_header_file:
            tmp_s = tmp_s.replace("\n", "")
            # Strip digits so nostril only sees the alphabetic identifier text.
            for tmp_i in range(10):
                tmp_s = tmp_s.replace(str(tmp_i), '')

            # nostril needs more than 6 characters to give a verdict.
            if len(tmp_s) <= 6:
                continue
            if nonsense(tmp_s):
                tmp_nonsense = tmp_nonsense + 1
            else:
                tmp_real = tmp_real + 1
    if tmp_nonsense + tmp_real == 0:
        print("didn't find useful name")
        sys.exit()
    # Treat the app as obfuscated when fewer than 90% of names look meaningful.
    tmp_result = float(tmp_real) / (tmp_nonsense + tmp_real)
    if tmp_result < 0.9:
        print("this application is obfuscated")
        tmp_prog_arg.my_app_info.is_obfuscated = True
    else:
        tmp_prog_arg.my_app_info.is_obfuscated = False
Example #12
ct_numbers = []

for letter in ct:
    number = ord(letter) - 65
    ct_numbers.append(number)

a_inv = []
b = list(range(26))

# Collect the modular inverses of the candidate multipliers; modinv returns
# None when no inverse exists (e.g. for 13, which is not coprime with 26).
for i in range(1, 26, 2):
    inv = modinv(i, 26)
    if inv is not None:
        a_inv.append(inv)

pt = ""

# Brute-force the affine decryption p = a_inv * (c - b) mod 26 and let
# nostril filter out candidate plaintexts that look like gibberish.
for i in b:
    for j in a_inv:
        for number in ct_numbers:
            if number >= 0:
                pt_letter = (j * (number - i)) % 26
                pt += chr(pt_letter + 65)
            else:
                pt += " "
        if not nonsense(pt):
            print("PT: {}  ->    A_inv: {}, B: {}".format(pt, j, i))
        pt = ""
Example #13
async def check_manually(message):
    # This command is used to test new antispam AI functions
    user, txt = await aio_get_user(message, allow_self=True)
    if not user:
        return

    user_id = user['user_id']

    name = user['first_name']
    user_pics = await tbot(GetUserPhotosRequest(
        int(user['user_id']),
        offset=0,
        max_id=0,
        limit=100))

    if user['last_name']:
        name += user['last_name']

    num = 0

    text = "User " + await user_link_html(user['user_id'])
    text += "\nName: " + name
    text += "\nID: <code>" + str(user['user_id']) + '</code>'

    text += '\n'

    gbanned = mongodb.blacklisted_users.find_one({'user': user_id})
    if gbanned:
        text += "\n<b>Warn! User gbanned in SophieBot!</b>"
        text += f"\nDate: <code>{gbanned['date']}</code>"
        text += f"\nReason: <code>{gbanned['reason']}</code>"
        text += '\n'
        num += 999
    else:
        text += "\nUser not gbanned in SophieBot"

    api_url = "https://api.unifiedban.solutions/blacklist/check/" + str(user_id)

    ubanned = requests.get(api_url, headers={'Authorization': CONFIG['advanced']['utoken']})

    if ubanned.text == '{"Error": "No data"}':
        text += "\nUser not ubanned."

    if user['first_name'].replace(' ', '').isdigit():
        text += "\n<b>Warn! User has a name containing only numbers!</b>"
        num += 80

    if user['first_name'].lower() in NAMES:
        text += "\n<b>Warn! User has a real name (spammers often try to look like real humans)!</b>"
        num += 75

    if user_pics and len(user_pics.photos) == 1:
        text += "\n<b>Warn! User has only 1 display picture!</b>"
        num += 40
    if user_pics and len(user_pics.photos) == 0:
        text += "\n<b>Warn! User doesn't have any DP!</b>"
        num += 25

    try:
        if nonsense(name):
            text += "\n<b>Warn! User has a nonsense name!</b>"
            num += 85
        else:
            text += "\nUser has a normal name"
    except ValueError:
        text += "\nName is too short to analyse"

    # Counterweight
    if '#' in name:
        text += "\nUser has a hashtag in their name; mostly only real users do"
        num -= 20

    if "☭" in name:
        text += "\nGood soveit boi."
        num -= 20

    if "🌈" in name:
        text += "\nGei detected."
        num -= 20

    if "🦊" in name:
        text += "\nHa, this guy is a fox lover."
        num -= 20

    for owo in COUNTRY_EMOJIS:
        if owo in name:
            text += "\nHa, this guy loves their own country"
            num -= 20
            break
    #

    text += "\n\nDebug: Real suspicion numer: " + str(num)

    if num > 100:
        num = 100

    if num < 0:
        num = 0

    text += '\n\n<b>Suspicion: </b><code>' + str(num) + "%</code>"

    await message.reply(str(text))
Example #14
    for url in urls_list:
        try:
            response = requests.get(url).text
            for match in get_varnames(response):
                params.add(match)
        except:
            pass
else:
    input_data = ' '.join(read_in())
    for match in get_varnames(input_data):
        params.add(match)

if args.smart:
    print('using smart detection')
    for param in params:
        # nostril only works on alphabetic input, so strip everything else
        regex = re.compile('[^a-zA-Z]')
        nostril_safe_param = regex.sub('', param)
        if len(nostril_safe_param) > 6:
            if not nonsense(nostril_safe_param):
                print(param)
        else:
            # print params shorter than 7 chars as-is; nostril can't score them
            print(param)

else:
    for param in params:
        print(param)
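The two constraints noted in the comments above (alphabetic input only, more than 6 characters) recur throughout these examples. A hypothetical helper bundling them:

import re
from nostril import nonsense

# Hypothetical helper: nostril needs purely alphabetic input longer than
# 6 characters, so short or stripped-empty values are kept by default.
def looks_meaningful(param):
    letters = re.sub('[^a-zA-Z]', '', param)
    if len(letters) <= 6:
        return True  # too short for nostril to score; keep it
    return not nonsense(letters)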
Example #15
def classifier(stream_strings):
    """Rule based classification for the input string

	Args:
	  str: A string to be processed

	Returns:
	  str: category/ class of the string
	"""

    # Augment the string: split it, strip spaces and commas, and lower-case it
    stream_strings_org = stream_strings
    stream_strings = stream_strings.lower()
    stream_strings = ' '.join(stream_strings.split()).replace(',', '')
    stream_strings_space = stream_strings.split(' ')
    stream_strings_mod = list(
        set(stream_strings_space + [stream_strings] + [stream_strings_org]))

    # Variables for rule based matching
    ents = []
    in_vocab = []
    poss = {}
    alphas = {}
    like_nums = {}

    for token in nlp(' '.join(stream_strings_org.split()).lower(
    )):  #Iterate through string after converting into a spacy doc
        poss[token.text] = str(token.pos_)  #Part of Speech for the token
        alphas[token.text] = str(token.is_alpha)  #Check if token is alphabetic
        like_nums[token.text] = str(token.like_num)  #Check if token is numeric
        if nlp.vocab.strings[token.text] in nlp.vocab:
            in_vocab.append(
                token.text)  #Check if token is present in vocabulary
    for stream_string in stream_strings_mod:  # Check the entity labels
        doc = nlp(stream_string)
        for ent in doc.ents:
            if ent.label_ not in ents:
                ents.append(ent.label_)
    ents = list(set(ents))

    # Rule for serial number: no entities and nothing in vocabulary, or the
    # string looks like gibberish (tripled so short inputs satisfy nostril's
    # minimum length), or it is purely numeric.
    if ((len(ents) == 0 and len(in_vocab) == 0)
            or nonsense(''.join(stream_strings_org.lower().split()) * 3)
            or ''.join(stream_strings_org.split()).isnumeric()):
        return 'serial_number'

    #Rule for physical good/ Product
    if (len(nlp(' '.join(stream_strings_org.split())))
            == len(in_vocab)) and (len(ents) == 0 or 'PRODUCT' in ents):
        return 'product'

    #Rule for Address
    if ('CARDINAL' in ents) or ('FAC' in ents):
        return 'address'

    #Rule for Geographic location
    if ('GPE' in ents or 'LOC' in ents) and ('ORG' not in ents):
        return 'location'

    #Rule for Company Name
    if 'ORG' in ents:
        return 'company_name'
    return 'product'
Example #16
from nostril import nonsense

INPUT_FILENAME = "brute_force_results.txt"
OUTPUT_FILENAME = "meaningful_results.txt"

with open(INPUT_FILENAME) as input_file, open(OUTPUT_FILENAME, "w") as output_file:
    for line in input_file:
        try:
            # Strip the newline; nostril scores the text itself.
            if not nonsense(line.strip()):
                output_file.write(line)
        except ValueError:
            # line too short (or non-alphabetic) for nostril to score
            pass
Example #17
    def run(self, record):
        '''
        The list of features will be:
        - The number of domain parts, for example, www.google.com is 3.
        - The overall length in characters.
        - The length of the longest domain part.
        - The length of the TLD, e.g. .online or .download is longer than .com.
        - The randomness level of the domain.
        '''
        if 'analysers' not in record:
            record['analysers'] = []

        x_samples = []
        Y_samples = []

        for analyser in record['analysers']:
            if analyser['analyser'] != 'WordSegmentation':
                continue

            for domain, segments in analyser['output'].items():
                # Remove wildcard domain
                domain = re.sub(r'^\*\.', '', domain)

                parts = domain.split('.')

                x = []
                # Compute the number of domain parts
                x.append(len(parts))

                # Compute the length of the whole domain
                x.append(len(domain))

                longest = ''
                # Compute the length of the longest domain parts
                for part in parts:
                    if len(part) > len(longest):
                        longest = part

                x.append(len(longest))

                # Compute the length of the TLD
                x.append(len(parts[-1]))

                randomness_count = 0
                # The nostril package that we use to detect nonsense words in
                # the domain only returns a boolean verdict, so maybe we need
                # to think about how we want to quantify this.
                for w in segments:
                    try:
                        if len(w) >= FeaturesGenerator.NOSTRIL_LENGTH_LIMIT and nonsense(w):
                            randomness_count += 1
                    except ValueError:
                        continue

                x.append(randomness_count / len(segments))

                x_samples.append(x)
                Y_samples.append('usual_suspect' in record)

            break

        record['analysers'].append({
            'analyser': type(self).__name__,
            'output': x_samples,
        })

        return record
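As the inline comment notes, nostril only returns a boolean verdict per word. One possible graded alternative (a sketch of a swapped-in technique, not part of nostril or this pipeline) is a per-character Shannon entropy score:

import math
from collections import Counter

# Hypothetical graded randomness measure: Shannon entropy of the character
# distribution, in bits per character (higher tends to mean more random).
def char_entropy(word):
    counts = Counter(word)
    total = len(word)
    return -sum((c / total) * math.log2(c / total) for c in counts.values())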
Example #18
def chat():

    content = request.json
    history = content['history']
    query = content['query']
    task_name = content['task_name']
    lang_id = content['lang_id']
    top_p = content['top_p']
    temp = content['temp']
    rept = content['rept']
    lon = content['lon']
    lat = content['lat']

    if isinstance(history, str) or len(history) == 0 or history[0] == "":
        history = []

    query = translate(query,
                      lang_id,
                      "en",
                      auto=True if task_name == "AUTOMODE" else False)
    emoji_user = get_emoji_score([query])

    history.append(query)
    if task_name == "CovidQA":  ## COVID QA
        if len(query) <= 20 or nonsense(query):
            resp = "Your input doesn't make sense, input something more meaninful"
        else:
            r2 = requests.get('https://covid19api.emos.ai/api/v1/summary/',
                              params={'q': query})
            if (r2 != "<Response [200]>"):
                resp = eval(r2.json()[0]['data'])["extractive"]
            else:
                resp = "This service is currenlty not available."
        meta = []
        viz_meta = []
        task_id = 20
    elif task_name == "debunker":  ## COVID DEBUNKER
        if len(query) <= 20 or nonsense(query):
            resp = "Your input doesn't make sense, input something more meaninful"
        else:
            ppl = get_perplexity(query)
            print(ppl)
            if (float(ppl) > 170.):  #### to be tuned
                resp = "This claim is likely to be fake"
            else:
                resp = "This claim is likely to be real"
        meta = []
        viz_meta = []
        task_id = 16
    else:
        resp, meta, task_name, task_id, viz_meta = generate(
            history=history,
            task_name=task_name,
            meta_seed=1,
            top_p=float(top_p),
            temperature=float(temp),
            repetion=float(rept),
            lon=float(lon),
            lat=float(lat))

    toxic_ = False
    if task_name == "Neural":
        toxic_resp = get_toxic_score(query, resp)
        if toxic_resp != "":
            resp = toxic_resp
            toxic_ = True

    history.append(resp)
    resp = translate(resp,
                     "en",
                     lang_id,
                     auto=True if task_name == "AUTOMODE" else False)

    emoji_resp = get_emoji_score([resp])
    jsonFormat = {
        'history': history,
        'response': resp,
        'user_emoji': emoji_user,
        'resp_emoji': emoji_resp,
        'toxic': toxic_,
        'task_name': task_name,
        'task_id': task_id,
        'meta': meta,
        'viz_meta': viz_meta,
        "lon": lon,
        "lat": lat
    }

    with open("data/conversation_history/" +
              "{}.json".format(timeStamped("conv")),
              "w",
              encoding="utf-8") as f:
        json.dump(jsonFormat, f, indent=4)

    return jsonify(jsonFormat)  # jsonify is used here to allow CORS [important]