Example #1
File: sample.py Project: vhnuuh/pyutil
def test_piping():
    from sh import sort, du, glob, wc, ls

    # sort this directory by biggest file
    print(sort(du(glob('*'), '-sb'), '-rn'))

    # print the number of folders and files in /etc
    print(wc(ls('/etc', '-l'), '-l'))
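With nested sh calls like these, the inner command runs to completion and its buffered output is fed to the outer command's stdin. A minimal streaming variant (a sketch, assuming GNU coreutils and the same imports) passes _piped=True so du's output flows into sort as it is produced:

from sh import du, glob, sort

# stream du's output into sort instead of buffering it all in memory
print(sort(du(glob('*'), '-sb', _piped=True), '-rn'))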
Example #2
from sh import awk, git, glob, sort, tail


def max_file_hash(n=10, short=False):
    pack_path = glob('.git/objects/pack/*.idx')

    if not pack_path:
        git.gc()
        pack_path = glob('.git/objects/pack/*.idx')

    if short:
        return awk(
            tail(sort(git('verify-pack', '-v', pack_path), '-k', '3'), '-n',
                 '-{0:d}'.format(n)), '{print $1}')
    else:
        return tail(sort(git('verify-pack', '-v', pack_path), '-k', '3', '-n'),
                    '-{0:d}'.format(n))
Example #3
def main():
    #Get the parameters
    args = get_params()[0]
    debug = args.debug
    inputPath = os.path.realpath(args.input)
    queryPath = targetPath = ''
    if args.query:
        queryPath = os.path.realpath(args.query)
    if args.db:
        targetPath = os.path.realpath(args.db)
    outPath = os.path.realpath(args.output)
    cutoff = args.cutoff
    outName = os.path.basename(outPath)
    outDir = os.path.dirname(outPath)

    if debug:
        print('Alignment file:\t%s' % inputPath)
        print('Query file:\t%s' % queryPath)
        print('Target file:\t%s' % targetPath)
        print('Output file:\t%s' % outPath)
        print('Cutoff:\t%s' % str(cutoff))

    # Working version from MASTER
    qSeqLenPath = os.path.join(outDir,
                               '{:s}.len'.format(os.path.basename(queryPath)))
    tSeqLenPath = os.path.join(outDir,
                               '{:s}.len'.format(os.path.basename(targetPath)))

    ########## sort the BLAST output ###########
    # sort blast_output -k1,1 -k2,2 -k12nr > sorted_output
    bsName: str = os.path.basename(inputPath)
    sortPath: str = os.path.join(outDir, 'sorted_{:s}'.format(bsName))
    with open(sortPath, 'w') as ofd:
        sort(inputPath, '-k1,1', '-k2,2', '-k12nr', _out=ofd)
    # remove the unsorted output and rename
    os.remove(inputPath)
    os.rename(sortPath, inputPath)
    ############################################

    # Parse the MMseqs2 output
    mmseqs_parser_c.mmseqs_parser_f0_9flds(inputPath,
                                           qSeqLenPath,
                                           tSeqLenPath,
                                           outDir=outDir,
                                           outName=outName,
                                           scoreCoff=cutoff,
                                           debug=debug)
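A side note on the sort step above: the os.remove/os.rename pair can be collapsed into a single atomic call with os.replace, which overwrites the destination in one step:

import os

# equivalent to os.remove(inputPath) followed by os.rename(sortPath, inputPath)
os.replace(sortPath, inputPath)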
Example #4
    def generate_image_hash(self):
        """Generate hash of Docker context

        - Recursively list all files
        - Get sha1sum of each file in list
        - Sort list (ensures consistency from run to run)
        - Get final sha1sum of list
        """
        for context_dir in self.docker_context_cm():
            sha1sum = sh.sha1sum(
                sh.xargs(
                    sh.sort(
                        sh.find(
                            '.', '-type', 'f', '-print0',
                            _err=sys.stderr.buffer,
                            _cwd=context_dir),
                        '-z',
                        # Locale differences can affect sort order
                        _env={**os.environ, 'LC_ALL': 'C.UTF-8'},
                        _err=sys.stderr.buffer),
                    '-0', 'sha1sum',
                    _err=sys.stderr.buffer,
                    _cwd=context_dir),
                _err=sys.stderr.buffer).split()[0]
        return sha1sum
Example #5
    def emails(self, format_arg=None):
        '''
        Returns the emails of the authors either as text or as a dict,
        depending on the format argument.

        :param format_arg: if "dict" is specified, a dict will be returned
        :rtype: dict or array of e-mails depending on format_arg
        '''
        if format_arg is None:
            format_string = "'%aN' <%cE>"
        elif format_arg == 'dict':
            format_string = "%aN\t%cE"
        result = sort(git.log(
            "--all", "--format=" + format_string,
            _tty_in=True, _tty_out=False, _piped=True), "-u")

        if format_arg is None:
            return result
        elif format_arg == "dict":
            # flatten "name\temail" lines into alternating name/email tokens
            pairs = result.replace("\n", "\t").split("\t")[:-1]
            it = iter(pairs)
            emails = dict(zip(it, it))
            return emails
Example #6
    def _runTest(self, shards, max_threads):
        for threads in range(1, max_threads + 1):
            for shard in range(0, shards):
                with sh.sudo:
                    outfile = output_file_name(shards, shard, threads)
                    zmap(p=80,
                         T=threads,
                         shards=shards,
                         shard=shard,
                         _out="tempfile")
                    parse("tempfile", _out=outfile)
                    dup_lines = int(wc(uniq(cat(outfile), "-d"), "-l"))
                    self.assertEqual(dup_lines, 0)
                    shard_file = shard_file_name(shards, threads)
                    if shard == 0:
                        cat(outfile, _out=shard_file)
                    else:
                        cat(shard_file, outfile, _out="tempfile")
                        mv("tempfile", shard_file)

        for threads in range(1, max_threads + 1):
            shard_file = shard_file_name(shards, threads)
            num_lines = int(wc(cat(shard_file), "-l"))
            self.assertEqual(num_lines, TestSharding.NUM_IPS)
            dup_lines = int(
                wc(uniq(sh.sort(cat(shard_file), "-n"), "-d"), "-l"))
            self.assertEqual(dup_lines, 0)
Example #7
def _get_last_two_deploys(environment):
    import sh
    git = sh.git.bake(_tty_out=False)
    pipe = git('tag')
    pipe = sh.grep(pipe, environment)
    pipe = sh.sort(pipe, '-rn')
    pipe = sh.head(pipe, '-n2')
    return pipe.strip().split('\n')
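If the tag names embed dotted version numbers, a plain numeric reverse sort can misorder them (1.9 sorts above 1.10); a possible variant (a sketch, not what this source uses) is GNU sort's version comparison:

pipe = sh.sort(pipe, '-V', '-r')  # version-aware, descending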
Example #8
def remote_tags(url, **subprocess_kwargs):
    """Returns a list of remote tags"""
    _tags = sh.sed(
        sh.sort(sh.git("ls-remote", "-t", "--refs", url), "-t", "/", "-k", 3,
                "-V"),
        "-E",
        "s/^[[:xdigit:]]+[[:space:]]+refs\\/tags\\/(.+)/\\1/g",
    ).stdout.decode()
    return _tags.split("\n")[:-1]
Example #9
def remote_branches(url):
    """ Returns a list of remote branches
    """
    _tags = sh.sed(
        sh.sort(sh.git("ls-remote", "-h", "--refs", url), "-t", "/", "-k", 3, "-V"),
        "-E",
        "s/^[[:xdigit:]]+[[:space:]]+refs\\/heads\\/(.+)/\\1/g",
    ).stdout.decode()
    return _tags.split("\n")[:-1]
Example #10
import sh
from sh import cat, cut, grep


def parse(filename, **kwargs):
    # cat outfile | grep ip | cut -d '|' -f 2 | cut -d ' ' -f 3 | cut -d '.' -f 4 | sort -n
    return sh.sort(cut(cut(cut(grep(cat(filename), "ip"), d="|", f=2),
                           d=" ",
                           f=3),
                       d=".",
                       f=4),
                   "-n",
                   _out=kwargs.get("_out"))
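For reference, a pure-Python sketch of the same pipeline, assuming the input keeps the '|'-, space-, and dot-delimited layout the cuts expect (parse_py is a hypothetical name):

def parse_py(filename):
    # grep ip | cut -d '|' -f 2 | cut -d ' ' -f 3 | cut -d '.' -f 4 | sort -n
    octets = []
    with open(filename) as f:
        for line in f:
            if 'ip' in line:
                octets.append(int(line.split('|')[1].split(' ')[2].split('.')[3]))
    return sorted(octets)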
Example #11
def remote_branches(url, **subprocess_kwargs):
    """Returns a list of remote branches"""
    _tags = sh.sed(
        sh.sort(sh.git("ls-remote", "-h", "--refs", url), "-t", "/", "-k", 3,
                "-V"),
        "-E",
        "s/^[[:xdigit:]]+[[:space:]]+refs\\/heads\\/(.+)/\\1/g",
    ).stdout.decode()
    _tags = _tags.split("\n")[:-1]
    return [tag for tag in _tags if tag != "master"]
Example #12
def group_stage(input_dir, output_dir, num_workers):
    """Run group stage."""
    # Concatenate and sort input files to sorted.out
    sorted_output_filename = os.path.join(output_dir, 'sorted.out')
    print("+ cat {}/* | sort > {}".format(input_dir, sorted_output_filename))

    # Pin the locale for a traditional byte-wise sort; Travis CI required 'C.UTF-8' over 'C'
    os.environ.update({'LC_ALL': 'C.UTF-8'})
    sh.sort(
        sh.cat(glob.glob(os.path.join(input_dir, '*')), _piped=True),
        _out=sorted_output_filename,
    )

    # Open grouper output files.  Store the file handles in a circular buffer.
    grouper_files = collections.deque(maxlen=num_workers)
    for i in range(num_workers):
        filename = os.path.join(output_dir, part_filename(i))
        file = open(filename, 'w')
        grouper_files.append(file)

    # Write lines to grouper output files.  Round robin allocation by key.
    prev_key = None
    with open(sorted_output_filename, 'r') as sorted_output_file:
        for line in sorted_output_file:
            # Parse the line.  The key is everything before the first tab.
            assert '\t' in line, "Error: no TAB found in line."
            key, _ = line.split('\t', maxsplit=1)

            # If it's a new key, then rotate circular queue of grouper files
            if prev_key is not None and key != prev_key:
                grouper_files.rotate(1)

            # Write to grouper file
            grouper_files[0].write(line)

            # Update most recently seen key
            prev_key = key

    # Close grouper output file handles
    for file in grouper_files:
        file.close()
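Because the input file is already sorted by key, the rotate-on-new-key loop above can also be phrased with itertools.groupby; a sketch reusing the names from this function:

import itertools

with open(sorted_output_filename) as fin:
    groups = itertools.groupby(fin, key=lambda line: line.split('\t', maxsplit=1)[0])
    for i, (_key, lines) in enumerate(groups):
        # round robin: the i-th distinct key goes to worker i mod num_workers
        grouper_files[i % num_workers].writelines(lines)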
Example #13
def get_files(usaf, wban):
    output = sh.grep("%s %s" % (usaf, wban),
                     "isd-history.txt").strip().split(" ")
    end = int(output.pop()[0:4])
    start = int(output.pop()[0:4])
    sh.mkdir("-p", "%s-%s" % (usaf, wban))
    os.chdir("%s-%s" % (usaf, wban))
    for year in range(start, end + 1):
        fn = "%s-%s-%s.gz" % (usaf, wban, year)
        if not os.path.exists(fn):
            sh.wget("ftp://ftp.ncdc.noaa.gov/pub/data/noaa/%s/%s" % (year, fn))
            print(fn)
    output_fn = "%s-%s-data.csv" % (usaf, wban)
    with open(output_fn, "w") as h:
        sh.sort(sh.cut(
            sh.grep(
                sh.cut(sh.zcat(glob.glob("*.gz")), "--output-delimiter=,",
                       "-c16-27,88-92"), "-v", r"\+9999"), "--output-delimiter=.",
            "-c1-17,18"),
                _out=h)
    sh.gzip(output_fn)
    sh.mv("%s.gz" % (output_fn), "..")
Example #14
    def _get_python_path(self):
        _python_paths = [
            sh.which('python'),
            sh.which('python3'),
            sh.which('python2')
        ]
        python_paths = [str(path) for path in _python_paths if path]
        if os.path.isfile('/usr/local/python-3.6.5/bin/python'):
            python_paths.append('/usr/local/python-3.6.5/bin/python')

        if os.path.isdir('/usr/local/Cellar/python'):
            out = sh.find('/usr/local/Cellar/python',
                          '-regex',
                          '.*/bin/python3[0-9.]*$',
                          '-type',
                          'f',
                          _piped=True)
            out = sh.sort(out, _piped=True)
            python_paths.append(sh.head(out, '-n1').strip())

        useable_pythons = []
        python_paths_set = set()
        for python_path in python_paths:
            if python_path in python_paths_set:
                continue
            python_paths_set.add(python_path)
            if os.path.realpath(python_path) in python_paths_set:
                continue
            python_paths_set.add(os.path.realpath(python_path))
            useable_pythons.append(
                (python_path, self._get_python_version(python_path)))

        if len(useable_pythons) == 0:
            print('No usable python found!!')
            sys.exit(1)

        error = ''
        while True:
            message = '{}\n{}select python path [{}]: '.format(
                '\n'.join([
                    '{}. {} (v{})'.format(i, *e)
                    for i, e in enumerate(useable_pythons)
                ]), error,
                ','.join([str(i) for i in range(len(useable_pythons))]))
            try:
                num = int(input(message))
            except ValueError:
                num = -1
            if num < 0 or num >= len(useable_pythons):
                error = 'error: invalid input, try again!! '
                continue
            return useable_pythons[num]
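The initial lookups could also be done without spawning subprocesses: shutil.which from the standard library performs the same PATH search. A sketch:

import shutil

python_paths = [p for p in (shutil.which(n) for n in ('python', 'python3', 'python2')) if p]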
Example #15
    def path_bins(self):
        PATH = os.getenv("PATH").split(":")
        try:
            # stest(1) succeeds when some PATH directory is newer than the
            # cache file, i.e. the cache is stale
            sh.stest("-dqr", "-n", self.cache_bins, *PATH)
            # rebuild the cache while yielding each executable name
            for b in sh.tee(sh.sort(sh.stest("-flx", *PATH, _piped=True),
                                    _piped=True),
                            self.cache_bins,
                            _iter=True):
                yield b.strip()
        except sh.ErrorReturnCode:
            # cache is up to date: just read it back
            with open(self.cache_bins, "r") as f:
                for b in f.readlines():
                    yield b.strip()
Example #16
File: info.py Project: baoleary/manual
def get_default_modules(user, host):
    modules = {}
    print(host)
    result = sort(ssh("{0}@{1}".format(user, host), "module", "-l", "avail", "2>&1"))
    print("ok")

    for line in result:
        if "default" in line:
            content = line.split()
            print(content)
            try:
                (module_package, module_version) = content[0].split("/")
                modules[module_package] = module_version
            except ValueError:
                pass
    return modules
Example #17
    def emails(self, format_arg=None):
        if format_arg is None:
            format_string = "'%aN' <%cE>"
        elif format_arg == 'dict':
            format_string = "%aN\t%cE"
        result = sort(git.log(
            "--all", "--format=" + format_string,
            _tty_in=True, _tty_out=False, _piped=True), "-u")

        if format_arg is None:
            return result
        elif format_arg == "dict":
            pairs = result.replace("\n", "\t").split("\t")[:-1]
            it = iter(pairs)
            authors = dict(zip(it, it))
            return authors
Example #19
    def register_error_in_database(self, session: Session):
        """
        This methode create database object associated to the statification with the result of the log
        that scrapy has generated.
        :param session
        :raise NoResultFound if there is no statification with empty commit sha
        """

        # finalization of the statification by removing unwanted files and directories and empty directories
        self.delete_files()
        self.delete_directories()
        self.delete_empty_directories()

        # get the statification with empty commit
        statification = Statification.get_statification(session, '')

        # open the log file that contain scrapy errors
        f_file = open(self.s_log_file)

        expecting_other_line_for_error_message = False
        s_error_message = ''

        # for each line will look for information that will be used to fill object of the database
        for line in f_file:

            # check whether the line contains a WARNING, INFO, or ERROR marker
            if re.match('(.*)WARNING(.*)', line) or re.match(
                    '(.*)INFO(.*)', line) or re.match('(.*) ERROR:(.*)', line):
                expecting_other_line_for_error_message = False

            if expecting_other_line_for_error_message:
                s_error_message += line

            if not expecting_other_line_for_error_message and s_error_message != '':
                statification.add_object_to_statification(
                    ScrapyError, session, s_error_message)
                s_error_message = ''

            # in the case the line match an External link
            if re.match('(.*) INFO: External link detected(.*)', line):
                # we get the second part of the line; there are also [] in the first part
                s_trunked_line = line[line.index('INFO: External link detected'
                                                 ):len(line)]

                # we get the position of the beginning of the URL
                i_start_url = s_trunked_line.index('[')
                # we get the position of the end of the URL
                i_end_url = s_trunked_line.index(']')
                # we get the position of the beginning of the source url
                i_start_source = s_trunked_line.index(' in ') + 4

                try:
                    # we create and add a new ExternalLink to our statification
                    statification.add_object_to_statification(
                        ExternalLink, session,
                        s_trunked_line[i_start_source:len(s_trunked_line)],
                        s_trunked_line[i_start_url + 1:i_end_url])
                except ValueError as e:
                    self.logger.info(e)
            # in the case the line match a Scrapy Error
            elif re.match('(.*) ERROR:(.*)', line):
                expecting_other_line_for_error_message = True
                # retrieve the Scrapy Error
                s_trunked_line = line[line.index('ERROR: ') + 7:len(line)]
                s_error_message += s_trunked_line

            # in the case the line match an error for type MIME
            elif re.match('(.*) WARNING: Forbidden content (.*)', line):

                # we get the second part of the line, where the information of interest begins
                s_trunked_line = line[line.index('WARNING: Forbidden content '
                                                 ):len(line)]

                # get the starting position of the Error type MIME
                i_start_error_mime = s_trunked_line.index('[')
                # get the end position of the error type MIME
                i_end_error_mime = s_trunked_line.index(']')
                # get the error type MIME
                s_error_mime = s_trunked_line[i_start_error_mime +
                                              1:i_end_error_mime]
                # get the source of the error
                s_source_link = s_trunked_line[s_trunked_line.
                                               index('detected in') +
                                               12:len(s_trunked_line)]

                try:
                    # create an ErrorTypeMIME associated to the statification
                    statification.add_object_to_statification(
                        ErrorTypeMIME, session, s_error_mime, s_source_link)
                except ValueError as e:
                    self.logger.info(e)
            # in the case the line match an HTTP error
            elif re.match('(.*) WARNING: HTTP error (.*)', line):

                # we get the second part of the line, where the information of interest begins
                s_trunked_line = line[line.index('WARNING: HTTP error '
                                                 ):len(line)]

                # we get the starting position of the Error Code
                i_start_error_code = s_trunked_line.index('[')
                # we get the end position of the Error Code
                i_end_error_code = s_trunked_line.index(']')
                # we get the start position of the url source of the error
                i_start_url = s_trunked_line.index(' for ')
                # we get the end position of the url source of the error
                i_end_url = s_trunked_line.index(' from ')

                # we retrieve the error code
                s_error_code = s_trunked_line[i_start_error_code +
                                              1:i_end_error_code]

                # we retrieve the url that cause the error
                s_url = s_trunked_line[i_start_url + 5:i_end_url]

                # we retrieve the url of the source where was found the url that caused the error
                s_url_source = s_trunked_line[i_end_url +
                                              6:len(s_trunked_line) - 1]

                try:
                    # we create a new HtmlError associated to the statification
                    statification.add_object_to_statification(
                        HtmlError, session, s_error_code, s_url, s_url_source)
                except ValueError as e:
                    self.logger.info(e)
            elif re.match('(.*)response_received_count(.*)', line):

                # we get the second part of the line, where the information of interest begins
                s_value_item_scraped_count = line[line.index(': ') +
                                                  2:line.index(',')]

                try:
                    # set the number of crawled items on the statification object
                    statification.upd_nb_item(session, statification.commit,
                                              int(s_value_item_scraped_count))
                except ValueError as e:
                    self.logger.info(e)
        try:
            # retrieve the list of type file with number of file for each type
            s_result_type_files = sh.uniq(
                sh.sort(
                    sh.grep(
                        sh.find(sh.glob(self.s_repository_path + '/*'),
                                '-type', 'f'), '-o', '-E', r'\.[a-zA-Z0-9]+$')),
                '-c')
            # the result is one string; strip the spaces and split it
            # into a list of lines
            a_table_result_type_files = s_result_type_files.replace(
                ' ', '').split('\n')

            # browse the line of result
            for row in a_table_result_type_files:
                if row:
                    # a line is composed of a number followed by a type like "42.png",
                    # we separate the number and the type
                    s_type_file = row.split('.')

                    try:
                        # create a new ScannedFile associated to the statification
                        statification.add_object_to_statification(
                            ScannedFile, session, s_type_file[1],
                            int(s_type_file[0]))
                    except ValueError as e:
                        self.logger.info(e)
        except sh.ErrorReturnCode_1:
            self.logger.info('There is no folder in the static repository')
        finally:
            # in all case we need to close the file
            f_file.close()

        # change the status of the statification (NEED TO BE DONE AT THE END !!)
        statification.upd_status(session, '', Status.STATIFIED)
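The find/grep/sort/uniq chain in the try block tallies files by extension; an approximately equivalent pure-Python sketch (it counts every extension, not only alphanumeric ones) would be:

import collections
import os

counts = collections.Counter(
    os.path.splitext(name)[1].lstrip('.')
    for _root, _dirs, files in os.walk(self.s_repository_path)
    for name in files
    if os.path.splitext(name)[1])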
Example #20
DATA_BASES = sh.mysql(sh.echo('show databases;'))
DATA_BASES = [el.strip() for el in DATA_BASES]
DATA_BASES = DATA_BASES[1:]  # first entry is 'Database', which is not a database
DATA_BASES += ['All-Databases']
DATA_BASES = ['trading_oanda_d1']  # NOTE: overrides the list assembled above
DATESTAMP = sh.date("+%Y-%m-%d_%H:%M").strip()

for DB in DATA_BASES:
    for DD in [DATA_DIR, LOG_DIR]:
        # step a): delete all except the latest two files for each database
        print(f'database: {DB}; dir: {DD}')
        a = sh.find(DD, '-maxdepth', '1', '-type', 'f', '-regextype',
                    'sed', '-regex', f'^/.*{DB}-[0-9].*', '-printf',
                    '%Ts\t%p\n')
        b = sh.sort(a, '-n')
        c = sh.head(b, '-n', '-2')
        d = sh.cut(c, '-f', '2-')
        print(d.strip())
        e = sh.xargs(d, 'rm', '-rf')

    # step b): export the databases
    FILENAME = Path.joinpath(DATA_DIR, f'{DB}-{DATESTAMP}.sql.gz')
    print(f'FILENAME: {FILENAME}')
    LOGFILENAME = Path.joinpath(LOG_DIR, f'{DB}-{DATESTAMP}.log')
    print(f'LOGFILENAME: {LOGFILENAME}')

    # cmd = "mysqldump  -v --single-transaction --quick --lock-tables=false ${DB} 2>'${LOGFILENAME}' |  pigz > '${FILENAME}' "
    # sh.mysqldump('-v', '--single-transaction', '--quick', '--lock-tables=false', DB, _out=FILENAME, _err=LOGFILENAME)
    sh.ls(DATA_DIR, _out=FILENAME)
    print()
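The commented-out mysqldump pipeline could be expressed with sh along these lines (an untested sketch; it assumes mysqldump and pigz are installed and uses sh's support for filename strings in _err/_out):

sh.pigz(
    sh.mysqldump('-v', '--single-transaction', '--quick', '--lock-tables=false',
                 DB, _piped=True, _err=str(LOGFILENAME)),
    '-c', _out=str(FILENAME))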
Example #22
filters = [
    {
        'Name': 'instance-state-name',
        'Values': [
            'running',
        ]
    },
]
response = ec2.describe_instances(Filters=filters)
for reservation in response["Reservations"]:
    for instance in reservation["Instances"]:
        for tag in instance["Tags"]:
            if tag["Key"] in "Name":
                try:
                    if "devops" not in tag["Value"] and "vpn" not in tag[
                            "Value"]:
                        fp.write(
                            tag["Value"] + " ansible_host=" +
                            instance["PrivateIpAddress"] +
                            " ansible_user=conman ansible_ssh_private_key_file=~/.ssh/keys/conman_id_rsa\n"
                        )

                except KeyError:
                    print tag["Value"] + instance["KeyName"]

fp.close

#sort the invetory file
tee(sort("production-hosts"), "production-hosts")
Example #23
from sh import git, ssh, head, tail, wc, sort, grep, du, glob


# Get the list of files
glob('*')
# Get sizes of each
du('-hM', glob('*'))
# Sort, numerically
sort(du('-hM', glob('*')), '-n')
# And get the largest
tail(sort(du('-hM', glob('*')), '-n'), '-n', '5')

Example #24
def list_notebooks() -> str:
    nbs = sh.uniq(sh.sort(sh.awk('FNR==3 {print $2}', list(NOTE_FILES))))
    # Precondition: line 3 of each note separates 'Notebook:' and the name with a space
    return nbs
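A pure-Python sketch of the same awk/sort/uniq chain, assuming NOTE_FILES is an iterable of paths and the precondition above holds (list_notebooks_py is a hypothetical name):

def list_notebooks_py():
    names = set()
    for path in NOTE_FILES:
        with open(path) as f:
            lines = f.read().splitlines()
        # awk 'FNR==3 {print $2}': second field of the third line of each file
        if len(lines) >= 3 and len(lines[2].split()) >= 2:
            names.add(lines[2].split()[1])
    return '\n'.join(sorted(names))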