def test_piping():
    from sh import sort, du, glob, wc, ls

    # sort this directory by biggest file
    print(sort(du(glob('*'), '-sb'), '-rn'))

    # print the number of folders and files in /etc
    print(wc(ls('/etc', '-l'), '-l'))
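# A minimal sketch of the same two pipelines in streaming form, shown only for
# comparison with the example above. By default sh runs the inner command to
# completion and then feeds its full output to the outer command; passing
# _piped=True to the inner command streams its output instead, which matters
# for large outputs. Assumes the same commands are available on the host.
from sh import sort, du, glob, wc, ls

print(sort(du(glob('*'), '-sb', _piped=True), '-rn'))
print(wc(ls('/etc', '-l', _piped=True), '-l'))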
def max_file_hash(n=10, short=False):
    pack_path = glob('.git/objects/pack/*.idx')
    if not pack_path:
        git.gc()
        pack_path = glob('.git/objects/pack/*.idx')
    if short:
        return awk(
            tail(sort(git('verify-pack', '-v', pack_path), '-k', '3'),
                 '-n', '-{0:d}'.format(n)),
            '{print $1}')
    else:
        return tail(
            sort(git('verify-pack', '-v', pack_path), '-k', '3', '-n'),
            '-{0:d}'.format(n))
def main():
    # Get the parameters
    args = get_params()[0]
    debug = args.debug
    inputPath = '%s' % os.path.realpath(args.input)
    queryPath = targetPath = ''
    if args.query:
        queryPath = '%s' % os.path.realpath(args.query)
    if args.db:
        targetPath = '%s' % os.path.realpath(args.db)
    outPath = '%s' % os.path.realpath(args.output)
    cutoff = args.cutoff
    outName = os.path.basename(outPath)
    outDir = os.path.dirname(outPath)
    if debug:
        print('Alignment file:\t%s' % inputPath)
        print('Query file:\t%s' % queryPath)
        print('Target file:\t%s' % targetPath)
        print('Output file:\t%s' % outPath)
        print('Cutoff:\t%s' % str(cutoff))

    # Working version from MASTER
    qSeqLenPath = os.path.join(outDir, '{:s}.len'.format(os.path.basename(queryPath)))
    tSeqLenPath = os.path.join(outDir, '{:s}.len'.format(os.path.basename(targetPath)))

    ########## sort the BLAST output ###########
    # sort blast_output -k1,1 -k2,2 -k12nr > sorted_output
    bsName: str = os.path.basename(inputPath)
    sortPath: str = os.path.join(outDir, 'sorted_{:s}'.format(bsName))
    ofd = open(sortPath, 'w')
    sort(inputPath, '-k1,1', '-k2,2', '-k12nr', _out=ofd)
    ofd.close()
    # remove the unsorted output and rename
    os.remove(inputPath)
    os.rename(sortPath, inputPath)
    ############################################

    # Parse the MMseqs2 output
    mmseqs_parser_c.mmseqs_parser_f0_9flds(inputPath, qSeqLenPath, tSeqLenPath,
                                           outDir=outDir, outName=outName,
                                           scoreCoff=cutoff, debug=debug)
def generate_image_hash(self):
    """Generate hash of Docker context

    - Recursively list all files
    - Get sha1sum of each file in list
    - Sort list (ensures consistency from run to run)
    - Get final sha1sum of list
    """
    for context_dir in self.docker_context_cm():
        sha1sum = sh.sha1sum(
            sh.xargs(
                sh.sort(
                    sh.find(
                        '.', '-type', 'f', '-print0',
                        _err=sys.stderr.buffer,
                        _cwd=context_dir),
                    '-z',
                    # Locale differences can affect sort order
                    _env={**os.environ, 'LC_ALL': 'C.UTF-8'},
                    _err=sys.stderr.buffer),
                '-0', 'sha1sum',
                _err=sys.stderr.buffer,
                _cwd=context_dir),
            _err=sys.stderr.buffer).split()[0]

    return sha1sum
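# A rough pure-Python sketch of the same idea as the find | sort | xargs sha1sum
# pipeline above: hash every file under a directory, sort the "digest  path"
# lines bytewise (locale-independent, like LC_ALL=C), then hash the sorted
# listing. It will not necessarily reproduce the exact digest of the pipeline
# above (paths and separators differ), and context_dir is just a stand-in for
# whatever directory the real code hashes.
import hashlib
import os

def context_digest(context_dir):
    lines = []
    for root, _dirs, files in os.walk(context_dir):
        for name in files:
            path = os.path.join(root, name)
            with open(path, 'rb') as f:
                digest = hashlib.sha1(f.read()).hexdigest()
            rel = os.path.relpath(path, context_dir)
            lines.append('{}  {}'.format(digest, rel).encode())
    # sorting the encoded lines is a byte comparison, independent of locale
    return hashlib.sha1(b'\n'.join(sorted(lines)) + b'\n').hexdigest()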
def emails(self, format_arg=None):
    '''
    returns the emails of the authors either as a text or as a dict. The
    format is specified as an argument.

    :param format_arg: if "dict" is specified a dict will be returned
    :rtype: dict or array of e-mails dependent on format_arg
    '''
    if format_arg is None:
        format_string = "'%aN' <%cE>"
    elif format_arg == 'dict':
        format_string = "%aN\t%cE"
    result = sort(git.log(
        "--all",
        "--format=" + format_string,
        _tty_in=True,
        _tty_out=False,
        _piped=True), "-u")
    if format_arg is None:
        return result
    elif format_arg == "dict":
        # flatten the tab-separated "name<TAB>email" lines and pair them up
        pairs = result.replace("\n", "\t").split("\t")[:-1]
        it = iter(pairs)
        emails = dict(zip(it, it))
        return emails
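# A small illustration (with hypothetical sample data) of how the "dict" branch
# above turns the `git log --format="%aN\t%cE"` output into a name -> email
# mapping: flattening on tabs yields alternating names and emails, and zipping
# an iterator with itself pairs them back up.
sample = "Alice\talice@example.com\nBob\tbob@example.com\n"
flat = sample.replace("\n", "\t").split("\t")[:-1]
it = iter(flat)
assert dict(zip(it, it)) == {
    "Alice": "alice@example.com",
    "Bob": "bob@example.com",
}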
def _runTest(self, shards, max_threads):
    for threads in range(1, max_threads + 1):
        for shard in range(0, shards):
            with sh.sudo:
                outfile = output_file_name(shards, shard, threads)
                zmap(p=80, T=threads, shards=shards, shard=shard, _out="tempfile")
                parse("tempfile", _out=outfile)
                dup_lines = int(wc(uniq(cat(outfile), "-d"), "-l"))
                self.assertEqual(dup_lines, 0)
                shard_file = shard_file_name(shards, threads)
                if shard == 0:
                    cat(outfile, _out=shard_file)
                else:
                    cat(shard_file, outfile, _out="tempfile")
                    mv("tempfile", shard_file)
    for threads in range(1, max_threads + 1):
        shard_file = shard_file_name(shards, threads)
        num_lines = int(wc(cat(shard_file), "-l"))
        self.assertEqual(num_lines, TestSharding.NUM_IPS)
        dup_lines = int(wc(uniq(sh.sort(cat(shard_file), "-n"), "-d"), "-l"))
        self.assertEqual(dup_lines, 0)
def _get_last_two_deploys(environment):
    import sh
    git = sh.git.bake(_tty_out=False)
    pipe = git('tag')
    pipe = sh.grep(pipe, environment)
    pipe = sh.sort(pipe, '-rn')
    pipe = sh.head(pipe, '-n2')
    return pipe.strip().split('\n')
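# A brief note on the bake() call above: baking pre-binds arguments and special
# keyword arguments, so every later call reuses them. A hedged sketch with a
# hypothetical repository path:
import sh

git = sh.git.bake('-C', '/tmp/some-repo', _tty_out=False)  # /tmp/some-repo is hypothetical
tags = git('tag')                # runs: git -C /tmp/some-repo tag
branches = git('branch', '-a')   # runs: git -C /tmp/some-repo branch -a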
def remote_tags(url, **subprocess_kwargs):
    """Returns a list of remote tags"""
    _tags = sh.sed(
        sh.sort(sh.git("ls-remote", "-t", "--refs", url), "-t", "/", "-k", 3, "-V"),
        "-E",
        "s/^[[:xdigit:]]+[[:space:]]+refs\\/tags\\/(.+)/\\1/g",
    ).stdout.decode()
    return _tags.split("\n")[:-1]
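# A hedged alternative to the sed/sort step above, shown only for comparison:
# parse the `git ls-remote -t --refs <url>` output in Python and strip the
# "refs/tags/" prefix, then sort with a plain lexicographic key. This does not
# reproduce the version-sort semantics of `sort -V`; it just shows the parsing.
import sh

def remote_tags_py(url):
    tags = []
    for line in str(sh.git("ls-remote", "-t", "--refs", url)).splitlines():
        if not line:
            continue
        ref = line.split("\t")[-1]
        if ref.startswith("refs/tags/"):
            tags.append(ref[len("refs/tags/"):])
    return sorted(tags)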
def remote_branches(url):
    """ Returns a list of remote branches """
    _tags = sh.sed(
        sh.sort(sh.git("ls-remote", "-h", "--refs", url), "-t", "/", "-k", 3, "-V"),
        "-E",
        "s/^[[:xdigit:]]+[[:space:]]+refs\\/heads\\/(.+)/\\1/g",
    ).stdout.decode()
    return _tags.split("\n")[:-1]
def parse(filename, **kwargs):
    # cat outfile | grep ip | cut -d '|' -f 2 | cut -d ' ' -f 3 | cut -d '.' -f 4 | sort -n | wc -l
    return sh.sort(
        cut(cut(cut(grep(cat(filename), "ip"), d="|", f=2), d=" ", f=3), d=".", f=4),
        "-n",
        _out=kwargs.get("_out"))
def remote_branches(url, **subprocess_kwargs):
    """Returns a list of remote branches"""
    _tags = sh.sed(
        sh.sort(sh.git("ls-remote", "-h", "--refs", url), "-t", "/", "-k", 3, "-V"),
        "-E",
        "s/^[[:xdigit:]]+[[:space:]]+refs\\/heads\\/(.+)/\\1/g",
    ).stdout.decode()
    _tags = _tags.split("\n")[:-1]
    return [tag for tag in _tags if tag != "master"]
def group_stage(input_dir, output_dir, num_workers):
    """Run group stage."""
    # Concatenate and sort input files to sorted.out
    sorted_output_filename = os.path.join(output_dir, 'sorted.out')
    print("+ cat {}/* | sort > {}".format(input_dir, sorted_output_filename))

    # Update locale to use traditional sort; Travis required 'C.UTF-8' over 'C'
    os.environ.update({'LC_ALL': 'C.UTF-8'})
    sh.sort(
        sh.cat(glob.glob(os.path.join(input_dir, '*')), _piped=True),
        _out=sorted_output_filename,
    )

    # Open grouper output files.  Store the file handles in a circular buffer.
    grouper_files = collections.deque(maxlen=num_workers)
    for i in range(num_workers):
        filename = os.path.join(output_dir, part_filename(i))
        file = open(filename, 'w')
        grouper_files.append(file)

    # Write lines to grouper output files.  Round robin allocation by key.
    prev_key = None
    with open(sorted_output_filename, 'r') as sorted_output_file:
        for line in sorted_output_file:
            # Parse the line.  Must be two strings separated by a tab.
            assert '\t' in line, "Error: no TAB found in line."
            key, _ = line.split('\t', maxsplit=2)

            # If it's a new key, then rotate circular queue of grouper files
            if prev_key is not None and key != prev_key:
                grouper_files.rotate(1)

            # Write to grouper file
            grouper_files[0].write(line)

            # Update most recently seen key
            prev_key = key

    # Close grouper output file handles
    for file in grouper_files:
        file.close()
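# A toy illustration (hypothetical data, not part of group_stage above) of the
# round-robin allocation it performs: because the input is sorted, lines with
# the same key always land in the same bucket, and the bucket rotates whenever
# the key changes.
import collections

lines = ["a\t1\n", "a\t2\n", "b\t1\n", "c\t1\n", "c\t2\n"]
buckets = collections.deque([[], [], []], maxlen=3)
prev_key = None
for line in lines:
    key, _ = line.split("\t", maxsplit=1)
    if prev_key is not None and key != prev_key:
        buckets.rotate(1)
    buckets[0].append(line)
    prev_key = key
print(list(buckets))  # each inner list holds complete key groups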
def get_files(usaf, wban):
    output = sh.grep("%s %s" % (usaf, wban), "isd-history.txt").strip().split(" ")
    end = int(output.pop()[0:4])
    start = int(output.pop()[0:4])
    sh.mkdir("-p", "%s-%s" % (usaf, wban))
    os.chdir("%s-%s" % (usaf, wban))
    for year in range(start, end + 1):
        fn = "%s-%s-%s.gz" % (usaf, wban, year)
        if not os.path.exists(fn):
            sh.wget("ftp://ftp.ncdc.noaa.gov/pub/data/noaa/%s/%s" % (year, fn))
        print(fn)
    output_fn = "%s-%s-data.csv" % (usaf, wban)
    h = open(output_fn, "w")
    sh.sort(
        sh.cut(
            sh.grep(
                sh.cut(sh.zcat(glob.glob("*.gz")),
                       "--output-delimiter=,", "-c16-27,88-92"),
                "-v", r"\+9999"),
            "--output-delimiter=.", "-c1-17,18"),
        _out=h)
    sh.gzip(output_fn)
    sh.mv("%s.gz" % (output_fn), "..")
def _get_python_path(self):
    _python_paths = [
        sh.which('python'),
        sh.which('python3'),
        sh.which('python2')
    ]
    python_paths = [str(path) for path in _python_paths if path]
    if os.path.isfile('/usr/local/python-3.6.5/bin/python'):
        python_paths.append('/usr/local/python-3.6.5/bin/python')
    if os.path.isdir('/usr/local/Cellar/python'):
        out = sh.find('/usr/local/Cellar/python', '-regex',
                      '.*/bin/python3[0-9.]*$', '-type', 'f', _piped=True)
        out = sh.sort(out, _piped=True)
        python_paths.append(sh.head(out, '-n1').strip())

    useable_pythons = []
    python_paths_set = set()
    for python_path in python_paths:
        if python_path in python_paths_set:
            continue
        python_paths_set.add(python_path)
        if os.path.realpath(python_path) in python_paths_set:
            continue
        python_paths_set.add(os.path.realpath(python_path))
        useable_pythons.append(
            (python_path, self._get_python_version(python_path)))

    if len(useable_pythons) == 0:
        print('Not found python!!')
        sys.exit(1)

    error = ''
    while True:
        message = '{}\n{}select python path [{}]: '.format(
            '\n'.join([
                '{}. {} (v{})'.format(i, *e)
                for i, e in enumerate(useable_pythons)
            ]),
            error,
            ','.join([str(i) for i in range(len(useable_pythons))]))
        num = int(input(message))
        if num < 0 or num >= len(useable_pythons):
            error = 'error: invalid input, try again!! '
            continue
        return useable_pythons[num]
def path_bins(self):
    PATH = os.getenv("PATH").split(":")
    try:
        # stest exits 0 when some dir in PATH is newer than the cache,
        # so the cache is stale and gets rebuilt below
        sh.stest("-dqr", "-n", self.cache_bins, *PATH)
        for b in sh.tee(sh.sort(sh.stest("-flx", *PATH, _piped=True), _piped=True),
                        self.cache_bins, _iter=True):
            b = b.strip()
            yield b
    except sh.ErrorReturnCode:
        # otherwise the cache is up to date; read it back
        with open(self.cache_bins, "r") as f:
            for b in f.readlines():
                b = b.strip()
                yield b
def get_default_modules(user, host):
    modules = {}
    print(host)
    result = sort(ssh("{0}@{1}".format(user, host), "module", "-l", "avail", "2>&1"))
    print("ok")
    for line in result:
        if "default" in line:
            content = line.split()
            print(content)
            try:
                (module_package, module_version) = content[0].split("/")
                modules[module_package] = module_version
            except ValueError:
                pass
    return modules
def emails(self, format_arg=None):
    if format_arg is None:
        format_string = "'%aN' <%cE>"
    elif format_arg == 'dict':
        format_string = "%aN\t%cE"
    result = sort(git.log(
        "--all",
        "--format=" + format_string,
        _tty_in=True,
        _tty_out=False,
        _piped=True), "-u")
    if format_arg is None:
        return result
    elif format_arg == "dict":
        # flatten the tab-separated "name<TAB>email" lines and pair them up
        pairs = result.replace("\n", "\t").split("\t")[:-1]
        it = iter(pairs)
        authors = dict(zip(it, it))
        return authors
def register_error_in_database(self, session: Session):
    """
    This method creates the database objects associated to the statification
    from the log that scrapy has generated.
    :param session
    :raise NoResultFound if there is no statification with empty commit sha
    """
    # finalize the statification by removing unwanted files and directories
    # and empty directories
    self.delete_files()
    self.delete_directories()
    self.delete_empty_directories()

    # get the statification with empty commit
    statification = Statification.get_statification(session, '')

    # open the log file that contains scrapy errors
    f_file = open(self.s_log_file)

    expecting_other_line_for_error_message = False
    s_error_message = ''

    # each line is searched for information that will be used to fill objects of the database
    for line in f_file:
        # check if the line contains a warning, an info or an error
        if re.match('(.*)WARNING(.*)', line) or re.match(
                '(.*)INFO(.*)', line) or re.match('(.*) ERROR:(.*)', line):
            expecting_other_line_for_error_message = False

        if expecting_other_line_for_error_message:
            s_error_message += line

        if (not expecting_other_line_for_error_message) and s_error_message != '':
            statification.add_object_to_statification(
                ScrapyError, session, s_error_message)
            s_error_message = ''

        # in the case the line matches an external link
        if re.match('(.*) INFO: External link detected(.*)', line):
            # we get the second part of the line; there are also [] in the first part
            s_trunked_line = line[line.index('INFO: External link detected'):len(line)]
            # we get the position of the beginning of the URL
            i_start_url = s_trunked_line.index('[')
            # we get the position of the end of the URL
            i_end_url = s_trunked_line.index(']')
            # we get the position of the beginning of the source url
            i_start_source = s_trunked_line.index(' in ') + 4

            try:
                # we create and add a new ExternalLink to our statification
                statification.add_object_to_statification(
                    ExternalLink, session,
                    s_trunked_line[i_start_source:len(s_trunked_line)],
                    s_trunked_line[i_start_url + 1:i_end_url])
            except ValueError as e:
                self.logger.info(e)

        # in the case the line matches a Scrapy error
        elif re.match('(.*) ERROR:(.*)', line):
            expecting_other_line_for_error_message = True
            # retrieve the Scrapy error
            s_trunked_line = line[line.index('ERROR: ') + 7:len(line)]
            s_error_message += s_trunked_line

        # in the case the line matches a MIME type error
        elif re.match('(.*) WARNING: Forbidden content (.*)', line):
            # we get the second part of the line where the information of interest begins
            s_trunked_line = line[line.index('WARNING: Forbidden content '):len(line)]
            # get the starting position of the MIME type error
            i_start_error_mime = s_trunked_line.index('[')
            # get the end position of the MIME type error
            i_end_error_mime = s_trunked_line.index(']')
            # get the MIME type error
            s_error_mime = s_trunked_line[i_start_error_mime + 1:i_end_error_mime]
            # get the source of the error
            s_source_link = s_trunked_line[s_trunked_line.index('detected in') + 12:len(s_trunked_line)]

            try:
                # create an ErrorTypeMIME associated to the statification
                statification.add_object_to_statification(
                    ErrorTypeMIME, session, s_error_mime, s_source_link)
            except ValueError as e:
                self.logger.info(e)

        # in the case the line matches an HTTP error
        elif re.match('(.*) WARNING: HTTP error (.*)', line):
            # we get the second part of the line where the information of interest begins
            s_trunked_line = line[line.index('WARNING: HTTP error '):len(line)]
            # we get the starting position of the error code
            i_start_error_code = s_trunked_line.index('[')
            # we get the end position of the error code
            i_end_error_code = s_trunked_line.index(']')
            # we get the start position of the url source of the error
            i_start_url = s_trunked_line.index(' for ')
            # we get the end position of the url source of the error
            i_end_url = s_trunked_line.index(' from ')
            # we retrieve the error code
            s_error_code = s_trunked_line[i_start_error_code + 1:i_end_error_code]
            # we retrieve the url that caused the error
            s_url = s_trunked_line[i_start_url + 5:i_end_url]
            # we retrieve the url of the page where the faulty url was found
            s_url_source = s_trunked_line[i_end_url + 6:len(s_trunked_line) - 1]

            try:
                # we create a new HtmlError associated to the statification
                statification.add_object_to_statification(
                    HtmlError, session, s_error_code, s_url, s_url_source)
            except ValueError as e:
                self.logger.info(e)

        elif re.match('(.*)response_received_count(.*)', line):
            # we get the second part of the line where the information of interest begins
            s_value_item_scraped_count = line[line.index(': ') + 2:line.index(',')]

            try:
                # set the number of crawled items in the statification object
                statification.upd_nb_item(session, statification.commit,
                                          int(s_value_item_scraped_count))
            except ValueError as e:
                self.logger.info(e)

    try:
        # retrieve the list of file types with the number of files of each type
        s_result_type_files = sh.uniq(
            sh.sort(
                sh.grep(
                    sh.find(sh.glob(self.s_repository_path + '/*'), '-type', 'f'),
                    '-o', '-E', r'\.[a-zA-Z0-9]+$')),
            '-c')

        # the result is a string, so we turn it into a table:
        # one entry per returned line, with all spaces removed
        a_table_result_type_files = s_result_type_files.replace(' ', '').split('\n')

        # browse the lines of the result
        for row in a_table_result_type_files:
            if row:
                # a line is composed of a number followed by a type, like "42.png";
                # we separate the number and the type
                s_type_file = row.split('.')
                try:
                    # create a new ScannedFile associated to the statification
                    statification.add_object_to_statification(
                        ScannedFile, session, s_type_file[1], int(s_type_file[0]))
                except ValueError as e:
                    self.logger.info(e)
    except sh.ErrorReturnCode_1:
        self.logger.info('There is no folder in the static repository')
    finally:
        # in all cases we need to close the log file
        f_file.close()

    # change the status of the statification (NEEDS TO BE DONE AT THE END !!)
    statification.upd_status(session, '', Status.STATIFIED)
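# A hedged pure-Python sketch of the file-type tally done above with
# find | grep -oE | sort | uniq -c: count files per extension under a
# directory. repository_path stands in for s_repository_path and the result is
# a Counter rather than "count.ext" strings.
import collections
import os

def count_file_types(repository_path):
    counts = collections.Counter()
    for root, _dirs, files in os.walk(repository_path):
        for name in files:
            ext = os.path.splitext(name)[1].lstrip('.')
            if ext:
                counts[ext] += 1
    return counts  # e.g. Counter({'png': 42, 'html': 10})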
DATA_BASES = sh.mysql(sh.echo('show databases;'))
DATA_BASES = [el.strip() for el in DATA_BASES]
DATA_BASES = DATA_BASES[1:]  # first entry is 'Database', which is not a database
DATA_BASES += ['All-Databases']
DATA_BASES = ['trading_oanda_d1']

DATESTAMP = sh.date("+%Y-%m-%d_%H:%M").strip()

for DB in DATA_BASES:
    for DD in [DATA_DIR, LOG_DIR]:
        # step a): delete all except the latest two files for each database
        print(f'database: {DB}; dir: {DD}')
        a = sh.find(DATA_DIR, '-maxdepth', '1', '-type', 'f', '-regextype', 'sed',
                    '-regex', f'^/.*{DB}\-[0-9].*', '-printf', '%Ts\t%p\n')
        b = sh.sort(a, '-n')
        c = sh.head(b, '-n', '-2')
        d = sh.cut(c, '-f', '2-')
        print(d.strip())
        e = sh.xargs(d, 'rm', '-rf')

    # step b): export the databases
    FILENAME = Path.joinpath(DATA_DIR, f'{DB}-{DATESTAMP}.sql.gz')
    print(f'FILENAME: {FILENAME}')
    LOGFILENAME = Path.joinpath(LOG_DIR, f'{DB}-{DATESTAMP}.log')
    print(f'LOGFILENAME: {LOGFILENAME}')

    # cmd = "mysqldump -v --single-transaction --quick --lock-tables=false ${DB} 2>'${LOGFILENAME}' | pigz > '${FILENAME}' "
    # sh.mysqldump('-v', '--single-transaction', '--quick', '--lock-tables=false', DB, _out=FILENAME, _err=LOGFILENAME)
    sh.ls(DATA_DIR, _out=FILENAME)
    print()
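# A hedged sketch of step a) in plain Python, for comparison with the
# find | sort -n | head -n -2 | cut | xargs rm pipeline above: keep only the
# two most recent dump files per database and remove the rest. data_dir and db
# correspond to the DATA_DIR and DB names used above; keep is assumed positive.
from pathlib import Path

def prune_old_dumps(data_dir, db, keep=2):
    files = sorted(Path(data_dir).glob(f'{db}-[0-9]*'),
                   key=lambda p: p.stat().st_mtime)
    for path in files[:-keep]:
        path.unlink()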
filters = [
    {
        'Name': 'instance-state-name',
        'Values': [
            'running',
        ]
    },
]
response = ec2.describe_instances(Filters=filters)
for reservation in response["Reservations"]:
    for instance in reservation["Instances"]:
        for tag in instance["Tags"]:
            if tag["Key"] in "Name":
                try:
                    if "devops" not in tag["Value"] and "vpn" not in tag["Value"]:
                        fp.write(
                            tag["Value"] + " ansible_host=" +
                            instance["PrivateIpAddress"] +
                            " ansible_user=conman ansible_ssh_private_key_file=~/.ssh/keys/conman_id_rsa\n")
                except KeyError:
                    print(tag["Value"] + instance["KeyName"])
fp.close()

# sort the inventory file
tee(sort("production-hosts"), "production-hosts")
from sh import git, ssh, head, tail, wc, sort, glob, du

# Get list of files
glob('*')

# Get sizes of each
du('-hM', glob('*'))

# Sort, numerically
sort(du('-hM', glob('*')), '-n')

# And get the largest
tail(sort(du('-hM', glob('*')), '-n'), '-n', 5)
def list_notebooks() -> str:
    # Precondition: line 3 of each note reads 'Notebook:' followed by a space
    # and the notebook name
    nbs = sh.uniq(sh.sort(sh.awk('FNR==3 {print $2}', list(NOTE_FILES))))
    return nbs
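# A hedged pure-Python equivalent of the awk | sort | uniq pipeline above,
# shown only for illustration: read the third line of each note file and
# collect the second whitespace-separated field (the notebook name).
# note_files stands in for NOTE_FILES.
def list_notebooks_py(note_files):
    names = set()
    for path in note_files:
        with open(path) as f:
            lines = f.readlines()
        if len(lines) >= 3:
            fields = lines[2].split()
            if len(fields) >= 2:
                names.add(fields[1])
    return sorted(names)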