def cli_lines_with_dir(input_):
    """Run ``main`` against a scratch directory and summarize the output.

    A temporary directory is created and passed, together with ``input_``,
    to ``main``. The file named ``cdx_filename(basename(input_))`` inside
    that directory is then opened and at most its first 8192 bytes are
    read and split into lines. The temporary directory is always removed
    afterwards.

    Prints the computed filename, then the line at index 1, the last line
    and the number of lines read. Returns ``None``.
    """
    work_dir = None
    records = None
    try:
        work_dir = tempfile.mkdtemp()
        main([work_dir, input_])

        cdx_name = cdx_filename(os.path.basename(input_))
        print(cdx_name)

        cdx_path = os.path.join(work_dir, cdx_name)
        with open(cdx_path, 'rb') as stream:
            head = stream.read(8192)
        records = head.rstrip().split(b'\n')
    finally:
        if work_dir:
            try:
                shutil.rmtree(work_dir)
            except OSError as err:
                # errno 2 (no such file or directory) is tolerated;
                # anything else is re-raised.
                if err.errno != 2:
                    raise

    if not records:
        return

    # Index 1 is printed rather than index 0 -- presumably index 0 is a
    # header line; TODO confirm against the indexer's output format.
    second = records[1].decode('utf-8')
    final = records[-1].decode('utf-8')
    count = len(records)
    print(second)
    print(final)
    print('Total: ' + str(count))
def cli_lines_with_dir(input_):
    """Index ``input_`` into a temporary directory and summarize the result.

    Creates a temporary directory, calls ``main([tmp_dir, input_])``, then
    opens the file named ``cdx_filename(basename(input_))`` in that
    directory and reads at most its first 8192 bytes. The temporary
    directory is removed whether or not indexing succeeded.

    Prints the computed filename, then the line at index 1, the last line
    and the number of lines read. Returns ``None``.
    """
    import errno

    lines = None
    tmp_dir = None
    try:
        tmp_dir = tempfile.mkdtemp()
        main([tmp_dir, input_])

        filename = cdx_filename(os.path.basename(input_))
        print(filename)

        with open(os.path.join(tmp_dir, filename), 'rb') as fh:
            # The file is opened in binary mode, so split on a bytes
            # delimiter; a str delimiter fails on Python 3.
            lines = fh.read(8192).rstrip().split(b'\n')
    finally:
        if tmp_dir:
            try:
                shutil.rmtree(tmp_dir)
            except OSError as exc:
                # A directory that is already gone is fine; anything else
                # is a real cleanup failure.
                if exc.errno != errno.ENOENT:
                    raise

    if not lines:
        return

    # Print the line at index 1, the last line and the line count.
    # NOTE(review): index 0 is skipped -- presumably a header line; confirm.
    print(lines[1].decode('utf-8'))
    print(lines[-1].decode('utf-8'))
    print('Total: ' + str(len(lines)))
def cli_lines(cmds):
    """Run ``main(cmds)`` with ``sys.stdout.buffer`` captured and summarize it.

    ``sys.stdout.buffer`` is temporarily pointed at an in-memory
    ``BytesIO``. The previous state is restored even if ``main`` raises:
    the original ``buffer`` object is put back, or the attribute is
    removed again if stdout had none before the call.

    Prints the captured line at index 1, the last captured line and the
    number of captured lines. Returns ``None``.
    """
    buff = BytesIO()

    # Hold on to the stream object so the same one is restored afterwards.
    stdout = sys.stdout
    had_buffer = hasattr(stdout, 'buffer')
    orig = stdout.buffer if had_buffer else None

    stdout.buffer = buff
    try:
        main(cmds)
    finally:
        if had_buffer:
            stdout.buffer = orig
        else:
            # Do not leave a stray ``buffer = None`` behind on a stream
            # that never had the attribute.
            del stdout.buffer

    lines = buff.getvalue().rstrip().split(b'\n')

    # Print the line at index 1, the last line and the line count.
    # NOTE(review): index 0 is skipped -- presumably a header line; confirm.
    print(lines[1].decode('utf-8'))
    print(lines[-1].decode('utf-8'))
    print('Total: ' + str(len(lines)))
def cli_lines(cmds):
    """Run ``main(cmds)`` with ``sys.stdout`` captured and summarize it.

    ``sys.stdout`` is temporarily replaced by an in-memory ``BytesIO`` and
    is restored even if ``main`` raises.

    Prints the captured line at index 1, the last captured line and the
    number of captured lines. Returns ``None``.
    """
    buff = BytesIO()
    orig = sys.stdout
    sys.stdout = buff
    try:
        # NOTE(review): assumes ``main`` writes bytes-compatible output to
        # the replaced stream -- confirm against ``main``.
        main(cmds)
    finally:
        sys.stdout = orig

    # The captured value is bytes, so split on a bytes delimiter; a str
    # delimiter raises TypeError on Python 3.
    lines = buff.getvalue().rstrip().split(b'\n')

    # Print the line at index 1, the last line and the line count.
    # NOTE(review): index 0 is skipped -- presumably a header line; confirm.
    print(lines[1].decode('utf-8'))
    print(lines[-1].decode('utf-8'))
    print('Total: ' + str(len(lines)))
def index_cdx(self, output_cdx, input_):
    """
    Output sorted, post-query resolving cdx from 'input_' warc(s)
    to 'output_cdx'. The cdx is written to a temp file next to
    'output_cdx' and moved over 'output_cdx' only once indexing has
    completed, so a partially written index never replaces it.

    :param output_cdx: path of the cdx file to produce
    :param input_: input passed through to the cdx indexer
    :return: True if the index was written and moved into place,
             False if the indexer raised (the traceback is printed and
             the temp file, if any, is removed)
    """
    import errno
    import traceback

    # Run cdx indexer
    temp_cdx = output_cdx + '.tmp.' + timestamp20()
    indexer_args = ['-s', '-p', temp_cdx, input_]

    try:
        cdxindexer.main(indexer_args)
    except Exception:
        # format_exc() reports the exception currently being handled; its
        # only positional parameter is a frame limit, not the exception.
        print(traceback.format_exc())

        try:
            os.remove(temp_cdx)
        except OSError as rm_exc:
            # The indexer may have failed before creating the temp file;
            # that must not mask the failure being reported here.
            if rm_exc.errno != errno.ENOENT:
                raise
        return False

    shutil.move(temp_cdx, output_cdx)
    return True
def index_cdx(self, output_cdx, input_):
    """
    Output sorted, post-query resolving cdx from 'input_' warc(s)
    to 'output_cdx'. The cdx is written to a temp file next to
    'output_cdx' and renamed to 'output_cdx' only once indexing has
    completed, so a partially written index never replaces it.

    :param output_cdx: path of the cdx file to produce
    :param input_: input passed through to the cdx indexer
    :return: True if the index was written and renamed into place,
             False if the indexer raised (the traceback is printed and
             the temp file, if any, is removed)
    """
    import errno
    import traceback

    # Run cdx indexer
    temp_cdx = output_cdx + '.tmp.' + timestamp20()
    indexer_args = ['-s', '-p', temp_cdx, input_]

    try:
        cdxindexer.main(indexer_args)
    except Exception:
        # format_exc() reports the exception currently being handled; its
        # only positional parameter is a frame limit, not the exception.
        print(traceback.format_exc())

        try:
            os.remove(temp_cdx)
        except OSError as rm_exc:
            # The indexer may have failed before creating the temp file;
            # that must not mask the failure being reported here.
            if rm_exc.errno != errno.ENOENT:
                raise
        return False

    os.rename(temp_cdx, output_cdx)
    return True