def main(argv=None):
    """Dump a SequenceFile to a tab-separated text file.

    Called when the module is run from the command line.  The single
    positional argument ``pathname`` names the sequence file to read; the
    output is written to ``<pathname>.tsv`` with one ``key<TAB>value`` line
    per record, using each object's ``toString()``.

    ``argv`` is accepted but not consulted: the program name and argument
    list are taken from ``interpretCmdLine()``.

    Returns:
        The number of records written.
    """
    (prog, args) = interpretCmdLine()
    parser = argparse.ArgumentParser(prog, description='seq2tsv')
    parser.add_argument("pathname")
    args = parser.parse_args(args)

    outputPathname = args.pathname + ".tsv"
    count = 0
    start = datetime.datetime.now()
    with open(outputPathname, 'w') as f:
        reader = SequenceFile.Reader(args.pathname)
        try:
            # The reader fills these two holder objects in place on each
            # call to next(), so one pair is reused for the whole file.
            key = reader.getKeyClass()()
            value = reader.getValueClass()()
            while reader.next(key, value):
                f.write('%s\t%s\n' % (key.toString(), value.toString()))
                # Previously never incremented, so main() always returned 0.
                count += 1
        finally:
            # Release the reader even if a record fails to read or write.
            reader.close()

    delta = datetime.datetime.now() - start
    sys.stderr.write("ELAPSED seq2tsv is %s\n" % elapsed(delta))
    return count
def main(argv=None):
    """Convert a tab-separated text file into a SequenceFile.

    Called when the module is run from the command line.  The single
    positional argument ``pathname`` names the input file, whose lines are
    expected to look like ``url<TAB>payload``.  Each line becomes one
    ``Text``/``Text`` record in ``<pathname>.seq``.

    Lines that raise ``ValueError`` while being split or encoded (for
    example, lines that do not contain exactly one tab) are skipped; the
    number of skipped lines is reported on stderr.

    ``argv`` is accepted but not consulted: the program name and argument
    list are taken from ``interpretCmdLine()``.

    Returns:
        The number of records appended to the sequence file.
    """
    (prog, args) = interpretCmdLine()
    parser = argparse.ArgumentParser(prog, description='tsv2seq')
    parser.add_argument("pathname")
    args = parser.parse_args(args)

    outputPathname = args.pathname + ".seq"
    writer = SequenceFile.createWriter(outputPathname, Text, Text)
    count = 0
    skipped = 0
    start = datetime.datetime.now()
    try:
        with open(args.pathname, 'r') as f:
            for line in f:
                try:
                    # NOTE(review): the line is not stripped, so payload
                    # keeps its trailing newline, and any extra tab makes
                    # the unpacking raise ValueError (line skipped).
                    (url, payload) = line.split('\t')
                    key = Text()
                    key.set(url)
                    value = Text()
                    # I'm not at all sure why we would want to decode, not
                    # encode here; this is the only thing that worked.
                    value.set(Text.decode(json.dumps(payload)))
                    writer.append(key, value)
                    count += 1
                except ValueError:
                    # Best effort: keep going, but remember that we dropped
                    # a line instead of discarding it silently.
                    skipped += 1
    finally:
        # Close the writer even if the input cannot be opened or read.
        writer.close()

    delta = datetime.datetime.now() - start
    if skipped:
        sys.stderr.write("tsv2seq skipped %d unparseable lines\n" % skipped)
    sys.stderr.write("ELAPSED tsv2seq is %s\n" % elapsed(delta))
    return count
def main(argv=None):
    """Publish files and directory contents with ``azure_publish_file``.

    Called when the module is run from the command line.  Options:

    * ``-f/--file``: a file to publish (repeatable).
    * ``-d/--directory``: a directory whose regular files are published
      (repeatable, not recursive).
    * ``-t/--type``: content type passed to every upload; one of
      ``text/html`` (default), ``image/jpeg``, ``image/gif``, ``image/png``.
    * ``-v/--verbose``: takes a value; when truthy, the elapsed time and the
      number of uploaded files are written to stderr.  Defaults to the
      module-level ``VERBOSE``.

    ``argv`` is accepted but not consulted: the program name and argument
    list are taken from ``interpretCmdLine()``.
    """
    start = datetime.datetime.now()
    (prog, args) = interpretCmdLine()
    parser = argparse.ArgumentParser(prog, description='azure_publish')
    parser.add_argument('-d', '--directory', help='directory to publish',
                        required=False, action="append", default=[])
    parser.add_argument('-f', '--file', help='file to publish',
                        required=False, action="append", default=[])
    parser.add_argument('-t', '--type', help='content type', required=False,
                        choices=["text/html", "image/jpeg", "image/gif",
                                 "image/png"],
                        default="text/html")
    # NOTE(review): no action="store_true" here, so -v consumes a value and
    # any non-empty string (even "0") counts as verbose.
    parser.add_argument('-v', '--verbose', help='print to stderr',
                        required=False, default=VERBOSE)
    args = parser.parse_args(args)

    count = 0
    for pathname in args.file:
        azure_publish_file(pathname, content_type=args.type)
        count += 1

    for directory in args.directory:
        for entry in os.listdir(directory):
            # os.listdir() yields bare names; without the directory prefix
            # the upload only worked when run from inside that directory.
            pathname = os.path.join(directory, entry)
            if not os.path.isfile(pathname):
                # Subdirectories and other non-regular entries are skipped.
                continue
            azure_publish_file(pathname, content_type=args.type)
            count += 1

    delta = datetime.datetime.now() - start
    if args.verbose:
        sys.stderr.write("ELAPSED azure_publish is %s\n" % elapsed(delta))
        sys.stderr.write("%d files uploaded\n" % (count))
def main(argv=None):
    """Publish files and directory contents with ``azure_publish_file``.

    Called when the module is run from the command line.  Options:

    * ``-f/--file``: a file to publish (repeatable).
    * ``-d/--directory``: a directory whose regular files are published
      (repeatable, not recursive).
    * ``-t/--type``: content type passed to every upload; one of
      ``text/html`` (default), ``image/jpeg``, ``image/gif``, ``image/png``.
    * ``-v/--verbose``: takes a value; when truthy, the elapsed time and the
      number of uploaded files are written to stderr.  Defaults to the
      module-level ``VERBOSE``.

    ``argv`` is accepted but not consulted: the program name and argument
    list are taken from ``interpretCmdLine()``.
    """
    start = datetime.datetime.now()
    (prog, args) = interpretCmdLine()
    parser = argparse.ArgumentParser(prog, description='azure_publish')
    parser.add_argument('-d', '--directory', help='directory to publish',
                        required=False, action="append", default=[])
    parser.add_argument('-f', '--file', help='file to publish',
                        required=False, action="append", default=[])
    parser.add_argument('-t', '--type', help='content type', required=False,
                        choices=["text/html", "image/jpeg", "image/gif",
                                 "image/png"],
                        default="text/html")
    # NOTE(review): no action="store_true" here, so -v consumes a value and
    # any non-empty string (even "0") counts as verbose.
    parser.add_argument('-v', '--verbose', help='print to stderr',
                        required=False, default=VERBOSE)
    args = parser.parse_args(args)

    count = 0
    for pathname in args.file:
        azure_publish_file(pathname, content_type=args.type)
        count += 1

    for directory in args.directory:
        for entry in os.listdir(directory):
            # os.listdir() yields bare names; without the directory prefix
            # the upload only worked when run from inside that directory.
            pathname = os.path.join(directory, entry)
            if not os.path.isfile(pathname):
                # Subdirectories and other non-regular entries are skipped.
                continue
            azure_publish_file(pathname, content_type=args.type)
            count += 1

    delta = datetime.datetime.now() - start
    if args.verbose:
        sys.stderr.write("ELAPSED azure_publish is %s\n" % elapsed(delta))
        sys.stderr.write("%d files uploaded\n" % (count))