Пример #1
0
def main(argv=None):
    '''Dump a SequenceFile to a tab-separated text file.

    Called when the module is run from the command line.  The single
    positional argument ``pathname`` names the sequence file to read; the
    output is written to ``<pathname>.tsv`` with one ``key<TAB>value`` line
    per record, using each object's ``toString()``.

    NOTE(review): ``argv`` is accepted but never used; the arguments come
    from ``interpretCmdLine()`` (defined elsewhere) -- confirm intended.

    Returns:
        The number of records written.
    '''
    (prog, args) = interpretCmdLine()
    parser = argparse.ArgumentParser(prog, description='seq2tsv')
    parser.add_argument("pathname")
    args = parser.parse_args(args)
    outputPathname = args.pathname + ".tsv"
    count = 0
    start = datetime.datetime.now()
    with open(outputPathname, 'w') as f:
        reader = SequenceFile.Reader(args.pathname)
        # Close the reader even if a record fails to read or write.
        try:
            # One key/value pair is created up front and handed to every
            # reader.next() call, which reports whether a record was read.
            key = reader.getKeyClass()()
            value = reader.getValueClass()()
            while reader.next(key, value):
                f.write('%s\t%s\n' % (key.toString(), value.toString()))
                count += 1
        finally:
            reader.close()
    delta = datetime.datetime.now() - start
    sys.stderr.write("ELAPSED seq2tsv is %s\n" % elapsed(delta))
    return count
Пример #2
0
def main(argv=None):
    '''Convert a tab-separated text file into a SequenceFile.

    Called when the module is run from the command line.  The single
    positional argument ``pathname`` names the input file; every line is
    expected to hold exactly two tab-separated fields (url, payload).  Each
    pair is appended to ``<pathname>.seq`` as a ``Text`` key and a ``Text``
    value holding the JSON-encoded payload.

    Lines that raise ``ValueError`` (for example, a line that does not split
    into exactly two fields) are skipped; the number skipped is reported on
    stderr instead of being dropped silently.

    NOTE(review): ``argv`` is accepted but never used; the arguments come
    from ``interpretCmdLine()`` (defined elsewhere) -- confirm intended.

    Returns:
        The number of records appended to the sequence file.
    '''
    (prog, args) = interpretCmdLine()
    parser = argparse.ArgumentParser(prog, description='tsv2seq')
    parser.add_argument("pathname")
    args = parser.parse_args(args)

    outputPathname = args.pathname + ".seq"
    writer = SequenceFile.createWriter(outputPathname, Text, Text)
    count = 0
    skipped = 0
    start = datetime.datetime.now()
    # Close the writer even if the input cannot be opened or read.
    try:
        with open(args.pathname, 'r') as f:
            for line in f:
                try:
                    # NOTE(review): payload keeps the line's trailing
                    # newline, exactly as before -- confirm that is wanted.
                    (url, payload) = line.split('\t')
                    key = Text()
                    key.set(url)
                    value = Text()
                    # I'm not at all sure why we would want to decode, not
                    # encode here; this is the only thing that worked
                    value.set(Text.decode(json.dumps(payload)))
                    writer.append(key, value)
                    count += 1
                except ValueError:
                    # Best effort: keep going, but remember the loss.
                    skipped += 1
    finally:
        writer.close()
    delta = datetime.datetime.now() - start
    if skipped:
        sys.stderr.write("tsv2seq skipped %d malformed lines\n" % skipped)
    sys.stderr.write("ELAPSED tsv2seq is %s\n" % elapsed(delta))
    return count
Пример #3
0
def main(argv=None):
    '''Write every record of a SequenceFile as a line of tab-separated text.

    Called when the module is run from the command line.  Takes one
    positional argument, ``pathname``, the sequence file to read.  Output
    goes to ``<pathname>.tsv``; each record becomes a single
    ``key<TAB>value`` line built from the objects' ``toString()`` results.

    NOTE(review): ``argv`` is accepted but never used; the arguments come
    from ``interpretCmdLine()`` (defined elsewhere) -- confirm intended.

    Returns:
        The number of records written.
    '''
    (prog, args) = interpretCmdLine()
    parser = argparse.ArgumentParser(prog, description='seq2tsv')
    parser.add_argument("pathname")
    args = parser.parse_args(args)
    outputPathname = args.pathname + ".tsv"
    count = 0
    start = datetime.datetime.now()
    with open(outputPathname, 'w') as f:
        reader = SequenceFile.Reader(args.pathname)
        try:
            # The same key and value objects are passed to each
            # reader.next() call; the loop ends when it returns false.
            key_class = reader.getKeyClass()
            value_class = reader.getValueClass()
            key = key_class()
            value = value_class()
            while reader.next(key, value):
                f.write('%s\t%s\n' % (key.toString(), value.toString()))
                count += 1
        finally:
            # Release the reader even when reading or writing fails.
            reader.close()
    end = datetime.datetime.now()
    sys.stderr.write("ELAPSED seq2tsv is %s\n" % elapsed(end - start))
    return count
Пример #4
0
def main(argv=None):
    '''Publish files and directory contents via ``azure_publish_file``.

    Called when the module is run from the command line.  Options:

    * ``-f/--file``       file to publish; may be repeated.
    * ``-d/--directory``  directory whose entries are published; may be
      repeated.
    * ``-t/--type``       content type passed to every upload, one of
      text/html (default), image/jpeg, image/gif, image/png.
    * ``-v/--verbose``    when truthy, print elapsed time and the upload
      count to stderr.

    NOTE(review): ``argv`` is accepted but never used; the arguments come
    from ``interpretCmdLine()`` (defined elsewhere) -- confirm intended.
    '''
    start = datetime.datetime.now()
    (prog, args) = interpretCmdLine()
    parser = argparse.ArgumentParser(prog, description='azure_publish')
    parser.add_argument('-d',
                        '--directory',
                        help='directory to publish',
                        required=False,
                        action="append",
                        default=[])
    parser.add_argument('-f',
                        '--file',
                        help='file to publish',
                        required=False,
                        action="append",
                        default=[])
    parser.add_argument(
        '-t',
        '--type',
        help='content type',
        required=False,
        choices=["text/html", "image/jpeg", "image/gif", "image/png"],
        default="text/html")
    # NOTE(review): without action="store_true" this option consumes a
    # value, and any non-empty string (even "False") is truthy below.
    # Left as is to keep the command line backward-compatible.
    parser.add_argument('-v',
                        '--verbose',
                        help='print to stderr',
                        required=False,
                        default=VERBOSE)

    args = parser.parse_args(args)
    count = 0
    for pathname in args.file:
        azure_publish_file(pathname, content_type=args.type)
        count += 1
    for directory in args.directory:
        for entry in os.listdir(directory):
            # os.listdir() yields bare names; join them with the directory
            # so the path is valid regardless of the current working dir.
            azure_publish_file(os.path.join(directory, entry),
                               content_type=args.type)
            count += 1
    delta = datetime.datetime.now() - start
    if args.verbose:
        sys.stderr.write("ELAPSED azure_publish is %s\n" % elapsed(delta))
        sys.stderr.write("%d files uploaded\n" % (count))
Пример #5
0
def main(argv=None):
    '''Upload the requested files and directory entries.

    Called when the module is run from the command line.  Each ``-f/--file``
    path and each entry of every ``-d/--directory`` is handed to
    ``azure_publish_file`` together with the ``-t/--type`` content type
    (default ``text/html``).  When ``-v/--verbose`` is truthy, the elapsed
    time and the number of uploads are printed to stderr.

    NOTE(review): ``argv`` is accepted but never used; the arguments come
    from ``interpretCmdLine()`` (defined elsewhere) -- confirm intended.
    '''
    start = datetime.datetime.now()
    (prog, args) = interpretCmdLine()
    parser = argparse.ArgumentParser(prog, description='azure_publish')
    parser.add_argument('-d', '--directory', help='directory to publish',
                        required=False,
                        action="append",
                        default=[])
    parser.add_argument('-f', '--file', help='file to publish',
                        required=False,
                        action="append",
                        default=[])
    parser.add_argument('-t', '--type', help='content type',
                        required=False,
                        choices=["text/html", "image/jpeg", "image/gif", "image/png"],
                        default="text/html")
    # NOTE(review): this option takes a value (no store_true action), so any
    # non-empty string enables verbose output.  Kept for CLI compatibility.
    parser.add_argument('-v', '--verbose', help='print to stderr',
                        required=False,
                        default=VERBOSE)

    args = parser.parse_args(args)
    verbose = args.verbose
    count = 0
    for pathname in args.file:
        azure_publish_file(pathname, content_type=args.type)
        count += 1
    for directory in args.directory:
        # os.listdir() returns names relative to `directory`; build the full
        # path so the upload does not depend on the current working dir.
        for name in os.listdir(directory):
            azure_publish_file(os.path.join(directory, name),
                               content_type=args.type)
            count += 1
    end = datetime.datetime.now()
    delta = end - start
    if verbose:
        sys.stderr.write("ELAPSED azure_publish is %s\n" % elapsed(delta))
        sys.stderr.write("%d files uploaded\n" % (count))