Пример #1
0
# AWS credentials; both are empty strings in this listing.
accesskey = ""
secretkey = ""
conn = S3Connection(accesskey, secretkey)

bucketname = "dataiap.mit.edu.ap"
try:
    bucket = conn.create_bucket(bucketname)
except Exception:
    # Creation failed (presumably because the bucket already exists), so fall
    # back to looking it up.  Catching Exception rather than using a bare
    # except lets KeyboardInterrupt and SystemExit propagate.
    #
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function; the two
    # spaces reproduce the output of the original comma-separated print.
    print("could not create bucket  %s" % bucketname)
    bucket = conn.get_bucket(bucketname)


def upload(category, fname, root):
    """Store one file in the module-level bucket as "<category>_<fname>".

    category -- prefix of the key the file is stored under
    fname -- name of the file inside root; files whose name does not start
        with "urn" are ignored
    root -- directory that contains the file
    """
    if not fname.startswith("urn"):
        return

    s3_object = Key(bucket)
    s3_object.key = "%s_%s" % (category, fname)
    local_path = "%s/%s" % (root, fname)
    s3_object.set_contents_from_filename(local_path)


# Directory to upload from, taken from the first command-line argument.
root = os.path.abspath(sys.argv[1])
try:
    walk_news(root, upload)

    # retrieve ALL articles in World
    bucket.get_all_keys(prefix="World")
finally:
    # Close the connection even when the walk or the listing raises.
    conn.close()


# return self._get_all([('CommonPrefixes', Prefix)],
#                       '', None, {})
        that we have no information about this document's categories
    id -- a unique ID for the document (any kind of JSON-able value should
        work). If not specified, we'll auto-generate one.
    """
    text = unicode(text, errors='ignore')
    cats = dict((unicode(cat), bool(is_in_cat))
                for cat, is_in_cat
                in (cats or {}).iteritems())

    return JSONValueProtocol.write(
        None, {'document': text, 'cats': cats, 'docid': id, 'type' : 'document'}) + '\n'



# Input and output directories, taken from the first two command-line
# arguments and made absolute.
root = os.path.abspath(sys.argv[1])
outroot = os.path.abspath(sys.argv[2])

def encode(category, fname, root):
    """Encode one file with encode_document and write it below outroot.

    category -- name of the output sub-directory; also passed to
        encode_document as the single category {category: 1}
    fname -- name of the input file inside root; reused as the output file
        name and as the document id
    root -- directory that contains the input file

    The result is written to <outroot>/<category>/<fname>, where outroot is
    the module-level output directory.  Raises OSError if the category
    directory cannot be created.
    """
    category_dir = os.path.join(outroot, category)
    try:
        os.mkdir(category_dir)
    except OSError:
        # The directory already existing is the expected case on every call
        # after the first for a category; anything else is a real failure.
        if not os.path.isdir(category_dir):
            raise

    # open() instead of file(): file() exists only in Python 2.
    with open(os.path.join(root, fname), 'r') as f:
        with open(os.path.join(category_dir, fname), 'w') as outf:
            outf.write(encode_document(f.read(), {category: 1}, fname))


# Pass encode to walk_news as the callback for the tree under root.
# NOTE(review): walk_news is defined outside this view; presumably it calls
# the callback as (category, fname, root) for each file -- confirm.
walk_news(root, encode)
Пример #3
0
    """
    text = unicode(text, errors='ignore')
    cats = dict((unicode(cat), bool(is_in_cat))
                for cat, is_in_cat in (cats or {}).iteritems())

    return JSONValueProtocol.write(None, {
        'document': text,
        'cats': cats,
        'docid': id,
        'type': 'document'
    }) + '\n'


# Absolute paths of the input directory (first command-line argument) and the
# output directory (second command-line argument).
root = os.path.abspath(sys.argv[1])
outroot = os.path.abspath(sys.argv[2])


def encode(category, fname, root):
    """Run encode_document on one input file and save the result.

    category -- category of the file; used as the output sub-directory name
        and passed to encode_document as {category: 1}
    fname -- input file name inside root; also the output file name and the
        id handed to encode_document
    root -- directory holding the input file

    Output goes to <outroot>/<category>/<fname>, with outroot read from module
    scope.  Raises OSError when the category directory cannot be created.
    """
    target_dir = os.path.join(outroot, category)
    try:
        os.mkdir(target_dir)
    except OSError:
        # Only "directory already exists" is safe to ignore; re-raise when
        # the directory is still missing after the failed mkdir.
        if not os.path.isdir(target_dir):
            raise

    # file() is a Python 2 only builtin; open() behaves the same here.
    with open(os.path.join(root, fname), 'r') as f:
        with open(os.path.join(target_dir, fname), 'w') as outf:
            outf.write(encode_document(f.read(), {category: 1}, fname))


# Start the run: walk_news receives the input directory and encode as the
# callback.  NOTE(review): walk_news is not visible here; presumably it
# invokes encode(category, fname, root) per file -- confirm.
walk_news(root, encode)
Пример #4
0
from boto.s3.connection import S3Connection
from boto.s3.key import Key
# AWS credentials; both are empty strings in this listing.
accesskey = ''
secretkey = ''
conn = S3Connection(accesskey, secretkey)

bucketname = 'dataiap.mit.edu.ap'
try:
    bucket = conn.create_bucket(bucketname)
except Exception:
    # Could not create the bucket (presumably it already exists), so fetch it
    # instead.  Exception is caught rather than everything so that
    # KeyboardInterrupt and SystemExit are not swallowed.
    #
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function; the double space
    # matches the output of the original comma-separated print.
    print("could not create bucket  %s" % bucketname)
    bucket = conn.get_bucket(bucketname)


def upload(category, fname, root):
    """Upload root/fname to the module-level bucket as '<category>_<fname>'.

    category -- prefix used when building the key name
    fname -- file name inside root; only names starting with 'urn' are
        uploaded, anything else is skipped
    root -- directory the file is read from
    """
    if not fname.startswith('urn'):
        return

    remote = Key(bucket)
    remote.key = '%s_%s' % (category, fname)
    source_path = '%s/%s' % (root, fname)
    remote.set_contents_from_filename(source_path)


# Directory to upload from, taken from the first command-line argument.
root = os.path.abspath(sys.argv[1])
try:
    walk_news(root, upload)

    # retrieve ALL articles in World
    bucket.get_all_keys(prefix='World')
finally:
    # Make sure the connection is closed even if the steps above fail.
    conn.close()

# return self._get_all([('CommonPrefixes', Prefix)],
#                       '', None, {})