Пример #1
0
def reducefn1(k, vs):
    """Gather the posting lists of the terms in ``vs`` and append them,
    JSON-encoded, to the DFS file ``/<USERNAME>/posting_list/<k>``.

    Args:
        k: Key of the reduce group; becomes the last component of the
            DFS output path.
        vs: Iterable of terms. Each term names a local file
            ``tmp/plist/<term>`` that is read in full.

    Returns:
        None. The only result is the data written through
        ``dfs.file_appender``.

    The global ``USERNAME`` is read but not defined here; it has to be
    supplied as indicated by the placeholder comment below.
    """
    def load_entries(term):
        # Splitting on "\n" keeps a trailing empty string when the file
        # ends with a newline.
        with open("tmp/plist/%s" % term) as plist_file:
            return plist_file.read().split("\n")

    postings = {term: load_entries(term) for term in vs}

    # Imports live inside the function body; presumably the function is
    # shipped to workers on its own -- TODO confirm.
    import json
    import sys

    # Make the DFS client module importable before importing it.
    sys.path.append("../dfs/")

    import client as dfs

    # Your alias (username) as a string constant
    # USERNAME=
    target_path = "/%s/posting_list/%s" % (USERNAME, k)
    with dfs.file_appender(target_path) as buf:
        buf.write(json.dumps(postings))
Пример #2
0
import mwclient
import mwparserfromhell as mwparser
import sys
sys.path.append("../dfs/")

import client as dfs

# Copy every member of the English Wikipedia category "Big data" into the
# DFS: one file per page under /wikipedia/pageN, plus a table of contents
# in /wikipedia/__toc__ that pairs each DFS file name with the page name.
site = mwclient.Site('en.wikipedia.org')
category = site.Pages['Category:Big_data']
counter = 0


def _plain_text_utf8(page):
    """Return the page's text with wiki markup stripped, encoded as UTF-8."""
    wikicode = mwparser.parse(page.text())
    return wikicode.strip_code().encode('utf-8')


with dfs.file_appender("/wikipedia/__toc__") as toc:
    for page in category:
        page_filename = "/wikipedia/page%d" % counter
        # The page file is opened before its text is fetched and parsed.
        with dfs.file_appender(page_filename) as page_out:
            page_out.write(_plain_text_utf8(page))
        # NOTE(review): no separator is written between TOC entries here;
        # presumably file_appender.write() delimits records itself --
        # confirm against the dfs client.
        toc_entry = "%s %s" % (page_filename, page.name)
        toc.write(toc_entry)
        counter += 1