def reducefn1(k, vs):
    """Collect the posting lists of the terms in ``vs`` and append them to the DFS as JSON.

    For every term in ``vs`` the local file ``tmp/plist/<term>`` is read and
    split on ``"\\n"``; the resulting ``{term: [lines...]}`` mapping is
    JSON-encoded and written through ``dfs.file_appender`` to
    ``/<USERNAME>/posting_list/<k>``.

    Args:
        k: Key used as the last component of the output DFS path.
        vs: Iterable of terms; each one names a file under ``tmp/plist/``.

    Returns:
        None. The result is only written to the DFS.

    Note:
        ``USERNAME`` is not defined in this function; the placeholder below is
        commented out and has to be filled in before the function can run.
    """
    postings_by_term = {}
    for term in vs:
        plist_path = "tmp/plist/%s" % term
        with open(plist_path) as plist_file:
            contents = plist_file.read()
        # Plain split on "\n": a file ending with a newline yields a final
        # empty string in the list.
        postings_by_term[term] = contents.split("\n")

    # The DFS client lives outside the regular import path, so the path is
    # extended right before the import.
    import sys
    sys.path.append("../dfs/")
    import client as dfs
    import json

    # Your alias as a string constant
    # USERNAME=
    target_path = "/%s/posting_list/%s" % (USERNAME, k)
    with dfs.file_appender(target_path) as buf:
        encoder = json.JSONEncoder()
        buf.write(encoder.encode(postings_by_term))
# Dump every page of the English Wikipedia category "Big_data" into the DFS:
# each page is stored as plain text in /wikipedia/page<N>, and a
# "<dfs path> <page title>" record is appended to /wikipedia/__toc__.
import sys

import mwclient
import mwparserfromhell as mwparser

# The DFS client module is located in a sibling directory, so that directory
# has to be on the import path before `client` is imported.
sys.path.append("../dfs/")

import client as dfs

site = mwclient.Site('en.wikipedia.org')
category = site.Pages['Category:Big_data']

# Running index used to build the per-page DFS file names.
counter = 0
with dfs.file_appender("/wikipedia/__toc__") as toc:
    for page in category:
        page_filename = "/wikipedia/page%d" % counter
        with dfs.file_appender(page_filename) as f:
            # Strip the wiki markup and store the remaining text as UTF-8.
            wikicode = mwparser.parse(page.text())
            plain_text = wikicode.strip_code()
            f.write(plain_text.encode('utf-8'))
        # NOTE(review): no separator is added between TOC records here;
        # confirm that file_appender.write delimits successive writes.
        toc_record = "%s %s" % (page_filename, page.name)
        toc.write(toc_record)
        counter += 1