예제 #1
0
파일: spider.py 프로젝트: hexuotzo/khufu
def indb(title,url,body):
	url = "http://baby.sina.com.cn%s" % url
	title = title.encode("utf8")
	kid = uid.getKid(url)
	text = html2text(body)
	pinyin = addpinyin(text)
	tag = c1.classify(body)
	print kid,title,url,tag
	os.popen(cmd % (kid,title,now.date(),tag))

	mc = memcache.Client(['114.113.30.29:11211'])
	dbvalue=cjson.encode({"title":title.decode("utf8"),"url":url,"html":body,"text":text,"datetime":str(now),"addpinyin":pinyin,"body":text,"kid":kid})
	mc.set(str(kid),dbvalue)
예제 #2
0
# encoding: utf-8
try:
    import cmemcache as memcache
except:
    import memcache
from addpinyin import *
import datetime
import cjson
import os
import uid

url = ''
tag = '专家咨询'
title = u''
text = u'''

'''
pinyin = addpinyin(text)
now = datetime.datetime.now()

kid = uid.getKid(url)
print kid,title

mc = memcache.Client(['114.113.30.29:11211'])
dbvalue=cjson.encode({"title":title,"url":url,"html":text,"text":text,"datetime":str(now),"addpinyin":pinyin,"body":text,"kid":kid})
mc.set(str(kid),dbvalue)

cmd = 'tctmgr put infodb/infodb %s "title" "%s" "savedate" "%s" "tag1" "%s"' % (kid,title.encode("utf8"),now.date(),tag)
print os.popen(cmd).read()