Пример #1
0
 def parse_year(x):
     """Queue every link found in the table cells of a fetched page.

     ``x.data`` is parsed with ``fromHTML``. For each ``<a>`` element matched
     by ``//table/tr/td/a`` the ``href`` value is appended to
     ``http://logs.zub.cc`` and handed to ``fetch.addUrl``.

     Returns None. Nothing is queued when the parsed document is empty.
     """
     dom = fromHTML(x.data)
     if len(dom) == 0:
         return
     # A distinct loop name is used on purpose: in Python 2 a list
     # comprehension variable leaks into the enclosing scope, so reusing
     # ``x`` would rebind the function argument.
     for anchor in dom.xpath("//table/tr/td/a"):
         uri = anchor.get("href")
         if uri is None:
             # Anchor without an href: concatenating None would raise.
             continue
         fetch.addUrl('http://logs.zub.cc' + uri)
    def parse(x):
	if "post" in x.save:
	    header=dict([xx.strip().split(': ') for xx in filter(lambda x: x.find(":") > 0,x.head.getvalue().split("\n"))])
	    header=dict(map(lambda h:[h.lower(),header[h]],header))
	    if "location" in header and header["location"].find("comment"):
		print "забеись",x.save["original"],x.save["comment"]
	    else:
		print "плохо",x.save["url"],header,x.data

	else:
	    if  len(x.data) == 0:
		return
	
	    dom=fromHTML(x.data) 
	    if len(dom) == 0:
		return
	
	    form=filter(lambda x: x.get("action") and x.get("action").find("wp-comments-post.php"),dom.xpath("//form[@method='post']"))[0].forms[0]
	    fields =  form.form_values()
	    commentName=False
    	    for field in fields:
		if field[0] in ('author','login','name'):
		    form.fields[field[0]]=genName()
		elif field[0] in ('email','e-mail','mail'):
		    form.fields[field[0]]=genEmail()
		#elif field[0] in ('site','url'):
	#		form.fields[field[0]]=genURL()
	        elif field[0] in ('comment','text','body'):
		    commentName=field[0]
	    submit= filter(lambda x: x.get('type') == "submit",form.inputs)[0]
	    for comment in comments:
		form.fields[commentName]=comment
	        fetch.addUrl({"url":getRealAction(x.url,form.action),"post":urlencode(form.form_values()+[(submit.name,submit.value)]),"original":x.url,"comment":comment})
Пример #3
0
 def parse_page(x):
     """Save matching log lines from a page and queue the following page.

     ``x.data`` is decoded as cp1251 and parsed with ``fromHTML``. The text of
     the first ``/html/body/font`` element is split into lines; for every line
     containing ``<misskitten>`` the text after the first ``"> "`` is passed
     to ``save``.

     Each ``/html/body/a`` link whose href starts with ``/viewlog/`` is queued
     with ``fetch.addUrl`` when the number after its last ``=`` equals the
     number after the last ``=`` of ``x.url`` plus one.

     Returns None.
     """
     dom = fromHTML(x.data.decode('cp1251'))
     if len(dom) == 0:
         return

     blocks = dom.xpath("/html/body/font")
     if blocks:
         for msg in blocks[0].text_content().split("\n"):
             # ``in`` also matches a marker at index 0, which find() > 0
             # skipped.
             if "<misskitten>" in msg:
                 # Keep everything after the first "> "; joining all split
                 # pieces used to delete later "> " sequences from the text.
                 save(msg.partition("> ")[2])

     # The expected next page number does not depend on the link, so it is
     # computed once. A URL without a numeric suffix means no link can match.
     try:
         next_page = int(x.url.split("=")[-1]) + 1
     except ValueError:
         next_page = None

     for anchor in dom.xpath("/html/body/a"):
         uri = anchor.get("href")
         if uri is None or not uri.startswith("/viewlog/"):
             continue
         try:
             target = int(str(uri).split("=")[-1])
         except ValueError:
             # Link without a numeric suffix cannot be the next page.
             continue
         if target == next_page:
             fetch.addUrl('http://logs.zub.cc' + uri)