class SearchFlightPage(object):
    """Page object for the BlazeDemo flight-search page."""

    __url = "http://blazedemo.com/"

    def __init__(self, browser):
        self._web = Web(browser)

    def open(self):
        """Navigate the browser to the search page."""
        self._web.open(self.__url)

    def _click_after_wait(self, xpath):
        # Fixed wait before locating the element; crude but mirrors the
        # page's observed load time.
        time.sleep(5)
        self._web.get_web_element_by_xpath(xpath).click()

    def select_departure_city(self, city):
        """Pick *city* in the departure drop-down."""
        self._click_after_wait(
            "//select[@name='fromPort']/option[@value='{}']".format(city))

    def select_destination_city(self, city):
        """Pick *city* in the destination drop-down."""
        self._click_after_wait(
            "//select[@name='toPort']/option[@value='{}']".format(city))

    def search_for_flights(self):
        """Submit the search form."""
        self._click_after_wait("//input[@type='submit']")

    def get_found_flights(self):
        """Return the rows of the results table."""
        time.sleep(5)
        return self._web.get_web_elements_by_xpath(
            "//table[@class='table']/tbody/tr")

    def close(self):
        """Shut down all browser windows."""
        self._web.close_all()
def glasshammers_form_test():
    """Submit the Glass Hammers contact form with canned test data."""
    inquiry_options = ["Customer Support", "Commission", "Questions", "Comments"]
    # The inquiry reason is chosen at random from the supported set.
    reason = Random().choice(inquiry_options)
    Web().glass_hammers_form(
        "DJdjango",           # name
        "*****@*****.**",     # email
        "1230984756",         # phone
        reason,
        "callllvinnnn!!")     # message
def __init__(self, url, debugging=True, location=None):
    """Initialize ripper state for one gallery URL.

    url       -- raw gallery URL; may carry a 'pwd=...' suffix (imgsrc)
    debugging -- echoes log lines and is forwarded to Web()
    location  -- accepted but never read  # NOTE(review): dead parameter?
    """
    # Optional MongoDB handle; MongoClient is None/falsy when pymongo is absent.
    self.DB = None
    if MongoClient: self.DB = MongoClient()
    self.debugging = debugging
    self.web = Web(debugging=self.debugging) # Web object for downloading/parsing
    self.base_dir = RIP_DIRECTORY
    if not os.path.exists(self.base_dir): os.mkdir(self.base_dir)
    self.original_url = url
    #self.debug('class: %s' % self.__class__.__name__)
    #self.debug('basesite.__init__init__ url:' + url)
    self.imgsrcpwd = ""
    self.username = ""
    # Everything after 'pwd=' (find('pwd')+4 skips past the '=' as well).
    if 'pwd=' in url: self.imgsrcpwd = url[url.find('pwd')+4:]
    #self.debug('imgsrcpwd:%s'%self.imgsrcpwd)
    self.title=""
    self.url = self.sanitize_url(url)
    # Directory to store images in: prefer the gallery-specific name when the
    # subclass provides one, otherwise fall back to the generic get_dir().
    galldir = self.get_gallery_dir(self.url)
    if galldir == '': galldir = self.get_dir(self.url)
    self.working_dir = '%s%s%s' % (self.base_dir, os.sep, galldir)
    # Download throttling / accounting.
    self.max_threads = MAX_THREADS
    self.thread_count = 0
    self.image_count = 0
    self.max_images = MAX_IMAGES
    self.logfile = '%s%s%s' % (self.working_dir, os.sep, self.get_gallery_dir(self.url) + LOG_NAME)
    self.first_log = True
def __init__(self, url, urls_only=False):
    """Set up download state for one album URL.

    url       -- raw album URL; validated/normalized via sanitize_url()
    urls_only -- when True, only collect URLs instead of downloading files
    """
    self.web = Web()  # downloader/parser helper
    self.urls_only = urls_only
    # Make sure the root rip directory exists before deriving paths from it.
    self.base_dir = RIP_DIRECTORY
    if not os.path.exists(self.base_dir):
        os.mkdir(self.base_dir)
    self.url = self.sanitize_url(url)
    # Per-album working directory and its log file.
    self.working_dir = self.base_dir + os.sep + self.get_dir(self.url)
    self.logfile = self.working_dir + os.sep + LOG_NAME
    # Download-throttling counters and limits.
    self.max_threads = MAX_THREADS
    self.max_images = MAX_IMAGES
    self.thread_count = 0
    self.image_count = 0
    self.first_log = True
from Web import Web

url = 'http://www.qqride.com/category/scheduled/page/1/'
# Scrape the scheduled-events listing; the three selectors are the
# container, title and content class names.
cnrun = Web(url, 'site-main', 'entry-title', 'single-content')
#print(cnrun.soup)
cnrun.getlist()


def p(L):
    """Prefix a relative link with the site root."""
    # NOTE(review): prefix is chinarun.com although the page is qqride.com;
    # the map(p) call below is disabled, so this is currently unused.
    return 'http://www.chinarun.com' + L

#cnrun.map(p)


def getGen():
    """Return the generator of parsed event entries."""
    return cnrun.process()


if __name__ == "__main__":
    # Manual smoke test only: dump every parsed entry.
    for entry in getGen():
        print(entry)
# One-off imagefap newsfeed scraper (Python 2): collect gallery URLs from a
# user's feed that are not already recorded in MongoDB.
from site_imagefap import imagefap
from pymongo import MongoClient

# Echo any gallery IDs passed on the command line (debug aid only).
for gall in sys.argv[1:]:
    print gall
#whynot 494927
#jimmy 1764181
# AJAX endpoint template — defined but unused below; kept from experimentation.
asd = 'http://www.imagefap.com/ajax/newsdata.php?userid=1764181&status=&galleries=&comments=&ts='
mongo = MongoClient()
# Matches gallery anchors in the feed HTML, capturing the numeric gallery ID.
rGall = re.compile('href="\/gallery\/(\d+)"><b>[^<]+<\/b>')
web = Web()
# Unix timestamp for "5 days ago" — computed and printed, but not used further.
ts = datetime.now()
ts2 = ts - timedelta(days=5)
sss = calendar.timegm(ts2.utctimetuple())
print sss
#exit()
ar = set()
feed = web.get('http://www.imagefap.com/newsfeed.php?user=JimmyPerv77')
for a in re.findall(rGall, feed):
    a2 = 'http://www.imagefap.com/gallery/' + str(a)
    print a2
    # Skip galleries already recorded in the feed.url collection.
    if mongo.feed.url.find_one({"url": a2}):
        print "1found %s" % a2
        continue
    ar.add(a2)
class basesite(object):
    """
    Base class for per-site rippers; subclasses override sanitize_url(),
    get_dir() and download().
    Constructs object using overriding methods.
    Throws Exception if:
     * URL is invalid (not appropriate for site class),
     * Working directory could not be created.
    """

    def __init__(self, url, urls_only=False, debugging=False):
        self.debugging = debugging
        self.web = Web() # Web object for downloading/parsing
        self.base_dir = RIP_DIRECTORY
        if not os.path.exists(self.base_dir): os.mkdir(self.base_dir)
        self.url = self.sanitize_url(url)
        # Directory to store images in
        self.working_dir = '%s%s%s' % (self.base_dir, os.sep, self.get_dir(self.url))
        # Download throttling / accounting.
        self.max_threads = MAX_THREADS
        self.thread_count = 0
        self.image_count = 0
        self.max_images = MAX_IMAGES
        self.logfile = '%s%s%s' % (self.working_dir, os.sep, LOG_NAME)
        self.first_log = True
        self.urls_only = urls_only

    """ To be overridden """
    def sanitize_url(self, url): raise Exception("Method 'sanitize_url' was not overridden!")

    """ Return directory name to store photos in """
    def get_dir(self, url): raise Exception("Method 'get_dir' was not overridden!")

    """ Creates working dir if zip does not exist """
    def init_dir(self):
        if not os.path.exists(self.working_dir) and \
            self.existing_zip_path() == None:
            os.mkdir(self.working_dir)

    """ Returns true if we hit the image limit, false otherwise """
    def hit_image_limit(self): return self.image_count >= self.max_images

    """ To be overridden """
    def download(self): raise Exception("Method 'download' was not overridden!")

    """ Checks if album is already being downloaded """
    def is_downloading(self): return os.path.exists(self.logfile)

    """ Appends line to log file """
    def log(self, text, overwrite=False):
        # First call writes the log header (recursively, with overwrite).
        if self.first_log:
            self.first_log = False
            self.log('http://rip.rarchives.com - file log for URL %s' % self.url, overwrite=True)
        sys.stderr.write('%s\n' % text)
        text = text.replace('"', '\\"')
        if overwrite: f = open(self.logfile, 'w')
        else: f = open(self.logfile, 'a')
        f.write("%s\n" % text)
        f.flush()
        f.close()

    """ Gets last line(s) from log """
    def get_log(self, tail_lines=1):
        # Returns '' when no log exists, otherwise a list of the last
        # tail_lines lines.
        if not os.path.exists(self.logfile): return ''
        f = open(self.logfile, 'r')
        r = f.read().strip()
        f.close()
        while r.endswith('\n'): r = r[:-1]
        lines = r.split('\n')
        return lines[len(lines)-tail_lines:]

    """ Starts separate thread to download image from URL """
    def download_image(self, url, index, total='?', subdir='', saveas=None):
        if saveas == None:
            saveas = url[url.rfind('/')+1:]
        # Strip extraneous / non FS safe characters
        if '?' in saveas: saveas = saveas[:saveas.find('?')]
        if ':' in saveas: saveas = saveas[:saveas.find(':')]
        # Add a file extension if necessary
        if not '.' in saveas:
            m = self.web.get_meta(url)
            ct = 'image/jpeg' # Default to jpg
            if 'Content-Type' in m: ct = m['Content-Type']
            ext = ct[ct.rfind('/')+1:]
            if ext == 'jpeg': ext = 'jpg'
            saveas = '%s.%s' % (saveas, ext)
        # Setup subdirectory saves
        if subdir != '': subdir = '/%s' % subdir
        savedir = '%s%s' % (self.working_dir, subdir)
        if not os.path.exists(savedir): os.mkdir(savedir)
        saveas = '%s/%03d_%s' % (savedir, index, saveas)
        if os.path.exists(saveas):
            self.log('file exists: %s' % saveas)
            self.image_count += 1
        else:
            # Crude throttle: busy-wait until a thread slot frees up.
            # NOTE(review): thread_count is mutated from worker threads with
            # no lock — appears to rely on CPython's GIL; confirm.
            while self.thread_count > self.max_threads: time.sleep(0.1)
            self.thread_count += 1
            args = (url, saveas, index, total)
            t = Thread(target=self.download_image_thread, args=args)
            t.start()

    """ Multi-threaded download of image """
    def download_image_thread(self, url, saveas, index, total):
        m = self.web.get_meta(url)
        if 'Content-Type' not in m:
            text = 'no Content-Type found at URL %s' % (url)
        elif ('image' not in m['Content-Type'] and \
              'video' not in m['Content-Type'] and \
              'octet-stream' not in m['Content-Type']):
            text = 'no "image"/"video"/"octet-stream" in Content-Type (found "%s") for URL %s' % (m['Content-Type'], url)
        else:
            if self.web.download(url, saveas):
                self.image_count += 1
                text = 'downloaded (%d' % index
                if total != '?': text += '/%s' % total
                text += ') (%s) - %s' % (self.get_size(saveas), url)
            else:
                text = 'download failed (%d' % index
                if total != '?': text += '/%s' % total
                text += ') - %s' % url
        self.log(text)
        self.thread_count -= 1

    def wait_for_threads(self):
        # Block until all worker threads drained, then delete the working dir
        # if it holds nothing but the log (or nothing at all in urls_only mode).
        while self.thread_count > 0: time.sleep(0.1)
        if os.path.exists(self.working_dir):
            if not self.urls_only and len(os.listdir(self.working_dir)) <= 1 \
               or self.urls_only and len(os.listdir(self.working_dir)) == 0:
                rmtree(self.working_dir) # Delete everything in working dir

    """ Returns human-readable filesize for file """
    def get_size(self, filename):
        # NOTE(review): 'bytes' shadows the builtin; bare except swallows
        # every error, returning '?b'.
        try: bytes = os.path.getsize(filename)
        except: return '?b'
        b = 1024 * 1024 * 1024
        a = ['g','m','k','']
        for i in a:
            if bytes >= b:
                return '%.2f%sb' % (float(bytes) / float(b), i)
            b /= 1024
        return '0b'

    """ Returns path to zip file if it exists, otherwise None. """
    def existing_zip_path(self):
        extension = 'zip'
        if self.urls_only: extension = 'txt'
        zipfile = '%s.%s' % (self.working_dir, extension)
        if os.path.exists(zipfile) and not os.path.exists(self.working_dir):
            return zipfile
        else:
            return None

    """ Zips site's working directory, Deletes zipped files after zip is created Returns path to zip file """
    def zip(self):
        if self.urls_only:
            # URL-list mode: parse indices from the log, sort, write .txt.
            if not os.path.exists('%s/log.txt' % self.working_dir):
                raise Exception('no log found')
            url_filename = '%s.txt' % self.working_dir
            f = open('%s/log.txt' % self.working_dir, 'r')
            lines = f.read().split('\n')[1:]
            tuples = []
            for line in lines:
                # Log format per add_url(): '(i/total) - index|url'
                if line.strip() == '' or ' - ' not in line: continue
                if line.count('|') < 1: continue
                line = line[line.find(' - ')+3:]
                splits = line.split('|')
                index = splits[0]
                url = '|'.join(splits[1:])
                tuples.append( (index, url) )
            tuples = sorted(tuples, key=lambda tup: int(tup[0]))
            f = open(url_filename, 'w')
            for (index, url) in tuples:
                f.write('%s\n' % url)
            f.close()
            rmtree(self.working_dir) # Delete everything in working dir
            return url_filename
        self.log('zipping album...')
        zip_filename = '%s.zip' % self.working_dir
        z = ZipFile(zip_filename, "w", ZIP_DEFLATED)
        for root, dirs, files in os.walk(self.working_dir):
            # NOTE: ignore empty directories
            for fn in files:
                #if 'log.txt' in fn: continue
                absfn = os.path.join(root, fn)
                zfn = absfn[len(self.working_dir)+len(os.sep):] #XXX: relative path
                z.write(absfn, zfn)
        z.close()
        rmtree(self.working_dir) # Delete everything in working dir
        return zip_filename

    def add_url(self, index, url, total=0):
        # Records a found URL in the log ("URLs only" feature).
        self.image_count += 1
        string = '(%d' % index
        if total > 0: string += '/%d' % total
        string += ')'
        self.log('%s - %d|%s' % (string, index, url))

    def debug(self, text):
        # stderr echo, only when debugging is enabled.
        if not self.debugging: return
        sys.stderr.write('%s\n' % text)
class EnquiryFormHelper():
    """Page object for the trip-customization enquiry form.

    The fixed sleeps are kept from the original implementation to let the
    page's widgets animate/load before the next interaction.
    """

    #__url = "http://blazedemo.com/"
    __url = "https://<url>/front/default/newhome"

    def __init__(self, browser):
        self._web = Web(browser)

    def Open(self):
        """Navigate the browser to the enquiry page."""
        self._web.open(self.__url)

    def click_customize_button(self):
        """Open the 'Customize Your Trip' form."""
        self._web.get_web_element_by_xpath(
            "(//*[contains(text(),'Customize Your Trip')])[1]").click()
        time.sleep(3)

    def enter_name(self, name):
        self._web.get_web_element_by_xpath("//*[@name='name']").send_keys(name)

    def enter_phone(self, phone):
        self._web.get_web_element_by_xpath("//*[@name='phone']").send_keys(
            phone)

    def enter_email(self, email):
        self._web.get_web_element_by_xpath("//*[@name='email']").send_keys(
            email)
        # BUG FIX: the original had a bare `time.sleep` here — an attribute
        # access that was never called, i.e. a silent no-op.  Removed rather
        # than guessing at an intended delay (runtime behavior unchanged).

    def select_no_of_people(self, noOfPeople):
        """Pick *noOfPeople* in the 'No. of People' dropdown (best-effort)."""
        try:
            self._web.get_web_element_by_xpath(
                "//input[@value='No. of People']").click()
            time.sleep(3)
            self._web.get_web_element_by_xpath(
                "//div[contains(@class,'NoOfAdults')]//li/span[contains(text(),'{}')]"
                .format(noOfPeople)).click()
            time.sleep(2)
        except Exception as e:
            # Best-effort: the dropdown may not be present on every layout.
            print(e)

    def select_trip_begin(self, trip_begin):
        """Pick the booking window in the 'I will book' dropdown (best-effort)."""
        try:
            self._web.get_web_element_by_xpath(
                "//input[@value='I will book']").click()
            time.sleep(3)
            self._web.get_web_element_by_xpath(
                "//div[contains(@class,'duration')]//li/span[contains(text(),'{}')]"
                .format(trip_begin)).click()
            time.sleep(2)
        except Exception as e:
            print(e)

    def select_trip_location(self, location):
        """Pick *location* in the 'Trip Location*' dropdown."""
        self._web.get_web_element_by_xpath(
            "//input[@value='Trip Location*']").click()
        time.sleep(2)
        self._web.get_web_element_by_xpath(
            "//div[contains(@class,'location1')]//li/span[contains(text(),'{}')]"
            .format(location)).click()
        time.sleep(2)

    def select_random_date(self):
        # Date picking is delegated entirely to the Web helper.
        self._web.selectDate()

    def Close(self):
        self._web.close_all()
class basesite(object):
    """
    Base class for per-site rippers (imgsrc/MongoDB variant); subclasses
    override sanitize_url(), get_dir() and download().
    Constructs object using overriding methods.
    Throws Exception if:
     * URL is invalid (not appropriate for site class),
     * Working directory could not be created.
    """

    def __init__(self, url, debugging=True, location=None):
        # location is accepted but never read.  NOTE(review): dead parameter?
        # Optional MongoDB handle; MongoClient is None/falsy when absent.
        self.DB = None
        if MongoClient: self.DB = MongoClient()
        self.debugging = debugging
        self.web = Web(debugging=self.debugging) # Web object for downloading/parsing
        self.base_dir = RIP_DIRECTORY
        if not os.path.exists(self.base_dir): os.mkdir(self.base_dir)
        self.original_url = url
        #self.debug('class: %s' % self.__class__.__name__)
        #self.debug('basesite.__init__init__ url:' + url)
        self.imgsrcpwd = ""
        self.username = ""
        # Everything after 'pwd=' (find('pwd')+4 skips past the '=' too).
        if 'pwd=' in url: self.imgsrcpwd = url[url.find('pwd')+4:]
        #self.debug('imgsrcpwd:%s'%self.imgsrcpwd)
        self.title=""
        self.url = self.sanitize_url(url)
        # Directory to store images in: gallery-specific name when provided,
        # else the generic get_dir() name.
        galldir = self.get_gallery_dir(self.url)
        if galldir == '': galldir = self.get_dir(self.url)
        self.working_dir = '%s%s%s' % (self.base_dir, os.sep, galldir)
        # Download throttling / accounting.
        self.max_threads = MAX_THREADS
        self.thread_count = 0
        self.image_count = 0
        self.max_images = MAX_IMAGES
        self.logfile = '%s%s%s' % (self.working_dir, os.sep, self.get_gallery_dir(self.url) + LOG_NAME)
        self.first_log = True

    # Default: no gallery-specific directory; subclasses may override.
    def get_gallery_dir(self, url, r='', gallno2=''): return ''

    """ To be overridden """
    def sanitize_url(self, url): raise Exception("Method 'sanitize_url' was not overridden!")

    """ Return directory name to store photos in """
    def get_dir(self, url): raise Exception("Method 'get_dir' was not overridden!")

    """ Creates working dir if zip does not exist """
    def init_dir(self):
        if not os.path.exists(self.working_dir) and \
            self.existing_zip_path() == None:
            os.mkdir(self.working_dir)

    """ Returns true if we hit the image limit, false otherwise """
    def hit_image_limit(self):
        if self.image_count >= self.max_images:
            self.log('hit image limit: %d >= %d' % (self.image_count, self.max_images))
            return True
        return False

    """ To be overridden """
    def download(self): raise Exception("Method 'download' was not overridden!")

    """ Checks if album is already being downloaded """
    def is_downloading(self): return os.path.exists(self.logfile)

    """ Appends line to log file """
    def log(self, text, overwrite=False):
        # First call writes a header line with the original URL + timestamp.
        if self.first_log:
            self.first_log = False
            self.log('URL %s @ %s' % (self.original_url, strftime('%Y-%m-%dT%H:%M:%S PDT')), overwrite=False)
        if self.debugging:
            sys.stderr.write('%s\n' % text)
        text = text.replace('"', '\\"')
        if overwrite: f = open(self.logfile, 'w')
        else: f = open(self.logfile, 'a')
        f.write("%s\n" % text)
        f.flush()
        f.close()

    """ Gets last line(s) from log """
    def get_log(self, tail_lines=1):
        # '' when no log exists, else a list of the last tail_lines lines.
        if not os.path.exists(self.logfile): return ''
        f = open(self.logfile, 'r')
        r = f.read().strip()
        f.close()
        while r.endswith('\n'): r = r[:-1]
        lines = r.split('\n')
        return lines[len(lines)-tail_lines:]

    """ Starts separate thread to download image from URL """
    def download_image(self, url, index, total='?', subdir='', saveas=None, gallname=''):
        # nur in imgSRc !!!!!!  (imgsrc-specific mirror-host swap b -> o)
        url = url.replace('http://b', 'http://o')
        gallname = gallname.replace('/', '_').replace('.', '_')
        unique_saveas = True
        if saveas == None:
            unique_saveas = False
            saveas = url[url.rfind('/')+1:]
        if gallname != '':
            saveas = gallname + '_' + saveas.replace('\/', '_')
        #self.debug('Pic:' + saveas)
        # Strip extraneous / non FS safe characters
        saveas = saveas.replace('?:\\', '')
        #if '?' in saveas: saveas = saveas[:saveas.find('?')]
        #if ':' in saveas: saveas = saveas[:saveas.find(':')]
        # Add a file extension if necessary (checks the '.xxx' position only).
        if saveas[len(saveas)-4] != '.':
            m = self.web.get_meta(url)
            ct = 'image/jpeg' # Default to jpg
            if 'Content-Type' in m: ct = m['Content-Type']
            ext = ct[ct.rfind('/')+1:]
            if ext == 'jpeg': ext = 'jpg'
            saveas = '%s.%s' % (saveas, ext)
        # Setup subdirectory saves: explicit subdir goes under base_dir,
        # otherwise files land in the album's working_dir.
        savedir = ''
        if subdir != '':
            subdir = '/%s' % subdir
            savedir = '%s%s' % (self.base_dir, subdir)
        else:
            savedir = '%s%s' % (self.working_dir, subdir)
        if not os.path.exists(savedir): os.mkdir(savedir)
        if unique_saveas:
            saveas = '%s/%s' % (savedir, saveas)
        else:
            saveas = '%s/%03d_%s' % (savedir, index, saveas)
        if os.path.exists(saveas):
            self.debug('file exists: %s' % saveas)
            self.image_count += 1
        else:
            # Crude throttle: busy-wait until a thread slot frees up.
            # NOTE(review): thread_count is mutated without a lock.
            while self.thread_count > self.max_threads: time.sleep(0.1)
            self.thread_count += 1
            args = (url, saveas, index, total)
            t = Thread(target=self.download_image_thread, args=args)
            t.start()

    """ Multi-threaded download of image """
    def download_image_thread(self, url, saveas, index, total):
        m = self.web.get_meta(url)
        if 'Content-Type' not in m:
            text = 'no Content-Type found at URL %s' % (url)
            if url.startswith('http://o'):
                # Mirror 'o' host failed; retry once on the 'b' host.
                # NOTE(review): this early return (and the return 0 below)
                # skips the thread_count decrement — slot leak.
                url = url.replace('http://o', 'http://b')
                #self.debug("Switching back from O to B:%s m:%s\n" % (url,m))
                args = (url, saveas, index, total)
                return self.download_image_thread(url, saveas, index, total)
        elif ('image' not in m['Content-Type'] and \
              'video' not in m['Content-Type'] and \
              'octet-stream' not in m['Content-Type']):
            text = 'no image/video/octet-stream in Content-Type (found "%s") for URL %s' % (m['Content-Type'], url)
        else:
            indextotal = self.get_index_total(index, total)
            hmm = None
            text = ""
            if self.DB:
                # Skip URLs already recorded in the imgsrc.imgurls collection.
                hmm = self.DB.imgsrc.imgurls.find_one({"url": str(url)})
            if hmm != None:
                self.debug('(thread) DB: url found: %s'%str(hmm))
                return 0
            if hmm == None:
                if self.web.download(url, saveas):
                    self.image_count += 1
                    # Create thumbnail
                    thumbnail = self.create_thumb(saveas)
                    if self.DB:
                        imgurls = self.DB.imgsrc.imgurls
                        u96 = imgurls.find_one({"url": str(url)})
                        if u96 != None:
                            text = 'DB u96 %s/%s: (%s)' % (indextotal, self.get_size(saveas), saveas)
                        else:
                            imgurls.insert({"url": str(url)})
                            text = 'vv %s/%s: (%s)' % (indextotal, self.get_size(saveas), saveas)
                else:
                    text = 'download failed %s - %s' % (indextotal, url)
        if text != "":
            self.log(text)
        self.thread_count -= 1
        return self.get_size(saveas)

    """ Same-thread downlod/save (does not launch new thread) """
    def save_image(self, url, saveas, index, total='?'):
        indextotal = self.get_index_total(index, total)
        if os.path.exists(saveas):
            self.image_count += 1
            self.log('file exists: %s' % saveas)
        elif self.web.download(url, saveas):
            self.image_count += 1
            thumbnail = self.create_thumb(saveas)
            self.log('Download %s (%s) - source: (%s)' % (indextotal, self.get_size(saveas), url))
        else:
            self.log('Download %s ERROR << %s' % (indextotal, url))

    """ Wait for threads to finish downloading. Delete working dir if no images are downloaded """
    def wait_for_threads(self):
        # Bounded wait (~100s max) to avoid hanging on a leaked slot.
        i = 0
        while (self.thread_count > 0) and (i < 1000):
            time.sleep(0.1)
            i = i + 1
        if os.path.exists(self.working_dir):
            if len(os.listdir(self.working_dir)) <= 1:
                rmtree(self.working_dir) # Delete everything in working dir

    """ Returns human-readable filesize for file """
    def get_size(self, filename):
        # NOTE(review): 'bytes' shadows the builtin; bare except returns '?b'.
        try: bytes = os.path.getsize(filename)
        except: return '?b'
        b = 1024 * 1024 * 1024
        a = ['g','m','k','']
        for i in a:
            if bytes >= b:
                return '%.2f%sb' % (float(bytes) / float(b), i)
            b /= 1024
        return '0b'

    """
    Returns path to zip file if it exists, otherwise None.
    Does not return path if zipping is in progress.
    """
    def existing_zip_path(self):
        zipfile = '%s.zip' % (self.working_dir)
        if os.path.exists(zipfile):
            if not os.path.exists(self.working_dir):
                # No direcotry; only zip exists
                return zipfile
            else:
                if not os.path.exists('%s%szipping.txt' % (self.working_dir, os.sep)):
                    # 'zipping' file/flag does not exist
                    return zipfile
        return None

    """ Zips site's working directory, Deletes zipped files after zip is created Returns path to zip file """
    def zip(self):
        # Zip name is '<working_dir>-<get_dir(url)>.zip'.
        x = self.working_dir
        x = x + "-" + self.get_dir(self.url)
        self.log('zipping album... %s ' % x)
        zip_filename = '%s.zip' % x
        z = ZipFile(zip_filename, "w", ZIP_DEFLATED)
        for root, dirs, files in os.walk(self.working_dir):
            if root.endswith('/thumbs'): continue # Do not zip thumbnails
            for fn in files:
                # Ignore files used by service:
                if fn.endswith('zipping.txt'): continue # Album is currently zipping
                if fn.endswith('complete.txt'): continue # Album download completed
                if fn.endswith('ip.txt'): continue # IP address of ripper
                if fn.endswith('reports.txt'): continue # Number of reports, report messages
                absfn = os.path.join(root, fn)
                zfn = absfn[len(self.working_dir)+len(os.sep):] #XXX: relative path
                z.write(absfn, zfn)
        z.close()
        return zip_filename

    """
    Creates thumbnail based on file path.
    Creates /thumbs/ sub dir & stores thumbnail.
    Returns thumbnail path on success, empty string on failure.
    """
    def create_thumb(self, inp):
        # NOTE(review): this unconditional return disables thumbnailing —
        # everything below is dead code (perhaps intentional; confirm).
        return
        if inp.lower().endswith('.mp4'):
            return self.create_video_thumb(inp)
        if Image == None:
            sys.stderr.write('Python Image Library (PIL) not installed; unable to create thumbnail for %s\n' % inp)
            sys.stderr.write('Go to http://www.pythonware.com/products/pil/ to install PIL\n')
            sys.stderr.flush()
            return 'rips/nothumb.png'
        fields = inp.split(os.sep)
        fields.insert(-1, 'thumbs')
        saveas = os.sep.join(fields)
        if os.path.exists(saveas): return ''
        thumbpath = os.sep.join(fields[:-1])
        if not os.path.exists(thumbpath):
            try: os.mkdir(thumbpath)
            except: pass
        try:
            im = Image.open(inp)
            (width, height) = im.size
            if width > MAX_THUMB_DIM or height > MAX_THUMB_DIM:
                # Image too large to create thumbnail
                self.log('unable to create thumbnail, %dx%d > %d' % (width, height, MAX_THUMB_DIM))
                return 'rips/nothumb.png'
            if os.path.getsize(inp) > MAX_THUMB_SIZE:
                self.log('unable to create thumbnail, %db > %db' % (os.path.getsize(inp), MAX_THUMB_SIZE))
                return 'rips/nothumb.png'
            if im.mode != 'RGB': im = im.convert('RGB')
            im.thumbnail( (200,200), Image.ANTIALIAS)
            im.save(saveas, 'JPEG')
            return saveas
        # Python 2 except syntax — this file is not Python 3 compatible.
        except Exception, e:
            self.log('failed to create thumb: %s' % str(e))
            pass
        return 'rips/nothumb.png'
from Web import Web

# NOTE: this module can raise while scraping; callers should catch.
url = 'http://ps.zestbike.com/match/preview'
cnrun = Web(url, 'pagecontent', 'commonlist yahei', 'commonarticle yahei')
#print(cnrun.soup)
cnrun.getlist()


def p(L):
    """Turn a relative match link into an absolute URL."""
    return 'http://ps.zestbike.com/' + L


print(cnrun.list)
cnrun.map(p)


def getGen():
    """Return the generator of parsed match entries."""
    return cnrun.process()


if __name__ == "__main__":
    # Manual smoke test only: print each entry and a final count.
    count = 0
    for entry in getGen():
        print(entry)
        count += 1
    print(count)
from Web import Web

# Cycling-event listing on chinarun.com; the selectors are the list
# container, title and content class names.
cnrun = Web(
    'http://www.chinarun.com/html/event_k_%20%E8%87%AA%E8%A1%8C%E8%BD%A6%E8%B5%9B_0_.html#cnt',
    'ulHdList', 'n', 'divCnt divHA')
cnrun.getlist()


def p(L):
    """Make a site-relative link absolute."""
    return 'http://www.chinarun.com' + L


cnrun.map(p)


def getGen():
    """Return the generator of parsed event entries."""
    return cnrun.process()


if __name__ == "__main__":
    # Manual smoke test only: print each entry's first field (the title).
    for entry in getGen():
        print(entry[0])
# MicroPython boot script: load per-device config, join WiFi, initialize the
# LED strip, then hand control to the scheduler (with a web UI attached).
import ujson  # MicroPython's JSON module
import WifiConnect
from LedController import LedController
from Scheduler import Scheduler
from Web import Web

# The device's hostname selects its config file: config.<hostname>.json.
hostname = open("config.hostname.txt").read()
print("hostname = {}".format(hostname))
config_file = "config.{0}.json".format(hostname)
config = ujson.loads(open(config_file).read())
print("config = {}".format(config))
ssid = config.get("ssid")
# WiFi password lives in its own file, outside the JSON config.
password = open("wifi-password.txt").read()
WifiConnect.connect(ssid, password, hostname)
led_data_pin = int(config.get("led_data_pin"))
led_count = int(config.get("led_count"))
led_bpp = int(config.get("led_bpp"))  # bytes per pixel (3=RGB, 4=RGBW?) — TODO confirm
ledController = LedController(led_data_pin, led_count, led_bpp)
ledController.all_off()
# Pulse LED 0 with (12, 32, 0) — presumably an "online" status color; verify.
ledController.pulse_status_led(0, 12, 32, 0)
scheduler = Scheduler(ledController)
# Constructing Web wires HTTP handlers to the controller/scheduler (side effect).
web = Web(ledController, scheduler)
scheduler.run()  # main loop; does not return
def main():
    """Entry point: start the FTP server exposed by the Web helper."""
    Web().run_ftp_server()
from Web import Web
import re

url = 'http://zuicool.com/news/archives/category/user-submitted/%E8%87%AA%E8%A1%8C%E8%BD%A6'
# Scrape the user-submitted cycling category; selectors are container/title/content.
cnrun = Web(url, 'container', 'zuicool-index-post clearfix', 'entry-content')
cnrun.getlist()


def p(L):
    """Turn a relative article link into an absolute URL."""
    return 'http://zuicool.com' + L


cnrun.map(p)


def getGen():
    """Return a generator of (title, link) pairs with boilerplate stripped."""
    return w(cnrun.process())


def w(process):
    """Strip the site suffix from each scraped title.

    BUG FIX: the original tested `len(title) is 1` — an identity comparison
    on an int, which only appears to work because CPython caches small
    integers.  Use `==` for value comparison.
    """
    for i in process:
        title = re.findall('报名 | (.*) - 最酷ZUICOOL - 马拉松赛事第一站_最COOL', i[0])
        if len(title) == 1:
            title = title[0]
        else:
            title = title[1]
        yield (title, i[1])
#!/usr/bin/python # coding=utf-8 from Utils import Utils __author__ = "Aleksandr Shyshatsky" from Battle import Battle from Flash import Flash from Callbacks import Callbacks from Events import Events from Web import Web callbacks = Callbacks() flash = Flash() battle = Battle() events = Events() web = Web() utils = Utils __all__ = ['flash', 'battle', 'callbacks', 'events', 'web', 'utils']
def __init__(self, browser):
    """Bind a Web helper to the supplied browser/driver instance."""
    self._web = Web(browser)  # project-local wrapper around the browser
from Web import Web

url = 'http://www.wildto.com/event/'
# Event listing on wildto.com; selectors are container/title/content classes.
cnrun = Web(url, 'cmptList clearfix', 'pic', 'leftBox')
#print(cnrun.soup)
cnrun.getlist()


def p(L):
    """Prefix a relative event link with the site root."""
    return 'http://www.wildto.com' + L


cnrun.map(p)


def getGen():
    """Return the generator of parsed event entries."""
    return cnrun.process()


if __name__ == "__main__":
    # Manual smoke test only: dump every parsed entry.
    for entry in getGen():
        print(entry)
class basesite(object):
    """
    Base class for per-site rippers (urls_only + thumbnail variant);
    subclasses override sanitize_url(), get_dir() and download().
    Constructs object using overriding methods.
    Throws Exception if:
     * URL is invalid (not appropriate for site class),
     * Working directory could not be created.
    """

    def __init__(self, url, urls_only=False, debugging=False):
        self.debugging = debugging
        self.web = Web(debugging=self.debugging) # Web object for downloading/parsing
        self.base_dir = RIP_DIRECTORY
        if not os.path.exists(self.base_dir): os.mkdir(self.base_dir)
        self.url = self.sanitize_url(url)
        # Directory to store images in
        self.working_dir = '%s%s%s' % (self.base_dir, os.sep, self.get_dir(self.url))
        # Download throttling / accounting.
        self.max_threads = MAX_THREADS
        self.thread_count = 0
        self.image_count = 0
        self.max_images = MAX_IMAGES
        self.logfile = '%s%s%s' % (self.working_dir, os.sep, LOG_NAME)
        self.first_log = True
        self.urls_only = urls_only

    """ To be overridden """
    def sanitize_url(self, url): raise Exception("Method 'sanitize_url' was not overridden!")

    """ Return directory name to store photos in """
    def get_dir(self, url): raise Exception("Method 'get_dir' was not overridden!")

    """ Creates working dir if zip does not exist """
    def init_dir(self):
        if not os.path.exists(self.working_dir) and \
            self.existing_zip_path() == None:
            os.mkdir(self.working_dir)

    """ Returns true if we hit the image limit, false otherwise """
    def hit_image_limit(self): return self.image_count >= self.max_images

    """ To be overridden """
    def download(self): raise Exception("Method 'download' was not overridden!")

    """ Checks if album is already being downloaded """
    def is_downloading(self): return os.path.exists(self.logfile)

    """ Appends line to log file """
    def log(self, text, overwrite=False):
        # First call writes the log header (recursively, with overwrite).
        if self.first_log:
            self.first_log = False
            self.log('http://rip.rarchives.com - file log for URL %s' % self.url, overwrite=True)
        if self.debugging:
            sys.stderr.write('%s\n' % text)
        text = text.replace('"', '\\"')
        if overwrite: f = open(self.logfile, 'w')
        else: f = open(self.logfile, 'a')
        f.write("%s\n" % text)
        f.flush()
        f.close()

    """ Gets last line(s) from log """
    def get_log(self, tail_lines=1):
        # '' when no log exists, else a list of the last tail_lines lines.
        if not os.path.exists(self.logfile): return ''
        f = open(self.logfile, 'r')
        r = f.read().strip()
        f.close()
        while r.endswith('\n'): r = r[:-1]
        lines = r.split('\n')
        return lines[len(lines)-tail_lines:]

    """ Starts separate thread to download image from URL """
    def download_image(self, url, index, total='?', subdir='', saveas=None):
        # unique_saveas: caller supplied the name, so no index prefix is added.
        unique_saveas = True
        if saveas == None:
            unique_saveas = False
            saveas = url[url.rfind('/')+1:]
        # Strip extraneous / non FS safe characters
        if '?' in saveas: saveas = saveas[:saveas.find('?')]
        if ':' in saveas: saveas = saveas[:saveas.find(':')]
        # Add a file extension if necessary
        if not '.' in saveas:
            m = self.web.get_meta(url)
            ct = 'image/jpeg' # Default to jpg
            if 'Content-Type' in m: ct = m['Content-Type']
            ext = ct[ct.rfind('/')+1:]
            if ext == 'jpeg': ext = 'jpg'
            saveas = '%s.%s' % (saveas, ext)
        # Setup subdirectory saves
        if subdir != '': subdir = '/%s' % subdir
        savedir = '%s%s' % (self.working_dir, subdir)
        if not os.path.exists(savedir): os.mkdir(savedir)
        if unique_saveas:
            saveas = '%s/%s' % (savedir, saveas)
        else:
            saveas = '%s/%03d_%s' % (savedir, index, saveas)
        if os.path.exists(saveas):
            self.log('file exists: %s' % saveas)
            self.image_count += 1
        else:
            # Crude throttle: busy-wait until a thread slot frees up.
            # NOTE(review): thread_count is mutated without a lock.
            while self.thread_count > self.max_threads: time.sleep(0.1)
            self.thread_count += 1
            args = (url, saveas, index, total)
            t = Thread(target=self.download_image_thread, args=args)
            t.start()

    """ Multi-threaded download of image """
    def download_image_thread(self, url, saveas, index, total):
        m = self.web.get_meta(url)
        if 'Content-Type' not in m:
            text = 'no Content-Type found at URL %s' % (url)
        elif ('image' not in m['Content-Type'] and \
              'video' not in m['Content-Type'] and \
              'octet-stream' not in m['Content-Type']):
            text = 'no "image"/"video"/"octet-stream" in Content-Type (found "%s") for URL %s' % (m['Content-Type'], url)
        else:
            if self.web.download(url, saveas):
                self.image_count += 1
                text = 'downloaded (%d' % index
                if total != '?': text += '/%s' % total
                text += ') (%s) - %s' % (self.get_size(saveas), url)
                # Create thumbnail
                self.create_thumb(saveas)
            else:
                text = 'download failed (%d' % index
                if total != '?': text += '/%s' % total
                text += ') - %s' % url
        self.log(text)
        self.thread_count -= 1

    def wait_for_threads(self):
        # Block until all workers drained, then remove the working dir if it
        # holds nothing but the log (or nothing at all in urls_only mode).
        while self.thread_count > 0: time.sleep(0.1)
        if os.path.exists(self.working_dir):
            if not self.urls_only and len(os.listdir(self.working_dir)) <= 1 \
               or self.urls_only and len(os.listdir(self.working_dir)) == 0:
                rmtree(self.working_dir) # Delete everything in working dir

    """ Returns human-readable filesize for file """
    def get_size(self, filename):
        # NOTE(review): 'bytes' shadows the builtin; bare except returns '?b'.
        try: bytes = os.path.getsize(filename)
        except: return '?b'
        b = 1024 * 1024 * 1024
        a = ['g','m','k','']
        for i in a:
            if bytes >= b:
                return '%.2f%sb' % (float(bytes) / float(b), i)
            b /= 1024
        return '0b'

    """ Returns path to zip file if it exists, otherwise None. """
    def existing_zip_path(self):
        if self.urls_only:
            # URL-list mode stores '<name>.txt' inside a 'txt/' sibling dir.
            txtfile = '%s.txt' % self.working_dir
            f = txtfile.split('/')
            f.insert(-1, 'txt')
            txtfile = '/'.join(f)
            if os.path.exists(txtfile):
                return txtfile
            return None
        zipfile = '%s.zip' % (self.working_dir)
        if os.path.exists(zipfile):
            if not os.path.exists(self.working_dir):
                # No direcotry; only zip exists
                return zipfile
            else:
                if not os.path.exists('%s%szipping.txt' % (self.working_dir, os.sep)):
                    # 'zipping' file/flag does not exist
                    return zipfile
        return None

    """ Zips site's working directory, Deletes zipped files after zip is created Returns path to zip file """
    def zip(self):
        if self.urls_only:
            # Just URLs, need to store in order & store to a .txt file
            if not os.path.exists('%s/log.txt' % self.working_dir):
                raise Exception('no log found')
            if not os.path.exists('txt/'):
                try: os.mkdir('txt')
                except: pass
            f = self.working_dir.split('/')
            f.insert(-1, 'txt')
            url_filename = '%s.txt' % '/'.join(f)
            f = open('%s/log.txt' % self.working_dir, 'r')
            lines = f.read().split('\n')[1:]
            tuples = []
            for line in lines:
                # Log format per add_url(): '(i/total) - index|url'
                if line.strip() == '' or ' - ' not in line: continue
                if line.count('|') < 1: continue
                line = line[line.find(' - ')+3:]
                splits = line.split('|')
                index = splits[0]
                url = '|'.join(splits[1:])
                tuples.append( (index, url) )
            tuples = sorted(tuples, key=lambda tup: int(tup[0]))
            f = open(url_filename, 'w')
            for (index, url) in tuples:
                f.write('%s\n' % url)
            f.close()
            rmtree(self.working_dir) # Delete everything in working dir
            return url_filename
        self.log('zipping album...')
        zip_filename = '%s.zip' % self.working_dir
        z = ZipFile(zip_filename, "w", ZIP_DEFLATED)
        for root, dirs, files in os.walk(self.working_dir):
            # NOTE: ignore empty directories & thumbnails
            if root.endswith('/thumbs'): continue
            for fn in files:
                #if 'log.txt' in fn: continue
                if fn.endswith('zipping.txt'): continue
                if fn.endswith('complete.txt'): continue
                if fn.endswith('ip.txt'): continue
                if fn.endswith('reports.txt'): continue
                absfn = os.path.join(root, fn)
                zfn = absfn[len(self.working_dir)+len(os.sep):] #XXX: relative path
                z.write(absfn, zfn)
        z.close()
        #rmtree(self.working_dir) # Delete everything in working dir
        return zip_filename

    """ Creates thumbnail based on file path Creates /thumbs/ sub dir & stores thumbnail """
    def create_thumb(self, inp):
        if inp.lower().endswith('.mp4'):
            self.create_video_thumb(inp)
            return
        if Image == None:
            sys.stderr.write('Python Image Library (PIL) not installed; unable to create thumbnail for %s\n' % inp)
            sys.stderr.write('Go to http://www.pythonware.com/products/pil/ to install PIL\n')
            sys.stderr.flush()
            return
        # Thumbnail lives next to the original, under a 'thumbs' subdir.
        fields = inp.split(os.sep)
        fields.insert(-1, 'thumbs')
        saveas = os.sep.join(fields)
        if os.path.exists(saveas): return
        thumbpath = os.sep.join(fields[:-1])
        if not os.path.exists(thumbpath):
            try: os.mkdir(thumbpath)
            except: pass
        try:
            im = Image.open(inp)
            if im.mode != 'RGB': im = im.convert('RGB')
            im.thumbnail( (200,200), Image.ANTIALIAS)
            im.save(saveas, 'JPEG')
            del im
        # Best-effort: thumbnail failures are silently ignored.
        except: pass

    def create_video_thumb(self, inp):
        # Extract a .png thumbnail from a video via ffmpeg, overlaying a
        # play button; silently does nothing when ffmpeg is unavailable.
        fields = inp.split(os.sep)
        fields.insert(-1, 'thumbs')
        saveas = os.sep.join(fields)
        saveas = saveas[:saveas.rfind('.')] + '.png'
        thumbpath = os.sep.join(fields[:-1])
        if not os.path.exists(thumbpath):
            try: os.mkdir(thumbpath)
            except: pass
        overlay = 'play_overlay.png'
        ffmpeg = '/usr/bin/ffmpeg'
        if not os.path.exists(ffmpeg):
            ffmpeg = '/opt/local/bin/ffmpeg'
        if not os.path.exists(ffmpeg):
            return # Can't get images if we can't find ffmpeg
        cmd = ffmpeg
        cmd += ' -i "'
        cmd += inp
        cmd += '" -vf \'movie='
        cmd += overlay
        cmd += ' [watermark]; '
        cmd += '[in]scale=200:200 [scale]; '
        cmd += '[scale][watermark] overlay=(main_w-overlay_w)/2:(main_h-overlay_h)/2 [out]\' '
        cmd += saveas
        try:
            (s, o) = getstatusoutput(cmd)
        except: pass

    """ Add url to list of URLs found. For "URLs Only" feature """
    def add_url(self, index, url, total=0):
        self.image_count += 1
        string = '(%d' % index
        if total > 0: string += '/%d' % total
        string += ')'
        self.log('%s - %d|%s' % (string, index, url))

    def debug(self, text):
        # stderr echo, only when debugging is enabled.
        if not self.debugging: return
        sys.stderr.write('%s\n' % text)