def main():
    """
    Parse the command line and save a screenshot of the requested web page.

    :return: 2 when the webpage address or the output filename is missing
        (the usage help is printed), 0 once the screenshot has been saved
    """
    parser = argparse.ArgumentParser(
        description="Take screenshots of web pages")
    parser.add_argument("-w", "--width", type=int, default=800,
                        help="browser width (default=800)")
    parser.add_argument("-H", "--height", type=int, default=600,
                        help="browser height (default=600)")
    parser.add_argument("-wp", "--webpage", type=str,
                        help="webpage address")
    parser.add_argument(
        "-f", "--filename", type=str, default="screen.png",
        help="filename of saved screenshot (default=screen.png)"
             "\nexpected format: .jpeg or .png")
    args = parser.parse_args()

    # Both values are required.  Testing `not args.webpage and args.filename`
    # binds as `(not webpage) and filename`, which lets an empty filename slip
    # through to the capture step, so each value is checked on its own.
    if not args.webpage or not args.filename:
        parser.print_help()
        return 2

    pc = PageCapture(args.webpage, args.width, args.height)
    pc.take_screen_shot(args.filename)
    title = pc.get_page_title()
    print("Screen shot of %s taken and saved to %s." % (title, args.filename))
    return 0
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Capture a screenshot for every url that has no Page yet and record it.

    For each url that is not already stored, a line of the form
    ``category name,url,title,image path`` is appended to
    ``page_meta_data.txt`` and a Page is saved.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: when True, crop_screen_shot is called with
        0, 0, 1000, 1000 instead of take_screen_shot
    :return: None
    """
    # The context manager closes the metadata file even when an unexpected
    # (non-ValueError) exception escapes the loop.
    with open('page_meta_data.txt', 'a') as meta_file:
        for url in url_list:
            existing = Page.objects.filter(url=url)
            if existing:
                print("Already added: {0}".format(existing[0].title))
                continue

            try:
                # create PageCapture object - specify the browser to be 800 x 600.
                pc = PageCapture(url, 800, 600)
                url_file_name = convert_url_to_filename(url) + '.png'
                image_file_name = os.path.join(DATA_DIR, url_file_name)
                pc.load_url(url)

                # fetch the screen-shot
                if halved_screen_shot:
                    pc.crop_screen_shot(image_file_name, 0, 0, 1000, 1000)
                else:
                    pc.take_screen_shot(image_file_name)

                title = pc.get_page_title()

                # Save to file as well as the db, to decouple the two.
                meta_file.write('%s,%s,%s,%s\n' % (
                    category.name, url, title, image_file_name,))
                print("written {0} to file.".format(title))

                # The stored screenshot path is built from MEDIA_ROOT rather
                # than DATA_DIR: DATA_DIR reportedly put the current working
                # directory into the url.
                p = Page(category=category, title=title, is_shown=True,
                         url=url,
                         screenshot=os.path.join('/', MEDIA_ROOT, url_file_name))
                p.save()
                print('Page title= ' + p.title + ' has been saved!')
            except ValueError as err:
                # Report the actual error instead of the literal class name.
                print('Page has ((NOT)) been saved!')
                print('ERROR IS {0}'.format(err))
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Capture a screenshot for every url and append its meta data to a file.

    For each url a line of the form ``category name,url,title,image path``
    is appended to ``page_meta_data.txt``.  Nothing is written to the db
    here; the results are saved to file instead, to decouple the two steps.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: when True, crop_screen_shot is called with
        0, 0, 1000, 1000 instead of take_screen_shot
    :return: None
    """
    # The context manager closes the metadata file even when an unexpected
    # (non-ValueError) exception escapes the loop.
    with open('page_meta_data.txt', 'a') as meta_file:
        for url in url_list:
            try:
                # create PageCapture object - specify the browser to be 800 x 600.
                pc = PageCapture(url, 800, 600)
                url_file_name = convert_url_to_filename(url) + '.png'
                image_file_name = os.path.join(DATA_DIR, url_file_name)
                pc.load_url(url)

                # fetch the screen-shot
                if halved_screen_shot:
                    pc.crop_screen_shot(image_file_name, 0, 0, 1000, 1000)
                else:
                    pc.take_screen_shot(image_file_name)

                title = pc.get_page_title()

                meta_file.write('%s,%s,%s,%s\n' % (
                    category.name, url, title, image_file_name,))
                print("written {0} to file.".format(title))
            except ValueError as err:
                # Report the actual error instead of the literal class name.
                print('Page has ((NOT)) been saved!')
                print('ERROR IS {0}'.format(err))
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Capture a screenshot for every url and save a Page for it.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: when True, crop_screen_shot is called instead
        of take_screen_shot; one of two fixed argument sets
        (200, 400, 700, 900 or 0, 0, 1000, 1000) is picked at random
    :return: None
    """
    for url in url_list:
        try:
            # create PageCapture object - specify the browser to be 800 x 600.
            pc = PageCapture(url, 800, 600)
            url_file_name = convert_url_to_filename(url) + '.png'
            image_file_name = os.path.join(DATA_DIR, url_file_name)
            pc.load_url(url)

            # fetch the screen-shot
            if halved_screen_shot:
                if random.random() > 0.5:
                    pc.crop_screen_shot(image_file_name, 200, 400, 700, 900)
                else:
                    pc.crop_screen_shot(image_file_name, 0, 0, 1000, 1000)
            else:
                pc.take_screen_shot(image_file_name)

            title = pc.get_page_title()

            # The stored screenshot path is built from MEDIA_ROOT rather than
            # DATA_DIR: DATA_DIR reportedly put the current working directory
            # into the url.
            p = Page(category=category, title=title, is_shown=True, url=url,
                     screenshot=os.path.join('/', MEDIA_ROOT, url_file_name))
            p.save()
            print('Page title= ' + p.title + ' has been saved!')
        except ValueError as err:
            # Print the caught error itself; printing the ValueError class
            # says nothing about what went wrong.
            print('Page has ((NOT)) been saved!')
            print('ERROR IS')
            print(err)
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Take a screenshot of each url and store a Page entry for it.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: when True, crop_screen_shot is used in place
        of take_screen_shot, with one of two fixed argument sets
        (200, 400, 700, 900 or 0, 0, 1000, 1000) chosen at random
    :return: None
    """
    for url in url_list:
        try:
            # create PageCapture object - specify the browser to be 800 x 600.
            pc = PageCapture(url, 800, 600)
            url_file_name = convert_url_to_filename(url) + '.png'
            image_file_name = os.path.join(DATA_DIR, url_file_name)
            pc.load_url(url)

            # fetch the screen-shot
            if not halved_screen_shot:
                pc.take_screen_shot(image_file_name)
            elif random.random() > 0.5:
                pc.crop_screen_shot(image_file_name, 200, 400, 700, 900)
            else:
                pc.crop_screen_shot(image_file_name, 0, 0, 1000, 1000)

            title = pc.get_page_title()

            # The stored screenshot path is built from MEDIA_ROOT rather than
            # DATA_DIR: DATA_DIR reportedly put the current working directory
            # into the url.
            screenshot_path = os.path.join('/', MEDIA_ROOT, url_file_name)
            p = Page(category=category, title=title, is_shown=True, url=url,
                     screenshot=screenshot_path)
            p.save()
            print('Page title= ' + p.title + ' has been saved!')
        except ValueError as err:
            # Print the caught error itself; printing the ValueError class
            # says nothing about what went wrong.
            print('Page has ((NOT)) been saved!')
            print('ERROR IS')
            print(err)
def main():
    """
    Command-line entry point: capture one web page and save its screenshot.

    :return: 2 when the webpage address or the output filename is missing
        (the usage help is printed), 0 once the screenshot has been saved
    """
    parser = argparse.ArgumentParser(description="Take screenshots of web pages")
    parser.add_argument("-w", "--width", type=int, default=800,
                        help="browser width (default=800)")
    parser.add_argument("-H", "--height", type=int, default=600,
                        help="browser height (default=600)")
    parser.add_argument("-wp", "--webpage", type=str, help="webpage address")
    parser.add_argument(
        "-f",
        "--filename",
        type=str,
        default="screen.png",
        help="filename of saved screenshot (default=screen.png)"
        "\nexpected format: .jpeg or .png",
    )
    args = parser.parse_args()

    # Both values are required.  Testing `not args.webpage and args.filename`
    # binds as `(not webpage) and filename`, which lets an empty filename slip
    # through to the capture step, so each value is checked on its own.
    if not args.webpage or not args.filename:
        parser.print_help()
        return 2

    pc = PageCapture(args.webpage, args.width, args.height)
    pc.take_screen_shot(args.filename)
    title = pc.get_page_title()
    print("Screen shot of %s taken and saved to %s." % (title, args.filename))
    return 0