# catch label and screenshot img and segment them into smaller size img, label = None, None catch_success = False if is_catch_element and '.com' in links.iloc[index]: # set the format of libel libel_format = pd.read_csv(os.path.join(data_position, 'format.csv'), index_col=0) url = 'http://' + links.iloc[index] if 'http://' not in links.iloc[ index] else links.iloc[index] try: img, label = catch.catch(url, label_path, img_org_path, libel_format, driver_path) except FunctionTimedOut: print('Catch Time Out') continue # segment the lengthy images if is_segment and img is not None: seg.segment_img(img, 600, img_segment_path, 0) # read and draw label on segment img if is_draw_label and img is not None and label is not None: draw.label(label, img, img_drawn_path) end_time = time.clock() print("*** %d Time taken:%ds ***\n" % (index, int(end_time - start_time))) if index > end_pos: break
# Capture a single page: start the selected browser driver, grab the page
# screenshot and element labels via catch.catch, optionally draw the labels
# and segment the screenshot, then report the elapsed time.

# Start the WebDriver that matches the `browser` setting.
# NOTE(review): any other `browser` value leaves `driver` unbound here, so the
# catch.catch call below would fail with NameError -- confirm whether an
# explicit error is wanted for unsupported browsers.
if browser == 'PhantomJS':
    driver = webdriver.PhantomJS(
        executable_path=os.path.join(driver_path, 'phantomjs.exe'))
elif browser == 'Chrome':
    options = webdriver.ChromeOptions()
    # do not show the browser every time
    options.add_argument('--headless')
    driver = webdriver.Chrome(
        executable_path=os.path.join(driver_path, 'chromedriver.exe'),
        options=options)

# set the format of label (first CSV column is used as the index)
label_format = pd.read_csv('data/format.csv', index_col=0)

try:
    img, label = catch.catch(url, out_html, out_elements, out_img,
                             label_format, driver)

    # read and draw label on segment img
    if is_draw_label and img is not None and label is not None:
        img_drawn_path = 'data/0_drawn.png'
        draw.label(label, img, img_drawn_path)

    # segment the lengthy images
    if is_segment and img is not None:
        # NOTE(review): '/segment' is rooted at the filesystem root, unlike
        # the 'data/...' paths above -- confirm this is the intended target.
        img_segment_dir = '/segment'
        seg.segment_img(img, 600, img_segment_dir, 0)
except FunctionTimedOut:
    # catch.catch exceeded its time limit; report it and fall through to the
    # timing summary instead of aborting.
    print('Catch Time Out')

# time.clock() was removed in Python 3.8. Keep using it where it still exists
# (so the reading stays comparable with a start_time taken from the same
# clock on older interpreters) and fall back to time.perf_counter() otherwise.
# NOTE(review): start_time is set outside this section -- make sure it is
# taken from the same clock.
_read_clock = getattr(time, 'clock', time.perf_counter)
end_time = _read_clock()
print("*** Time taken:%ds ***" % int(end_time - start_time))