def checker_1_2(url):
    """Score the ``<audio>`` and ``<video>`` elements of a page.

    Loads ``url`` in the shared ``driver`` and counts:

    * an ``<audio>`` as good when a search inside it finds at least one
      ``[aria-label]`` element and at least one ``<track>``;
    * a ``<video>`` as good when a search inside it finds at least one
      ``[aria-label]`` element.

    NOTE: ``<object>`` elements are not scored; an earlier check for them
    was disabled in the original code.

    Returns:
        dict with ``total_score`` (good / total over both kinds, ``1`` when
        the page has neither) and ``subs``, one entry per kind holding
        ``name``, ``score``, ``ratio`` (``[bad, good]``) and ``present``.
    """
    driver.get(url)

    audios = driver.find_elements_by_css_selector("audio")
    audios_total = len(audios)
    # Both lookups must be non-empty.  The original wrapped the whole
    # ``and`` expression in a single len(), which raised TypeError as soon
    # as an audio had a labelled child.
    audio_good_total = sum(
        1 for audio in audios
        if audio.find_elements_by_css_selector("[aria-label]")
        and audio.find_elements_by_css_selector("track")
    )

    videos = driver.find_elements_by_css_selector("video")
    videos_total = len(videos)
    # Iterate the videos themselves (the original looped over ``audios``).
    videos_good_total = sum(
        1 for video in videos
        if video.find_elements_by_css_selector("[aria-label]")
    )

    # Denominator covers both media kinds (the original added the audio
    # count twice).
    total = audios_total + videos_total

    return {
        "total_score":
        (videos_good_total + audio_good_total) / total if total > 0 else 1,
        "subs": [{
            "name": "audio",
            "score":
            audio_good_total / audios_total if audios_total != 0 else 1,
            "ratio": [audios_total - audio_good_total, audio_good_total],
            "present": audios_total > 0
        }, {
            "name": "video",
            "score":
            videos_good_total / videos_total if videos_total != 0 else 1,
            "ratio": [videos_total - videos_good_total, videos_good_total],
            "present": videos_total > 0
        }]
    }
_DEFAULT_LOGO_PATH = '/Users/simon/Dev/viddyoze-bot/assets/logo-black.png'


def render_all_logo_templates(logo_path=_DEFAULT_LOGO_PATH,
                              pages=range(1, 5),
                              templates_per_page=15):
    """Walk the category-3 template listing and render every template.

    For each listing page and each template position on it, the listing is
    reloaded, the template card is opened, a new render is started and the
    customisation steps are filled in: every text input gets a numbered
    placeholder, every image input gets ``logo_path``, and the colour step
    is left untouched.  After the final "Create" step the function blocks
    until the browser URL contains ``/download``.

    ``click`` and ``key`` are helpers defined elsewhere in the project;
    presumably they click an element/selector and type into an input.

    Args:
        logo_path: file path typed into every image input.  Defaults to the
            path that used to be hard-coded here.
        pages: iterable of listing page numbers to visit.
        templates_per_page: how many template positions to try per page.
    """
    card_selector = (
        'body > div.pusher > div.ui.container > div.ui.three.stackable.grid.template-list > div > div > div.preview.template-card'
    )
    step_selector = (
        'body > div.pusher > header > div.ui.fluid.second-header.container.customize-container > div > div > a'
    )

    for page in pages:
        for template_index in range(templates_per_page):
            # The listing is reloaded every time because the previous
            # iteration navigated away from it.
            driver.get(
                URL + '/' + str(page) +
                '?filters%5Bcategory%5D=3&sort%5Bfield%5D=&filters%5Bname%5D=&filters%5Btag%5D='
            )
            templates = driver.find_elements_by_css_selector(card_selector)
            if template_index >= len(templates):
                # Short (e.g. last) page: nothing left to render here.
                print(f'Page {page}: only {len(templates)} templates found')
                break

            click(templates[template_index])
            time.sleep(4)
            click('a[href*="/new"]')
            time.sleep(4)

            total_categories = len(
                driver.find_elements_by_css_selector(step_selector))
            for _ in range(total_categories):
                current_category = driver.find_element_by_css_selector(
                    'a.item.active').text
                if current_category == 'Texts':
                    texts = driver.find_elements_by_css_selector(
                        'input[name*="render[texts]"]')
                    for index, text in enumerate(texts):
                        key(text, f"(Text #{index + 1})")
                    click('a[title="Next Step"]')
                elif current_category == 'Colors':
                    click('a[title="Next Step"]')
                elif current_category == 'Images':
                    images = driver.find_elements_by_css_selector(
                        'input[name*="render[images]"]')
                    for image in images:
                        key(image, logo_path)
                    click('a[title="Next Step"]')
                elif current_category == 'Create':
                    # NOTE(review): the source's indentation was lost; the
                    # render wait is assumed to belong to this step -
                    # confirm against the original file.
                    click("//*[contains(text(), 'Create Your Video')]")
                    time.sleep(4)
                    print('Rendering...')
                    # Timeout of 24 hours.
                    wait = WebDriverWait(driver, 60 * 60 * 24)
                    wait.until(
                        lambda driver: "/download" in driver.current_url)
def render_template_by_name(name, webpage, logo_path):
    """Find a template by name, fill in its customisation steps and render it.

    The listing is filtered by ``name`` (spaces become ``+``), the first
    matching card is opened and a new render is started.  Every text input
    receives ``webpage``; every image input receives ``logo_path`` joined
    onto the current working directory; the colour step is skipped.  After
    the "Create" step the function waits until the URL contains
    ``/download`` and then calls ``wait_for_download`` on ``./renders``.

    ``click``, ``key`` and ``wait_for_download`` are helpers defined
    elsewhere in the project.
    """
    search_url = URL + '?filters%5Bname%5D=' + name.replace(' ', '+')
    driver.get(search_url)

    card = driver.find_element_by_css_selector(
        'body > div.pusher > div.ui.container > div.ui.three.stackable.grid.template-list > div > div > div.preview.template-card'
    )
    click(card)
    time.sleep(4)
    click('a[href*="/new"]')
    time.sleep(4)

    step_links = driver.find_elements_by_css_selector(
        'body > div.pusher > header > div.ui.fluid.second-header.container.customize-container > div > div > a'
    )
    total_categories = len(step_links)
    print('Total Categories: ' + str(total_categories))

    for _ in range(total_categories):
        active_step = driver.find_element_by_css_selector(
            'a.item.active.hovered')
        current_category = active_step.text
        print(current_category)

        if current_category == 'Create':
            # NOTE(review): the source's indentation was lost; the render
            # wait and download are assumed to belong to this step -
            # confirm against the original file.
            click("//*[contains(text(), 'Create Your Video')]")
            time.sleep(4)
            print('Rendering...')
            wait = WebDriverWait(driver, 60 * 60 * 24)
            wait.until(lambda driver: "/download" in driver.current_url)
            wait_for_download(os.getcwd() + '/renders')
            continue

        if current_category == 'Texts':
            texts = driver.find_elements_by_css_selector(
                'input[name*="render[texts]"]')
            print(texts)
            for text_input in texts:
                key(text_input, webpage)
        elif current_category == 'Images':
            image_inputs = driver.find_elements_by_css_selector(
                'input[name*="render[images]"]')
            for image_input in image_inputs:
                key(image_input, os.getcwd() + '/' + logo_path)
        elif current_category != 'Colors':
            # Unknown step: the original did nothing for it.
            continue

        # Texts, Images and Colors all advance the same way.
        click('a[title="Next Step"]')
def checker_1_4(url):
    """Score the page's audio controls (plus a constant zoom score).

    Loads ``url`` in the shared ``driver``.  The audio score is the fraction
    of ``<audio>`` elements whose ``controls`` attribute is set; it is
    ``None`` when the page has no ``<audio>`` at all.  The zoom score is not
    measured and is always ``1``.

    Returns:
        dict with ``total_score`` (mean of the audio and zoom scores, a
        missing audio score counting as ``1``) and ``subs`` with one entry
        per sub-check (``name``, ``score``, ``ratio`` as ``[bad, good]``,
        ``present``).
    """
    driver.get(url)
    audios = driver.find_elements_by_css_selector("audio")

    audio_score = None
    audio_ratio = []
    if audios:
        audio_total = len(audios)
        audio_good_total = sum(
            1 for audio in audios if audio.get_attribute("controls"))
        audio_score = audio_good_total / audio_total
        audio_ratio = [audio_total - audio_good_total, audio_good_total]

    zoom_score = 1

    # Only a *missing* score defaults to 1.  The original used ``or`` and a
    # truthiness test, so a genuine score of 0 was reported as perfect and
    # its ratio was dropped.
    effective_audio_score = 1 if audio_score is None else audio_score

    return {
        "total_score": (effective_audio_score + zoom_score) / 2,
        "subs": [{
            "name": "audio",
            "score": audio_score,
            "ratio": audio_ratio,
            "present": bool(audios)
        }, {
            "name": "zoom",
            "score": zoom_score,
            "ratio": [],
            "present": True
        }]
    }
def checker_2_4(url):
    """Score the page title and the link texts of a page.

    Loads ``url`` in the shared ``driver``.  The title score is ``1`` when a
    ``<title>`` element exists, else ``0``.  The link score is the fraction
    of ``<a>`` elements whose ``.text`` is non-empty, or ``1`` when the page
    has no links.

    Returns:
        dict with ``total_score`` (mean of both scores) and ``subs`` with
        one entry per sub-check (``name``, ``score``, ``ratio`` as
        ``[bad, good]``, ``present``).
    """
    driver.get(url)

    # find_elements returns an empty list when nothing matches; the singular
    # find_element raises instead, which made the 0 score unreachable.
    title_elems = driver.find_elements_by_css_selector("title")
    title_score = 1 if title_elems else 0

    links = driver.find_elements_by_css_selector("a")
    links_total = len(links)
    # Always bound, so the no-links path never touches an undefined name.
    good_links_total = sum(1 for link in links if link.text)
    link_score = good_links_total / links_total if links_total else 1

    return {
        "total_score": (title_score + link_score) / 2,
        "subs": [
            {
                "name": "title",
                "score": title_score,
                "ratio": [],
                "present": True
            },
            {
                "name": "link",
                "score": link_score,
                "ratio": [links_total - good_links_total, good_links_total]
                if links_total else [],
                "present": bool(links_total)
            }
        ]
    }
def checker_1_3(url):
    """Score a page's use of ``role`` attributes and ARIA references.

    Loads ``url`` in the shared ``driver`` and produces three sub-scores:

    * ``role_score``: 1 if any element has a ``role`` attribute, else 0;
    * ``heading_score``: 1 if any element matches ``[role=heading]``, else 0;
    * ``aria_score``: fraction of ``[role]`` elements that carry
      ``aria-labelledby`` or ``aria-describedby`` (1 when there are none).

    Returns a dict with ``total_score`` (mean of the three) and ``subs``.
    All three ``present`` flags reflect whether any ``[role]`` element
    exists.
    """
    driver.get(url)

    role_elems = driver.find_elements_by_css_selector("[role]")
    has_roles = bool(role_elems)
    role_score = 1 if has_roles else 0

    heading_elems = driver.find_elements_by_css_selector("[role=heading]")
    heading_score = 1 if heading_elems else 0

    if has_roles:
        referenced = 0
        for elem in role_elems:
            if (elem.get_attribute("aria-labelledby")
                    or elem.get_attribute("aria-describedby")):
                referenced += 1
        role_count = len(role_elems)
        aria_score = referenced / role_count
        aria_ratio = [role_count - referenced, referenced]
    else:
        aria_score = 1
        aria_ratio = []

    subs = [
        {
            "name": "role_score",
            "score": role_score,
            "ratio": [],
            "present": has_roles
        },
        {
            "name": "heading_score",
            "score": heading_score,
            "ratio": [],
            "present": has_roles
        },
        {
            "name": "aria_score",
            "score": aria_score,
            "ratio": aria_ratio,
            "present": has_roles
        },
    ]
    return {
        "total_score": (role_score + heading_score + aria_score) / 3,
        "subs": subs
    }
def checker_4_1(url):
    """Run ``url`` through the W3C validator and report pass/fail.

    Opens the validator's check page for ``url`` in the shared ``driver``
    and scores 1 when the result page contains at least one element with
    class ``valid``, otherwise 0.  There are no sub-scores.
    """
    query = parse.urlencode({"uri": url})
    driver.get("https://validator.w3.org/check?" + query)

    if driver.find_elements_by_css_selector(".valid"):
        score = 1
    else:
        score = 0
    return {"total_score": score, "subs": []}
def _animation_durations_seconds(raw):
    """Parse a computed ``animation-duration`` value into a list of seconds.

    Accepts a comma-separated list whose items end in ``s`` or ``ms``;
    items that cannot be parsed are ignored.
    """
    durations = []
    for part in (raw or "").split(","):
        part = part.strip().lower()
        try:
            if part.endswith("ms"):
                durations.append(float(part[:-2]) / 1000)
            elif part.endswith("s"):
                durations.append(float(part[:-1]))
        except ValueError:
            continue
    return durations


def checker_2_3(url):
    """Flag pages that contain very fast CSS animations.

    Loads ``url`` in the shared ``driver`` and inspects the computed
    ``animation-duration`` of every element.  An element counts as flashing
    when one of its durations is positive and three cycles of it take less
    than one second (``duration * 3 < 1``).

    Returns:
        dict with ``total_score`` (0 if any flashing element was found,
        else 1) and an empty ``subs`` list.
    """
    driver.get(url)
    has_flash = False
    for elem in driver.find_elements_by_css_selector("*"):
        durations = _animation_durations_seconds(
            elem.value_of_css_property("animation-duration"))
        # A duration of 0 is the CSS default and means "no animation".  The
        # original treated it as a flash, so every page scored 0.
        if any(0 < seconds * 3 < 1 for seconds in durations):
            has_flash = True
            break  # one hit already decides the score
    return {"total_score": 0 if has_flash else 1, "subs": []}
def checker_2_2(url):
    """Score the forms of a page by whether they offer a submit control.

    Loads ``url`` in the shared ``driver``.  A ``<form>`` is good when a
    search inside it finds at least one ``[type=submit]`` element.  The
    score is good / total, or ``1`` when the page has no forms.

    Returns a dict with ``total_score`` and a single ``form`` entry in
    ``subs`` (``name``, ``score``, ``ratio`` as ``[bad, good]``,
    ``present``).
    """
    driver.get(url)
    forms = driver.find_elements_by_css_selector("form")
    forms_total = len(forms)

    if not forms_total:
        form_enough_time_score = 1
        form_ratio = []
    else:
        with_submit = 0
        for form in forms:
            if form.find_elements_by_css_selector("[type=submit]"):
                with_submit += 1
        form_enough_time_score = with_submit / forms_total
        form_ratio = [forms_total - with_submit, with_submit]

    form_sub = {
        "name": "form",
        "score": form_enough_time_score,
        "ratio": form_ratio,
        "present": bool(forms)
    }
    # Dividing by the number of sub-checks (one) keeps the result a float.
    return {"total_score": form_enough_time_score / 1, "subs": [form_sub]}
# Scraper script: stores one AuthorRatings record per review found on
# listing pages 219-229 and opens each reviewer's profile overlay.
#
# NOTE(review): this looks like a fragment - `me`, `time`, `sleep`,
# `make_url` and `trim_rating` are used below but not imported in the lines
# visible here.  The original indentation was also lost; the nesting shown
# is the most plausible reading - confirm against the original file.
from driver import driver
from models.author_ratings import AuthorRatings
from selenium import webdriver

# try:  (left over from a disabled try block)
me.connect("author-ratings")
# Start from an empty collection on every run.
AuthorRatings.drop_collection()
# NOTE(review): start_time and requests are not used in the visible part of
# the script; presumably request-rate bookkeeping that was cut off.
start_time = time()
requests = 0
for i in range(219, 230):
    url = make_url(i)
    driver.get(url)
    reviews = driver.find_elements_by_css_selector('.review-container')
    for review in reviews:
        # Reviewer name, rating and date are read from inside the review
        # container.
        name = review.find_element_by_css_selector(
            '.info_text div:nth-child(1)').text
        rating = trim_rating(
            review.find_element_by_css_selector(
                'div.ui_column.is-9 > span.ui_bubble_rating'))
        date = review.find_element_by_class_name('ratingDate').get_attribute(
            "title")
        record = AuthorRatings(rating=rating, date=date, author=name)
        record.save()
        # Open the member overlay, give it five seconds, then click the
        # backdrop (presumably to dismiss the overlay).
        pic = review.find_element_by_css_selector(
            ".memberOverlayLink.clickable")
        pic.click()
        sleep(5)
        # click() returns None, so back_drop never holds the element.
        back_drop = driver.find_element_by_css_selector('.ui_backdrop').click()
def get_user_all_reviews_and_ratings(url, profileid):
    """Scrape one profile page and store its reviews and ratings.

    Loads ``url`` in the shared ``driver``.  If the primary large button is
    present it is clicked and the page is scrolled to the bottom repeatedly
    until the browser reports it is at the bottom.  Each review card is then
    saved as a ``HotelsRatings`` document (a rating that cannot be found is
    stored as ``-1``) and all of them are attached to one ``UserReviews``
    document for ``profileid``.

    Returns:
        ``True`` on success; ``False`` if any exception was raised (the
        exception is printed, not re-raised).
    """

    def rating_or_missing(card, selector):
        # -1 marks "no rating element inside this card".
        try:
            return trim_rating(card.find_element_by_css_selector(selector))
        except NoSuchElementException:
            return -1

    try:
        scroll_to_bottom = (
            'var body = document.body,'
            'html = document.documentElement;'
            'var height = Math.max( body.scrollHeight, body.offsetHeight, html.clientHeight, html.scrollHeight, html.offsetHeight );'
            'window.scrollTo(0, height);'
        )
        is_at_bottom = (
            'function xxx() {if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight) '
            '{return true;} return false;} return xxx()'
        )

        driver.get(url)
        sleep(2)

        try:
            driver.find_element_by_css_selector(
                '.ui_button.primary.large').click()
            sleep(5)
            # Keep scrolling until the page reports that it is at the bottom.
            while not driver.execute_script(is_at_bottom):
                driver.execute_script(scroll_to_bottom)
                sleep(randint(5, 6))
        except NoSuchElementException:
            print("see more not found. skipping over it")

        cards = driver.find_elements_by_css_selector(
            '.social-sections-CardSection__card_section--20Wxe.ui_card.section')
        name = driver.find_element_by_css_selector(
            '.social-common-MemberName__display_name--1HCDW.social-common-MemberBlock__display_name--2a02z'
        ).text
        username = driver.find_element_by_css_selector(
            '.social-common-MemberName__user_name--2ljTA').text
        print(name)
        print(username)
        print(len(cards))

        saved_ratings = []
        for card in cards:
            own_rating = rating_or_missing(card, '.ui_bubble_rating')
            hotel_name = card.find_element_by_css_selector(
                '.social-common-POIObject__poi_name--39wh4.ui_link').text
            overall_rating = rating_or_missing(
                card, '.ui_poi_review_rating > .ui_bubble_rating')
            saved_ratings.append(
                HotelsRatings(
                    user_given_rating=own_rating,
                    over_all_rating=overall_rating,
                    name=hotel_name
                ).save())

        UserReviews(name=name,
                    username=username,
                    userprofileid=profileid,
                    hotels_ratings=saved_ratings).save()
        return True
    except Exception as e:
        # Best effort: report the failure to the caller instead of raising.
        print(e)
        return False
start_time = time() requests = 0 GansevoortReview.__table__.drop(db) # for i in range(218, 261): for i in range(218, 219): url = make_url(i) driver.get(url) link = driver.find_element_by_class_name('ulBlueLinks') link.click() sleep(randint(8, 15)) reviews = driver.find_elements_by_css_selector('div.ui_column.is-9 > div.prw_rup > div.entry > p.partial_entry') dates = driver.find_elements_by_class_name('ratingDate') ratings = driver.find_elements_by_css_selector('div.ui_column.is-9 > span.ui_bubble_rating') ratings = trim_ratings(ratings) names = driver.find_elements_by_css_selector('.memberOverlayLink .info_text > div:nth-child(1)') addresses = driver.find_elements_by_css_selector('.memberOverlayLink .info_text > div > strong') contributions = driver.find_elements_by_css_selector('.memberBadgingNoText > span:nth-child(2)') likes = driver.find_elements_by_css_selector('.memberBadgingNoText > span:nth-child(4)') for j in range(0, len(reviews)): rev = GansevoortReview( id=i * 10 + j,
# Scraper script: stores date, author, rating and profile-picture URL for
# every reviewer on listing pages 218 and 219.
#
# NOTE(review): `time`, `sleep` and `randint` are used below but their
# imports are not visible here.  The original indentation was lost; the
# per-page pause and rate report are assumed to sit inside the page loop -
# confirm against the original file.
import mongoengine as me
from crawling.trimming import make_url, trim_ratings
from driver import driver
from models.author_ratings import AuthorRatings

me.connect("author-ratings")
# Start from an empty collection on every run.
AuthorRatings.drop_collection()

start_time = time()
requests = 0

for i in range(218, 220):
    url = make_url(i)
    driver.get(url)

    # Four parallel lists, matched up by position.
    dates = driver.find_elements_by_class_name('ratingDate')
    rating_elems = driver.find_elements_by_css_selector(
        'div.ui_column.is-9 > span.ui_bubble_rating')
    ratings = trim_ratings(rating_elems)
    names = driver.find_elements_by_css_selector(
        '.memberOverlayLink .info_text > div:nth-child(1)')
    pics = driver.find_elements_by_class_name("basicImg")

    for j, name_elem in enumerate(names):
        print(pics[j].get_attribute("src"))
        review_date = dates[j].get_attribute("title")
        author = name_elem.text
        rating = ratings[j]
        profile = pics[j].get_attribute("src")
        record = AuthorRatings(date=review_date,
                               author=author,
                               rating=rating,
                               profile=profile)
        record.save()

    # Random pause between page loads, then report the request rate so far.
    sleep(randint(8, 15))
    requests += 1
    elapsed_time = time() - start_time
    print('Request:{}; Frequency: {} requests/s'.format(
        requests, requests / elapsed_time))
def checker_1_1(url):
    """Score the text alternatives of images, landmark roles and inputs.

    Loads ``url`` in the shared ``driver`` and counts as good:

    * an ``<img>`` with a non-empty ``alt``, ``aria-label`` or
      ``aria-labelledby``;
    * an element whose ``role`` is ``navigation``, ``region`` or ``math``
      with a non-empty ``aria-label`` or ``aria-labelledby``;
    * an ``<input>`` with a non-empty ``aria-label`` or ``aria-labelledby``.

    Returns:
        dict with ``total_score`` (good / total over all three groups, ``1``
        when the page has none of them) and ``subs``, one entry per group
        (``name``, ``score``, ``ratio`` as ``[bad, good]``, ``present``).
    """
    # The original list contained a garbled "math" literal (stray curly
    # quote and space) that no role value could ever equal.
    checked_roles = ("navigation", "region", "math")

    def count_labelled(elements, attributes):
        # An element is labelled when any of the attributes is non-empty.
        return sum(
            1 for element in elements
            if any(element.get_attribute(name) for name in attributes))

    def sub_score(name, good, total):
        return {
            "name": name,
            "score": good / total if total != 0 else 1,
            "ratio": [total - good, good],
            "present": total > 0
        }

    driver.get(url)
    imgs = driver.find_elements_by_css_selector("img")
    roles = [
        elem for elem in driver.find_elements_by_css_selector("*[role]")
        if (elem.get_attribute("role") or "").strip().lower() in checked_roles
    ]
    inputs = driver.find_elements_by_css_selector("input")

    aria_names = ("aria-label", "aria-labelledby")

    image_total_count = len(imgs)
    image_good_count = count_labelled(imgs, ("alt",) + aria_names)

    roles_total_count = len(roles)
    roles_good_count = count_labelled(roles, aria_names)

    inputs_total_count = len(inputs)
    inputs_good_count = count_labelled(inputs, aria_names)

    total = image_total_count + roles_total_count + inputs_total_count
    good = image_good_count + roles_good_count + inputs_good_count

    return {
        "total_score": good / total if total > 0 else 1,
        "subs": [
            sub_score("image", image_good_count, image_total_count),
            sub_score("role", roles_good_count, roles_total_count),
            sub_score("input", inputs_good_count, inputs_total_count),
        ]
    }