def start_requests(self):
    """Prepare a logged-in browser session and schedule the first listing page.

    Yields one ``scrapy.Request`` for the jobs listing URL. Its ``meta``
    carries the Selenium driver, the wait helper, the MySQL connection and a
    cursor so that ``self.parse`` receives them with the response.
    """
    from general_utils_lib import account_creation as AC
    from selenium.webdriver.support.ui import WebDriverWait

    listing_pages = ['http://www.guru.com/d/jobs/pg/9/']

    browser = AC.initialize_tor()  #driver()
    waiter = WebDriverWait(browser, 20)

    # The login helper is handed an empty credentials mapping.
    credentials = {}
    AC.login_as_worker(browser, waiter, credentials)

    connection = MySQLdb.connect("localhost", "root", "", "guru_crawler")
    db_cursor = connection.cursor()

    shared_state = {
        "driver": browser,
        "driver_wait": waiter,
        "db": connection,
        "cursor": db_cursor,
    }
    yield scrapy.Request(
        url=listing_pages[0], callback=self.parse, meta=shared_state)
def check_login():
    """Log in and out of each account listed in the accounts CSV.

    Row 0 of the CSV is skipped (presumably a header row -- confirm against
    the file). For every other row, column 0 is used as the username and
    column 2 as the password. After every fourth row the driver is replaced
    by whatever ``AC.wait_ip_change`` returns.
    """
    from general_utils_lib import read_files, account_creation as AC

    accounts = read_files.read_csv(
        "/Users/laveeshrohra/Documents/Workspace/job_RA/guru_accounts.csv")
    driver = AC.initialize_tor()
    driver_wait = AC.initialize_wait(driver)

    for row_number in range(1, len(accounts)):
        row = accounts[row_number]
        credentials = {"username": row[0], "password": row[2]}
        AC.login(credentials, driver, driver_wait)

        # Click the "skip" link and wait for the dashboard nav element.
        skip_link = {
            "name": "ctl00_ContentPlaceHolder1_ucSq_aSkip",
            "click": 1
        }
        dashboard_nav = {"name": "e-topnav-dash-in"}
        AC.check_and_click_el(driver_wait, skip_link, dashboard_nav,
                              credentials)

        AC.logout(driver_wait)

        rotate_ip = (row_number % 4) == 0
        if rotate_ip:
            # NOTE(review): driver_wait is not rebuilt here; if
            # wait_ip_change returns a new driver object the wait helper
            # still points at the old one -- confirm.
            driver = AC.wait_ip_change(driver)
def execute_query(db, cursor, query):
    """Run *query* on *cursor* and commit it on *db*.

    On failure the transaction is rolled back, the offending query is
    printed and the call pauses for ten seconds before returning. The
    error is not re-raised (best-effort write).

    Args:
        db: open database connection providing ``commit``/``rollback``.
        cursor: cursor belonging to *db*.
        query: SQL statement to execute.
    """
    try:
        cursor.execute(query)
        db.commit()
    except Exception:
        # Deliberately not a bare ``except:`` so that Ctrl-C / SystemExit
        # still stop a long crawl instead of being swallowed here.
        db.rollback()
        print(query)
        # NOTE(review): the original layout of this file was lost; the
        # pause is assumed to belong to the failure path -- confirm.
        time.sleep(10)


def get_all_data(project_urls, driver, driver_wait, cursor, db):
    """Collect data for *project_urls*; thin wrapper over ``get_project_data``.

    All arguments are forwarded unchanged and the helper's return value is
    returned as is.
    """
    return get_project_data(project_urls, driver, driver_wait, cursor, db)


#print get_skills_and_stars("http://www.guru.com/freelancers/intelex-informatics/reviews")
import MySQLdb
import os

# Script entry: gather project links, log in, scrape every project into the
# database, then delete the links file and close the browser window.
driver = AC.initialize_tor()
driver_wait = WebDriverWait(driver, 5)
db = MySQLdb.connect("localhost", "root", "", "guru_crawler")
cursor = db.cursor()
file_name = '/Users/laveeshrohra/Documents/Workspace/job_RA/new_project_desc.csv'
links = get_project_links.get_all_links(driver, driver_wait, file_name)
AC.login_as_worker(driver, driver_wait)
get_all_data(links, driver, driver_wait, cursor, db)
os.remove(file_name)
driver.close()
#check_json_resp()
def post_pic():  #driver, driver_wait, login_dets
    """Update the location details of every account in the accounts CSV.

    For each CSV row after row 0 the function logs in (column 0 = username,
    column 2 = password), opens the modify-account page, sets the city to
    "Los Angeles", selects country option '1' and state option '5', enters a
    randomly chosen postal code, submits the form and logs out.

    The profile-picture upload (``post_pic_dom``) is currently disabled; the
    eligibility check for it is kept so the step can be re-enabled in place.

    Any exception raised while processing a row is written, together with
    the account name and the formatted traceback, to ``logs.csv`` and
    processing continues with the next row. The log file and the browser
    window are always released, even if the run aborts.
    """
    from selenium.webdriver.support.ui import WebDriverWait
    from general_utils_lib import read_files
    from selenium.webdriver.support import expected_conditions as EC
    import csv
    import random
    import traceback

    driver = AC.initialize_tor()
    driver_wait = WebDriverWait(driver, 20)
    try:
        url = "http://www.guru.com/emp/modifyaccount.aspx"

        # Only consumed by the (currently disabled) post_pic_dom step.
        image_urls = {
            "wf": "https://s14.postimg.org/3znnyktrl/image.jpg",
            "wm": "https://s14.postimg.org/5dqvlozap/image.jpg",
            "bm": "https://s22.postimg.org/v4w2nat81/image.jpg",
            "bf": "https://s21.postimg.org/k4gqkxj4n/image.jpg",
            "im": "https://s18.postimg.org/vgy5qdkkp/image.jpg",
            "if": "https://s22.postimg.org/nd4nhy3c1/image.jpg",
            "am": "https://s21.postimg.org/azwep0vdz/image.jpg",
            "af": "https://s21.postimg.org/8ubm72z3r/image.jpg",
        }

        #Login Module
        file_data = read_files.read_csv(
            "/Users/laveeshrohra/Documents/Workspace/job_RA/guru_accounts.csv")
        login_dets = dict()
        # Renamed from ``zip`` so the builtin is not shadowed.
        zip_codes = ["90003", "90039", "90095", "90013", "90021"]

        with open('/Users/laveeshrohra/Documents/Workspace/job_RA/logs.csv',
                  "w+") as log:
            writer = csv.writer(
                log, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)

            for i in range(1, len(file_data)):
                data = file_data[i]
                try:
                    login_dets["username"] = data[0].strip()
                    login_dets["password"] = data[2].strip()
                    AC.login(login_dets, driver, driver_wait)
                    AC.check_and_click_el(driver_wait, {
                        "name": "ctl00_ContentPlaceHolder1_ucSq_aSkip",
                        "click": 1
                    }, {"name": "e-topnav-dash-in"})

                    driver.get(url)

                    city = driver.find_element(
                        AC.constants.BY_ID,
                        "ctl00_guB_ctl00_txtCity_txtCity_TextBox")
                    city.clear()
                    city.send_keys("Los Angeles")

                    driver.find_element(
                        AC.constants.BY_XPATH,
                        "//select[@id='ctl00_guB_ctl00_ddlCountry_ddlCountry_Select']/option[@value='1']"
                    ).click()

                    # The state option is waited for before clicking it.
                    state = driver_wait.until(
                        EC.element_to_be_clickable((
                            AC.constants.BY_XPATH,
                            "//select[@id='ctl00_guB_ctl00_ddlState_ddlState_Select']/option[@value='5']"
                        )))
                    state.click()

                    zip_el = driver.find_element(
                        AC.constants.BY_ID,
                        "ctl00_guB_ctl00_txtPostalCode_txtPostalCode_TextBox")
                    zip_el.clear()
                    zip_el.send_keys(random.choice(zip_codes))

                    AC.check_and_click_el(driver_wait, {
                        "name": "ctl00_guB_ctl00_btnSubmit_btnSubmit_Button",
                        "click": 1
                    }, {"name": "ctl00_guB_navigation"})

                    # Value comparison (the original used ``is``, which
                    # compares object identity and never matched).
                    wants_picture = (data[4].strip() != "Unknown"
                                     and bool(data[3].strip()))
                    if wants_picture:
                        # Picture upload is disabled for now.
                        # post_pic_dom(driver, driver_wait, data, image_urls)
                        pass

                    # Always log out after a successful update; skipping the
                    # picture step must not leave the session logged in for
                    # the next account.
                    AC.logout(driver_wait)
                except Exception:
                    # format_exc() returns the traceback text; print_exc()
                    # returns None, which left the log column empty.
                    writer.writerow(
                        [data[0].strip(), traceback.format_exc()])
                    continue
    finally:
        driver.close()
def post_guru_project():
    """Post one batch of nine scraping projects, one per account.

    For each index in the current batch the function picks an account row
    (row 0 of the accounts CSV is never used; index 0 falls back to row 1),
    chooses a random title, takes the description at ``i - 1``, logs in,
    fills and submits the post-project form, navigates to the freelancers
    page, logs out, appends the posted details to ``project_details.csv``
    and then sleeps 210 seconds.

    Any exception raised while handling an account is written, with the
    first column of the account row and the formatted traceback, to
    ``logs.csv`` and the loop continues. Both files and the browser window
    are always released, even if the run aborts.
    """
    import menuPagesParse as Menu
    import csv
    import random
    import traceback

    driver = AC.initialize_tor()
    driver_wait = AC.initialize_wait(driver)
    try:
        project_desc = Menu.get_project_desc(
            '/Users/laveeshrohra/Documents/Workspace/job_RA/project_desc.csv')
        project_details = read_files.read_csv(
            "/Users/laveeshrohra/Documents/Workspace/job_RA/sample_project_posts.csv"
        )[2]
        login_dets = read_files.read_csv(
            "/Users/laveeshrohra/Documents/Workspace/job_RA/guru_accounts.csv")
        titles = [
            "Web Scraping", "Scrape the website", "Data Extractor",
            "Extract data from website", "Gather data from site",
            "Data Collector", "Collect all data from website",
            "Website scraper", "Collection of data", "Extraction of Data",
            "Website scraping", "Scrape the website", "Webpage parsing",
            "Scraping data from website", "Network page scraper",
            "Collect the data from website", "Script for web scraping",
            "Extraction of data from Web", "Scrape data from web",
            "Collecting info from website", "Webpage Scraping",
            "Site scraping", "Web scraping", "Html scraper",
            "Script for web parsing", "Parsing web page", "Webs scraper",
            "Internet site scraping", "Scrape data from site"
        ]

        # Batch selector: batch ``iterations`` covers indices
        # 9*(iterations-1)+1 .. 9*iterations inclusive.
        iterations = 1
        prev_iter = 9 * (iterations - 1)

        with open(
                '/Users/laveeshrohra/Documents/Workspace/job_RA/project_details.csv',
                'a+') as details_file, \
             open('/Users/laveeshrohra/Documents/Workspace/job_RA/logs.csv',
                  "w+") as log:
            writer = csv.writer(
                details_file,
                delimiter=',',
                quotechar='"',
                quoting=csv.QUOTE_ALL)
            log_writer = csv.writer(
                log, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)

            for i in range(prev_iter + 1,
                           (iterations * 9) + 1):  #len(login_dets)
                # Wrap around the account list, never landing on row 0.
                account_idx = i % len(login_dets)
                login = login_dets[account_idx if account_idx > 0 else 1]
                try:
                    project_details[0] = random.choice(titles)
                    project_details[1] = project_desc[
                        i - 1]  #45+ after every rotation
                    input_data = get_input_data(login, project_details)

                    AC.login(input_data["login"], driver, driver_wait)
                    AC.check_and_click_el(driver_wait, {
                        "name": "ctl00_ContentPlaceHolder1_ucSq_aSkip",
                        "click": 1
                    }, {"name": "e-topnav-dash-in"})
                    #print "logged in"
                    AC.check_and_click_el(
                        driver_wait, {
                            "name": "e-topnav-postjob-in",
                            "click": 1
                        },
                        {"name": "ctl00_guB_ucPostProject_txtPT_txtPT_TextBox"})
                    # print "clicked post"
                    # time.sleep(5)

                    els_data = element_list(input_data["data"])
                    submit_btn = {
                        "name": ".//button[text()='Continue']",
                        "type": AC.constants.BY_XPATH
                    }
                    assert_page = {
                        "next_page": "Post Project Confirmation"
                    }  #"current_val" : "Post",
                    AC.fill_form(driver, driver_wait, None, assert_page,
                                 els_data, submit_btn, False)

                    AC.check_and_click_el(
                        driver_wait, {
                            "name": ".//a[@href='/d/freelancers/']",
                            "type": AC.constants.BY_XPATH,
                            "click": 1
                        }, {"name": "ctl00_guB_txtKeyWord"})
                    AC.logout(driver_wait)

                    write_data = [input_data["login"]['username']]
                    write_data.extend(project_details)
                    writer.writerow(write_data)

                    # Long pause between consecutive postings.
                    time.sleep(210)
                except Exception:
                    # format_exc() returns the traceback text; print_exc()
                    # returns None, which left the log column empty.
                    log_writer.writerow([login[0], traceback.format_exc()])
                    continue
                # if (i % 4) == 0:
                #     driver = AC.wait_ip_change(driver)
    finally:
        driver.close()
from selenium.webdriver.support.ui import WebDriverWait from general_utils_lib import account_creation as AC from general_utils_lib import general_utils as GU from general_utils_lib import read_files import gmail_read, time, csv, itertools, random, traceback driver = AC.initialize_tor() #_browser()#AC.initialize_driver() driver_wait = WebDriverWait(driver, 5) def fill_elem_list(name, email, password): element_ids = list() #freelancer_dets = ["freelancer", "btnCreateAccountFreelancer_btnCreateAccountFreelancer_Button"] element_ids.append({ "name": "employer", "value": None, "click": 1 }) #freelancer - for working element_ids.append({ "name": "ucRegistration_txtFullName_txtFullName_TextBox", "value": name, "click": 0 }) element_ids.append({ "name": "ucRegistration_txtEmail_txtEmail_TextBox", "value": email, "click": 0 }) element_ids.append({ "name": "ucRegistration_txtPassword_txtPassword_TextBox", "value": password,