def readFile(action, name):
    """Load a CSV file and return its header together with all of its rows.

    The path is built by concatenating ``outputs[action]`` and ``name``.

    Args:
        action: Key looked up in the module-level ``outputs``.
        name: File name appended to the prefix stored under ``action``.

    Returns:
        A ``(fieldname, prevData)`` tuple: the column names reported by
        ``csv.DictReader`` and a list holding one dict per data row.

    Raises:
        ErrorResponse: If ``action`` is not in ``outputs`` or the resulting
            path does not exist.
    """
    global outputs
    if action not in outputs:
        raise ErrorResponse('in read file process, action is not right')

    path = outputs[action] + name
    if not os.path.exists(path):
        raise ErrorResponse('in read file process, file name is not right')

    # Opened in binary mode, which is what the Python 2 csv module expects.
    with open(path, 'rb') as handle:
        rows = csv.DictReader(handle)
        header = rows.fieldnames
        records = list(rows)
    return header, records
def login():
    """Log the shared ``session`` in through the login form at ``login_url``.

    The login page is fetched first so that the ``value`` attributes of the
    first two elements matching ``.new_session > input`` can be copied into
    the form payload. The credentials are then posted with a random
    User-Agent header, and a deal page is requested to check the outcome.

    Raises:
        ErrorResponse: If the follow-up request does not answer with HTTP 200.
    """
    global username, psword, session, login_url
    print("get started logging in")

    form_page = session.get(login_url)
    form_inputs = BeautifulSoup(form_page.text, 'html.parser').select(
        '.new_session > input')
    token = form_inputs[1]['value']
    token_utf8 = form_inputs[0]['value']

    payload = {
        "user[email]": username,
        "user[password]": psword,
        "authenticity_token": token,
        'utf8': token_utf8,
        'user[remember_me]': '0',
        'commit': 'Login'
    }
    header = {"User-Agent": UserAgent().random}

    session.post(login_url, headers=header, data=payload)
    # NOTE(review): the form payload (including the password) is sent again
    # as the body of this GET request - confirm whether that is intended.
    check = session.get('https://www.fundthatflip.com/deals/10353',
                        headers=header, data=payload)
    if check.status_code != 200:
        raise ErrorResponse("login failed, error code %s" % check.status_code)
    print("login successfully")
def __init__(self):
    """Fetch the page at ``gp.oppen_invest`` and keep its ``.io-browse`` parts.

    The matched elements are stored on ``self.fiveParts``. When the page does
    not contain exactly five of them, a warning is printed and the operator
    is asked on the console whether collection should continue anyway.

    Raises:
        ErrorResponse: If the operator answers anything other than ``yes``.
    """
    page = gp.session.get(gp.oppen_invest)
    self.fiveParts = BeautifulSoup(page.text, 'html.parser').select('.io-browse')
    if len(self.fiveParts) == 5:
        return

    print('in progress projects, there is no 5 parts')
    if raw_input('continue to collect data? yes, no\n') != 'yes':
        raise ErrorResponse('in progress collecting process, abort!')
def writeFile(action, name, fieldname, data):
    """Write ``data`` to a CSV file, preceded by a header row.

    The path is ``outputs[action] + name``. If that path already exists,
    ``'_1.csv'`` is appended to it and the rows are written there instead.

    Args:
        action: Key looked up in the module-level ``outputs``.
        name: File name appended to the prefix stored under ``action``.
        fieldname: Column names handed to ``csv.DictWriter``.
        data: Iterable of row dicts passed to ``writerows``.

    Raises:
        ErrorResponse: If ``action`` is not in ``outputs``.
    """
    global outputs
    if action not in outputs:
        raise ErrorResponse('in write file process, action is not right')

    target = outputs[action] + name
    # NOTE(review): only a single fallback name is tried; if the '_1.csv'
    # path exists as well it is overwritten - confirm this is acceptable.
    if os.path.exists(target):
        target += '_1.csv'

    with open(target, 'wb') as handle:
        out = csv.DictWriter(handle, fieldnames=fieldname,
                             delimiter=',', lineterminator='\n')
        out.writeheader()
        out.writerows(data)
def writeDocument(action, name, content, compID):
    """Store ``content`` as a file inside a per-``compID`` folder.

    The folder is ``outputs[action] + str(compID)`` and is created when it
    does not exist yet. Any ``/`` in ``name`` is replaced by ``_`` so the
    name cannot point into a sub-directory.

    Args:
        action: Key looked up in the module-level ``outputs``.
        name: File name for the document.
        content: Data written to the file, which is opened in binary mode.
        compID: Identifier whose string form names the folder.

    Raises:
        ErrorResponse: If ``action`` is not in ``outputs``.
    """
    global outputs
    if action not in outputs:
        raise ErrorResponse('in write pdf file process, action is not right')

    directory = outputs[action] + str(compID)
    if not os.path.exists(directory):
        os.makedirs(directory)

    safe_name = name.replace('/', '_')
    with open(directory + '/' + safe_name, 'wb') as sink:
        sink.write(content)
def toString(item, isFirst=True):
    """Extract UTF-8 encoded text from the descendants of ``item``.

    Only descendants that are ``bs4.element.NavigableString`` instances and
    are non-empty after stripping are considered.

    Args:
        item: Object whose ``descendants`` are walked.
        isFirst: When true, return the first non-blank string found; when
            false, return every non-blank string joined by single spaces.

    Returns:
        The encoded text as described above.

    Raises:
        ErrorResponse: If ``isFirst`` is true and no non-blank string exists.
    """
    pieces = []
    for node in item.descendants:
        # The type check must come first: only text nodes are stripped.
        if not isinstance(node, bs4.element.NavigableString):
            continue
        cleaned = node.string.strip('\r\n').strip()
        if not cleaned:
            continue
        encoded = cleaned.encode('utf-8')
        if isFirst:
            return encoded
        pieces.append(encoded)

    if isFirst:
        raise ErrorResponse('toString method, css path is not right')
    return ' '.join(pieces)
def getTwoDimenData(self, item, rowCssPath, columnCssPath, dataCssPath):
    """Flatten a two-dimensional HTML table into a ``{label: value}`` dict.

    Each label is ``<row name>_<column name>``, and the cells selected by
    ``dataCssPath`` are consumed in row-major order. Labels not yet present
    in ``self.fieldname`` are appended to it.

    Args:
        item: Table element offering ``select`` and a ``thead`` child.
        rowCssPath: CSS selector for the row-name elements.
        columnCssPath: Accepted but not used; column names are always read
            from the ``th`` elements of ``item.thead``.
        dataCssPath: CSS selector for the data cells.

    Returns:
        Dict mapping every UTF-8 encoded label to its UTF-8 encoded cell text.

    Raises:
        ErrorResponse: If the number of data cells differs from
            ``rows * columns``.
    """
    rowNames = []
    for rowCell in item.select(rowCssPath):
        # Fall back to the first child when the element has no single string.
        source = rowCell.contents[0].string if rowCell.string is None else rowCell.string
        rowNames.append(source.strip('\r\n').strip())

    # NOTE(review): the filter text has its spaces removed before it is
    # compared with 'Valuation Method', so as written the comparison cannot
    # match a header spelled with a plain space - confirm whether the
    # replaced character was meant to be a non-breaking space.
    columnNames = []
    for headCell in item.thead.find_all('th'):
        compact = headCell.string.replace(' ', '').strip('\r\n').strip()
        if compact and compact != 'Valuation Method':
            columnNames.append(headCell.string.strip('\r\n').strip())

    data = item.select(dataCssPath)
    rowCount = len(rowNames)
    columnCount = len(columnNames)
    if len(data) != (rowCount * columnCount):
        raise ErrorResponse(
            'in two dimension table, the number of data does not match row * column'
            ' Data length is %d, row length is %d and column length is %d' %
            (len(data), rowCount, columnCount))

    otherData = {}
    position = 0  # index of the next cell in row-major order
    for rowName in rowNames:
        for columnName in columnNames:
            label = rowName.encode('utf-8') + "_" + columnName.encode('utf-8')
            value = data[position].string.strip('\r\n').strip().encode('utf-8')
            position += 1
            if label not in self.fieldname:
                self.fieldname.append(label)
            otherData[label] = value
    return otherData
def login():
    """Log the shared ``session`` in by posting credentials to ``login_url``.

    The credentials are sent with a random User-Agent header; an offering
    page is then requested to check the outcome.

    Raises:
        ErrorResponse: If the follow-up request does not answer with HTTP 200.
    """
    global username, psword, session, login_url
    print("get started logging in")

    credentials = {"userName": username, "password": psword}
    header = {"User-Agent": UserAgent().random}
    session.post(login_url, headers=header, data=credentials)

    probe = session.get(
        'https://www.realcrowd.com/offerings/j-and-r-investments/adams-building-historic-renovation-in-tulsa')
    if probe.status_code != 200:
        raise ErrorResponse("login failed, error code %s" % probe.status_code)
    print("login successfully")
def login():
    """Log in through a Chrome browser and copy its cookies into ``session``.

    A Chrome instance is started from the ``chromedriver`` binary in the
    current working directory, the login form at ``login_url`` is filled in
    and submitted, and once ``readystate_complete`` holds the browser cookies
    are copied into a fresh ``requests`` session. That session replaces the
    module-level ``session`` and is then used to request the
    investment-opportunities page to check the outcome.

    The browser is shut down in a ``finally`` block, so it no longer stays
    running when a form step raises or the login check fails.

    Raises:
        ErrorResponse: If the follow-up request does not answer with HTTP 200.
    """
    global username, psword, session
    print("get started logging in")

    browser = webdriver.Chrome(os.getcwd() + "/chromedriver")
    try:
        browser.get(login_url)
        browser.find_element_by_name("name").send_keys(username)
        browser.find_element_by_name('pass').send_keys(psword)
        browser.find_element_by_name('op').click()
        WebDriverWait(browser, 30).until(readystate_complete)
        cookies = browser.get_cookies()
    finally:
        # quit() ends the driver and closes its windows; the cookies are
        # already copied out, so the browser is not needed past this point.
        browser.quit()

    session = requests.session()
    for cookie in cookies:
        session.cookies.set(cookie['name'], cookie['value'])

    res = session.get('https://www.realtymogul.com/investment-opportunities')
    if res.status_code != 200:
        raise ErrorResponse("login failed, error code %s" % res.status_code)
    print("login successfully")
def refreshSoup(url):
    """Fetch ``url`` with the shared ``session`` and return the response text.

    Args:
        url: Address to request.

    Returns:
        The body text of the response.

    Raises:
        ErrorResponse: If the response status code is not 200.
    """
    response = session.get(url)
    if response.status_code == 200:
        return response.text
    raise ErrorResponse("refresh page failed, error code %s" % response.status_code)
def __init__(self):
    """Fetch the page at ``gp.past_invest`` and keep its ``.io-browse`` parts.

    The matched elements are stored on ``self.threePart``.

    Raises:
        ErrorResponse: If the page does not contain exactly three of them.
    """
    page = gp.session.get(gp.past_invest)
    sections = BeautifulSoup(page.text, 'html.parser').select('.io-browse')
    self.threePart = sections
    if len(sections) != 3:
        raise ErrorResponse('in past invest, content error')