def readFile(action, name):
    """Load a CSV file and return its header together with all its rows.

    The path is formed by concatenating ``outputs[action]`` and ``name``.

    Args:
        action: key into the module-level ``outputs`` mapping.
        name: file name appended to the prefix stored under ``action``.

    Returns:
        A ``(fieldnames, rows)`` tuple: the header reported by
        ``csv.DictReader`` and a list holding one dict per data row.

    Raises:
        ErrorResponse: if ``action`` is not a key of ``outputs`` or the
            resulting path does not exist.
    """
    if action not in outputs:
        raise ErrorResponse('in read file process, action is not right')

    path = outputs[action] + name
    if not os.path.exists(path):
        raise ErrorResponse('in read file process, file name is not right')

    with open(path, 'rb') as handle:
        rows_reader = csv.DictReader(handle)
        # Read the header first, then drain the remaining rows.
        header = rows_reader.fieldnames
        rows = list(rows_reader)

    return header, rows
def login():
    """Log in through the module-level ``session`` and verify the result.

    Fetches ``login_url``, reads the ``value`` of the first two inputs
    matched by ``.new_session > input``, posts them together with
    ``username`` / ``psword``, then requests a deal page to check that
    the session is accepted.

    Raises:
        ErrorResponse: if the verification request does not return 200.
    """
    print("get started logging in")

    page = session.get(login_url)
    form_inputs = BeautifulSoup(page.text, 'html.parser').select(
        '.new_session > input')
    # The second input supplies the authenticity token, the first the
    # utf8 marker value.
    token = form_inputs[1]['value']
    token_utf8 = form_inputs[0]['value']

    param = {
        "user[email]": username,
        "user[password]": psword,
        "authenticity_token": token,
        'utf8': token_utf8,
        'user[remember_me]': '0',
        'commit': 'Login'
    }

    # A random User-Agent string is used for both requests below.
    header = {"User-Agent": UserAgent().random}

    session.post(login_url, headers=header, data=param)

    check = session.get('https://www.fundthatflip.com/deals/10353',
                        headers=header,
                        data=param)
    if check.status_code != 200:
        raise ErrorResponse("login failed, error code %s" % check.status_code)
    print("login successfully")
 def __init__(self):
     """Fetch ``gp.oppen_invest`` and store its ``.io-browse`` elements.

     The matches are kept in ``self.fiveParts``. When the page does not
     contain exactly five of them the user is asked on the console
     whether to go on.

     Raises:
         ErrorResponse: if the count is not five and the user answers
             anything other than ``yes``.
     """
     response = gp.session.get(gp.oppen_invest)
     parsed = BeautifulSoup(response.text, 'html.parser')
     self.fiveParts = parsed.select('.io-browse')
     if len(self.fiveParts) == 5:
         return

     # Unexpected layout: let the operator decide whether to continue.
     print('in progress projects, there is no 5 parts')
     reply = raw_input('continue to collect data? yes, no\n')
     if reply != 'yes':
         raise ErrorResponse('in progress collecting process, abort!')
def writeFile(action, name, fieldname, data):
    """Write ``data`` as a CSV file with a header row.

    The path is ``outputs[action] + name``; if a file already exists
    there, ``'_1.csv'`` is appended to the path and that file is written
    instead.

    Args:
        action: key into the module-level ``outputs`` mapping.
        name: file name appended to the prefix stored under ``action``.
        fieldname: column names passed to ``csv.DictWriter``.
        data: iterable of row dicts handed to ``writerows``.

    Raises:
        ErrorResponse: if ``action`` is not a key of ``outputs``.
    """
    if action not in outputs:
        raise ErrorResponse('in write file process, action is not right')

    target = outputs[action] + name
    if os.path.exists(target):
        # Avoid overwriting the existing file by switching to a suffixed path.
        target += '_1.csv'

    with open(target, 'wb') as sink:
        table = csv.DictWriter(
            sink,
            fieldnames=fieldname,
            delimiter=',',
            lineterminator='\n',
        )
        table.writeheader()
        table.writerows(data)
def writeDocument(action, name, content, compID):
    """Write ``content`` to a file inside a per-``compID`` folder.

    The folder is ``outputs[action] + str(compID)`` and is created when
    missing. Any ``/`` in ``name`` is replaced by ``_`` before it is
    used as the file name.

    Args:
        action: key into the module-level ``outputs`` mapping.
        name: desired file name.
        content: data written to the file, which is opened in ``'wb'`` mode.
        compID: identifier converted with ``str`` to name the folder.

    Raises:
        ErrorResponse: if ``action`` is not a key of ``outputs``.
    """
    if action not in outputs:
        raise ErrorResponse('in write pdf file process, action is not right')

    target_dir = outputs[action] + str(compID)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Slashes in the name would otherwise be read as sub-directories.
    safe_name = name.replace('/', '_')
    path = target_dir + '/' + safe_name

    with open(path, 'wb') as sink:
        sink.write(content)
def toString(item, isFirst=True):
    """Extract non-blank text from the descendants of ``item``.

    Only descendants that are ``bs4.element.NavigableString`` instances
    and still contain text after stripping are considered; each is
    UTF-8 encoded.

    Args:
        item: object whose ``descendants`` are walked.
        isFirst: when true, return the first non-blank text found;
            otherwise return all of them joined by single spaces.

    Returns:
        The first encoded text (``isFirst`` true) or the space-joined
        encoded texts (``isFirst`` false; empty string if none).

    Raises:
        ErrorResponse: if ``isFirst`` is true and no text is found.
    """
    pieces = []
    for node in item.descendants:
        if not isinstance(node, bs4.element.NavigableString):
            continue
        cleaned = node.string.strip('\r\n').strip()
        if not cleaned:
            continue
        encoded = cleaned.encode('utf-8')
        if isFirst:
            return encoded
        pieces.append(encoded)

    if isFirst:
        raise ErrorResponse('toString method, css path is not right')
    return ' '.join(pieces)
# Example #7
0
    def getTwoDimenData(self, item, rowCssPath, columnCssPath, dataCssPath):
        """Flatten a two-dimensional table into ``{"row_column": value}``.

        Row names come from ``item.select(rowCssPath)``, column names from
        the ``th`` cells of ``item.thead`` (blank headers and
        ``'Valuation Method'`` are skipped), and values from
        ``item.select(dataCssPath)``, read row by row. Every new label is
        also appended to ``self.fieldname``.

        Args:
            item: parsed table element.
            rowCssPath: selector for the row-name cells.
            columnCssPath: not used by this method.
            dataCssPath: selector for the data cells.

        Returns:
            Dict mapping ``<row>_<column>`` labels to the stripped,
            UTF-8 encoded cell text.

        Raises:
            ErrorResponse: if the number of data cells differs from
                rows * columns.
        """
        def clean(text):
            return text.strip('\r\n').strip()

        rowNames = []
        for cell in item.select(rowCssPath):
            # Fall back to the first child when the cell has no single string.
            holder = cell.contents[0] if cell.string is None else cell
            rowNames.append(clean(holder.string))

        columnNames = []
        for head in item.thead.find_all('th'):
            probe = clean(head.string.replace('&nbsp', ''))
            if probe and probe != 'Valuation Method':
                columnNames.append(clean(head.string))

        cells = item.select(dataCssPath)
        width = len(columnNames)

        if len(cells) != (len(rowNames) * width):
            raise ErrorResponse(
                'in two dimension table, the number of data does not match row * column'
                ' Data length is %d, row length is %d and column length is %d'
                % (len(cells), len(rowNames), width))

        otherData = {}

        for rowIndex, rowName in enumerate(rowNames):
            for columnIndex, columnName in enumerate(columnNames):
                label = rowName.encode('utf-8') + "_" + columnName.encode(
                    'utf-8')
                # Data cells are laid out row-major.
                cell = cells[rowIndex * width + columnIndex]
                value = clean(cell.string).encode('utf-8')

                if label not in self.fieldname:
                    self.fieldname.append(label)

                otherData[label] = value

        return otherData
def login():
    """Post the credentials to ``login_url`` and verify the session.

    Uses the module-level ``session``, ``username`` and ``psword``. After
    the login POST, an offering page is requested to check that the
    session is accepted.

    Raises:
        ErrorResponse: if the verification request does not return 200.
    """
    print("get started logging in")

    credentials = {
        "userName": username,
        "password": psword
    }

    # A random User-Agent string is sent with the login request only.
    headers = {"User-Agent": UserAgent().random}

    session.post(login_url, headers=headers, data=credentials)

    check = session.get('https://www.realcrowd.com/offerings/j-and-r-investments/adams-building-historic-renovation-in-tulsa')
    if check.status_code != 200:
        raise ErrorResponse("login failed, error code %s" % check.status_code)
    print("login successfully")
def login():
    """Log in with a Chrome browser and copy its cookies into ``session``.

    Drives the login form at ``login_url`` (fields ``name`` / ``pass``,
    submit button ``op``), waits up to 30 seconds for
    ``readystate_complete``, then rebinds the module-level ``session`` to
    a fresh ``requests`` session carrying the browser cookies. Finally an
    investment listing page is requested to verify the login.

    The browser is always shut down, even when a step fails; previously
    any exception (including the failed-login ``ErrorResponse``) left the
    Chrome window and driver process running.

    Raises:
        ErrorResponse: if the verification request does not return 200.
    """
    global session
    print("get started logging in")

    driver = webdriver.Chrome(os.getcwd() + "/chromedriver")
    try:
        driver.get(login_url)

        driver.find_element_by_name("name").send_keys(username)
        driver.find_element_by_name('pass').send_keys(psword)
        driver.find_element_by_name('op').click()

        WebDriverWait(driver, 30).until(readystate_complete)
        cookies = driver.get_cookies()
    finally:
        # quit() ends the whole driver session, not just the current
        # window; the browser is no longer needed once cookies are read.
        driver.quit()

    session = requests.session()
    for cookie in cookies:
        session.cookies.set(cookie['name'], cookie['value'])

    res = session.get('https://www.realtymogul.com/investment-opportunities')
    if res.status_code != 200:
        raise ErrorResponse("login failed, error code %s" % res.status_code)
    print("login successfully")
def refreshSoup(url):
    """Fetch ``url`` with the module-level ``session`` and return its text.

    Args:
        url: address to request.

    Returns:
        The ``text`` of the response.

    Raises:
        ErrorResponse: if the response status code is not 200.
    """
    response = session.get(url)
    if response.status_code == 200:
        return response.text
    raise ErrorResponse(
        "refresh page failed, error code %s" % response.status_code)
# Example #11
0
 def __init__(self):
     """Fetch ``gp.past_invest`` and store its ``.io-browse`` elements.

     The matches are kept in ``self.threePart``.

     Raises:
         ErrorResponse: if the page does not contain exactly three
             ``.io-browse`` elements.
     """
     page = gp.session.get(gp.past_invest)
     sections = BeautifulSoup(page.text, 'html.parser').select('.io-browse')
     self.threePart = sections
     if len(sections) != 3:
         raise ErrorResponse('in past invest, content error')