Exemplo n.º 1
0
def fetch(url):
    """Crawl the moderator-slider links on *url* and aggregate vote scores.

    Returns a dict mapping title -> (summed score, number of samples).
    """
    browser = RoboBrowser(history=True, parser="html.parser")
    browser.open(url)

    vote_links = browser.select('.moderatorenSlider a.beitrag')
    seen_hrefs = set()
    totals = {}

    for link in vote_links:
        href = link["href"]
        # Skip anchors we have already visited.
        if href in seen_hrefs:
            continue
        seen_hrefs.add(href)
        print(href)
        browser.follow_link(link)
        try:
            scores = extractVotes(browser)
            print(scores)
            for title, score in scores.items():
                prev_score, prev_count = totals.get(title, (0, 0))
                totals[title] = (prev_score + score, prev_count + 1)
        except Exception as exc:
            print(exc)
        browser.back()
    return totals
Exemplo n.º 2
0
def scrape_cosmo_exam(url, email, password):
    """Log into the course site with *email*/*password* and return announcement titles.

    NOTE(review): relies on project helpers ``mapper`` and ``filterer``
    (map/filter wrappers -- confirm they return lists).
    """
    browser = RoboBrowser()
    # BUG FIX: the original opened an undefined global ``tlink`` and ignored
    # the ``url`` parameter.
    browser.open(url)
    search = browser.get_form()
    search['user[email]'] = email
    search['user[password]'] = password
    browser.submit_form(search, submit=search.submit_fields['commit'])

    # main page (assumes the 3rd anchor leads to the course -- fragile)
    browser.follow_link(browser.find_all('a')[2])

    all_links = browser.find_all('a')
    announcements_key = list(filter(lambda x: 'Announcements' in x, all_links))[0]
    announcement_ind = all_links.index(announcements_key)
    browser.follow_link(browser.find_all('a')[announcement_ind])

    # helper: second child of every <h2> on the current page
    def date_extract(ind):
        return list(mapper(lambda x: list(x.children)[1], browser.find_all('h2')))

    # helper: keep entries matching any of *matches* (case-insensitive)
    def matcher(lst, *matches):
        if not matches:
            matches = ['exam', 'reminder']
        else:
            matches = matches[0]
        return filterer(lambda x: any(string.lower() in str(x).lower() for string in matches), lst)

    # BUG FIX: ``date_extract`` was originally called before it was defined,
    # raising NameError at runtime; the helpers are now defined first.
    titles = mapper(lambda x: date_extract(1), browser.find_all('h2'))[0]

    return titles
Exemplo n.º 3
0
def _download_rib(dir, date):
    """Download the RouteViews RIB file for *date* into *dir* and decompress it.

    Returns whatever ``_decompress_rib`` produces for the downloaded file.
    """
    base_url = "http://archive.routeviews.org/route-views.wide/bgpdata/"
    # Archive is organised as YYYY.MM/RIBS/rib.YYYYMMDD.*
    dt_web = date.strftime("%Y.%m") + "/"

    print("Looking for RIB file...")
    br = RoboBrowser()
    br.open(base_url)

    br.follow_link(br.get_link(dt_web))
    br.follow_link(br.get_link("RIBS/"))

    elem = "rib." + date.strftime("%Y%m%d")
    one_link = br.get_links(elem)[0]

    # Pull the href out of the anchor's string form (first quoted attribute).
    file = (str(one_link).split('"'))[1]
    # FIX: the original rebuilt the same date path a second time (``_dt_web``)
    # and repeated the base URL literal; reuse the values computed above.
    url_dw = base_url + dt_web + "RIBS/" + file
    filename = dir + file

    r = requests.get(url_dw)
    with open(filename, "wb") as code:
        code.write(r.content)

    return _decompress_rib(filename)
Exemplo n.º 4
0
def run(url):
    """
    Entry point of the crawler: walk every listing page starting at *url*
    and dump each novel found via ``output``.
    """
    # Open the listing with a browser that keeps history for paging.
    browser = RoboBrowser(parser='html.parser',
                          history=True,
                          timeout=30,
                          tries=5)
    browser.open(url)

    # Keep going until the site reports the final page.
    while not is_end_page(browser):
        for entry in get_all_novel_links(browser):
            entry_id = get_id(entry)
            entry_title = get_title(entry)
            entry_author = get_author(entry)
            entry_date = get_date(entry)
            entry_type = get_type(entry)
            print('*' * 75)
            print(entry_title)
            body = get_content(entry, entry_author)
            output(entry_title,
                   novel_id=entry_id,
                   author=entry_author,
                   novel_type=entry_type,
                   content=body,
                   date=entry_date)
        # Be polite between page fetches.
        time.sleep(random.randint(R_START, R_END))
        link_to_next = next_page(browser)
        if link_to_next is None:
            break
        browser.follow_link(link_to_next)
Exemplo n.º 5
0
 def pushedbutton(self, b):
     """Log into the CMU student portal and auto-fill every course survey.

     Reads credentials from the line-edit widgets and reports progress in
     ``self.lineEdit_2``.
     """
     account = self.lineEdit.text()
     pasw = self.lineEdit_3.text()
     # use robobrowser module to manipulate the web page
     browser = RoboBrowser(history=True)
     browser.open('http://web1.cmu.edu.tw/stdinfo/login.asp')
     form1 = browser.get_form(id='form1')
     form1['f_id'].value = account
     form1['f_pwd'].value = pasw
     browser.submit_form(form1)
     # Landing on the error page means the credentials were rejected.
     if browser.state.url == "http://web1.cmu.edu.tw/stdinfo/loginerr.asp":
         self.lineEdit_2.setText('帳號密碼錯了?')
     else:
         link_one = browser.get_link(text=re.compile('.意見調查'))
         browser.follow_link(link_one)
         # FIX: renamed the local that shadowed the builtin ``list``.
         survey_links = []
         for l in browser.get_links(text=re.compile('.填寫.')):
             survey_links.append(l)
         # First match is dropped -- presumably a header/sample link; TODO confirm.
         survey_links.pop(0)
         for li in survey_links:
             browser.follow_link(li)
             form2 = browser.get_form(id='thisform')
             form2['Cos_Q1'].value = '1'
             browser.submit_form(form2)
         self.lineEdit_2.setText('Done!')
Exemplo n.º 6
0
def main():
    """Browse Rap Genius: search for 'queen' and print lyrics of two songs."""
    # FIX: the original built two browsers and threw the first away, so the
    # browser actually used lacked the intended construction; one instance
    # with an explicit parser (avoids a bs4 warning) and history (needed for
    # ``browser.back()`` below) suffices.
    browser = RoboBrowser(history=True, parser="html.parser")
    browser.open('http://rapgenius.com/')

    # Search for Queen
    form = browser.get_form(action='/search')  # <RoboForm q=>
    form['q'].value = 'queen'
    browser.submit_form(form)

    # Look up the first song
    songs = browser.select('.song_name')
    try:
        browser.follow_link(songs[0])
    except IndexError:
        print("Songs Index doesn't exist!")
        return
    lyrics = browser.select('.lyrics')
    try:
        lyrics[0].text  # \n[Intro]\nIs this the real life...
    except IndexError:
        print("Lyrics Index doesn't exist!")

    # Back to results page
    browser.back()

    # Look up my favorite song
    browser.follow_link('death on two legs')

    # Can also search HTML using regex patterns
    lyrics = browser.find(class_=re.compile(r'\blyrics\b'))
    print(lyrics.text)  # \n[Verse 1]\nYou suck my blood like a leech...
Exemplo n.º 7
0
    def get_cookies(self):
        """ opens a fake browser to get the cookies needed """
        from robobrowser import RoboBrowser
        # Old Firefox UA string -- presumably makes the site serve the
        # classic, scrapable login flow; verify before changing.
        browser = RoboBrowser(
            user_agent=
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b3) Gecko/20090305 Firefox/3.1b3 GTB5',
            parser='html.parser')
        browser.open('https://battlemap.deltatgame.com/home#')
        # Assumes the first anchor on the page is the login link -- TODO confirm.
        link = browser.find('a')
        browser.follow_link(link)
        form = browser.get_form(0)

        # Two-step login: submit the email form first, then the password form.
        with open('battlecreds.json') as credentialfile:
            credentials = json.load(credentialfile)
            form['Email'] = credentials['email']
            browser.submit_form(form)
            form = browser.get_form(0)
            form['Passwd'] = credentials['password']
            browser.submit_form(form)
            browser.open('https://battlemap.deltatgame.com/home')

        # Session cookies reused by later requests against the battlemap API.
        self.battlemap_token = browser.session.cookies.get('battlemap_session')
        self.xsrf = browser.session.cookies.get('XSRF-TOKEN')
        # Refresh slightly before the ~2 h expiry.
        self.cookietimeout = time.time() + 60 * 60 * 1.95
        # GET csrf-token META HERE
        self.csrf = ''
        self.brow = browser
        from bs4 import BeautifulSoup
        # NOTE(review): ``browser.parsed`` is a property; calling it invokes the
        # soup object (bs4 makes tags callable as find_all), so str() here
        # renders a list of all tags. This works for locating the meta tag but
        # looks accidental -- confirm ``str(browser.parsed)`` wasn't intended.
        soup = BeautifulSoup(str(browser.parsed()), "html.parser")
        for tag in soup.find_all('meta'):
            if 'name' in tag.attrs and tag.attrs['name'] == 'csrf-token':
                self.csrf = tag.attrs['content']
Exemplo n.º 8
0
    def test_calc_interface(self):
        """End-to-end check of the calculator form and its result link."""
        operation = "5,+,2"
        expected_result = 7

        # Seed the DB with an unrelated result first.
        requests.post('/'.join((TEST_URL, 'calc')),
            data={'operation': '998,-,888'})

        # Open the app with a history-capable browser.
        browser = RoboBrowser(history=True, parser='html.parser')
        browser.open(TEST_URL)

        # Submit the calculation through the form.
        calc_form = browser.get_form(action='/calc')
        calc_form['operation'] = operation
        browser.submit_form(calc_form)

        # The rendered page must show the computed result.
        self.assertEqual(int(browser.find(id="result").text), expected_result)

        # The result link must replay the same operation/result pair.
        browser.follow_link(browser.find(id='result_link'))
        shown = (browser.find(id="operation").text,
                 int(browser.find(id="result").text))
        self.assertEqual((operation, expected_result), shown)
Exemplo n.º 9
0
 def pushedbutton(self, b):
     """Log into the CMU student portal and fill in every pending survey form.

     Credentials come from the widget line edits; status is written back to
     ``self.lineEdit_2``.
     """
     account = self.lineEdit.text()
     pasw = self.lineEdit_3.text()
     # use robobrowser module to manipulate the web page
     browser = RoboBrowser(history=True)
     browser.open('http://web1.cmu.edu.tw/stdinfo/login.asp')
     form1 = browser.get_form(id='form1')
     form1['f_id'].value = account
     form1['f_pwd'].value = pasw
     browser.submit_form(form1)
     # Being redirected to the error page means the login failed.
     if browser.state.url == "http://web1.cmu.edu.tw/stdinfo/loginerr.asp":
         self.lineEdit_2.setText('帳號密碼錯了?')
     else:
         link_one = browser.get_link(text=re.compile('.意見調查'))
         browser.follow_link(link_one)
         # FIX: renamed the local that shadowed the builtin ``list``.
         pending = []
         for l in browser.get_links(text=re.compile('.填寫.')):
             pending.append(l)
         # Drop the first match -- presumably not a real survey; TODO confirm.
         pending.pop(0)
         for li in pending:
             browser.follow_link(li)
             form2 = browser.get_form(id='thisform')
             form2['Cos_Q1'].value = '1'
             browser.submit_form(form2)
         self.lineEdit_2.setText('Done!')
Exemplo n.º 10
0
def find_download_page(podcast, episode):
    """Follow the download button for *podcast* episode *episode*.

    Returns the final response object after following the link.
    """
    browser = RoboBrowser(history=True)
    browser.open('https://www.trancepodcasts.com/download/{:s}-{:d}/'.format(
        podcast, episode))

    link = browser.find('a', attrs={'rel': 'nofollow', 'class': 'btn'})
    browser.follow_link(link)
    # FIX: the original ended with a bare ``browser.response`` expression (a
    # no-op) and implicitly returned None; return the response to the caller.
    # Also removed the unused ``download_base`` local.
    return browser.response
Exemplo n.º 11
0
def dirty_get_mp3_url(yt_url):
    """Convert *yt_url* via youtubeinmp3.com.

    Returns a tuple (final download URL, decoded '<title>.mp3' filename).
    """
    browser = RoboBrowser(history=True)
    browser.open("http://www.youtubeinmp3.com/")

    conversion_form = browser.get_form(id="form")
    conversion_form["video"].value = yt_url
    browser.submit_form(conversion_form)

    download_link = browser.get_link(id="download")
    browser.follow_link(download_link)

    # The track title is URL-encoded in the 't=' query parameter.
    track_name = unquote(browser.url.split("t=")[-1]) + ".mp3"
    return (browser.url, track_name)
Exemplo n.º 12
0
def gettab(keyword):
    """Search tabs4acoustic.com for *keyword* and return the first tab's text."""
    browser = RoboBrowser(history=True, parser='html5lib')
    browser.open('https://www.tabs4acoustic.com/')

    # Locate the search form by its 'recherche' action and submit the query.
    search_form = browser.get_form(action=re.compile('recherche'))
    search_form['FindMe'].value = keyword
    browser.submit_form(search_form)

    # Follow the first result link, then pull the preformatted tab body.
    results = browser.find('div', id='page_content')
    browser.follow_link(results.find('a'))
    return browser.find('div', id='tab_zone').find('pre').text
Exemplo n.º 13
0
class Downloader():
    """Scrapes lectulandia.cc book pages and downloads epubs via beeupload."""

    def __init__(self, proxy=None, worker_num=0):
        # worker_num is only used for log labelling.
        self.worker_num = worker_num
        session = Session()
        if proxy is not None:
            session.proxies = {'http': proxy, 'https': proxy}
        self.browser = RoboBrowser(history=True,
                                   parser='html.parser',
                                   session=session)

    def get_download_link(self, book_url):
        """Return the absolute download.php link found on *book_url*, or None."""
        self.browser.open(book_url)
        for link in self.browser.find_all("a"):
            if "download.php?t=1" in str(link):
                return f"https://www.lectulandia.cc{link['href']}"

    def download_book(self, download_url):
        """Resolve *download_url* through beeupload and save the file under books/.

        Returns (filename, size) on success; prints the page and returns None
        on any scrape failure.
        """
        self.browser.open(download_url)
        pattern = re.compile("var linkCode = \"(.*?)\";")
        section = pattern.findall(str(self.browser.parsed))
        bee_url = f'https://www.beeupload.net/file/{section[0]}'
        self.browser.open(bee_url)
        try:
            filename = self.browser.find(
                "div", id="fileDescription").find_all("p")[1].text.replace(
                    "Name: ", "")

            size = self.browser.find(
                "div", id="fileDescription").find_all("p")[2].text
            file_url = self.browser.find("a", id="downloadB")
            time.sleep(2)
            self.browser.follow_link(file_url)
            # FIX: the original wrote every download to the literal path
            # "books/(unknown)" (a garbled f-string); use the scraped filename.
            with open(f"books/{filename}", "wb") as epub_file:
                epub_file.write(self.browser.response.content)
            return filename, size
        # FIX: narrowed a bare ``except:`` (it also swallowed KeyboardInterrupt).
        except Exception:
            print(self.browser.parsed)

    def get_book_page_list(self, page):
        """Return the absolute book URLs listed on catalogue page *page*."""
        self.browser.open(f'https://www.lectulandia.cc/book/page/{page}/')
        return [
            f"https://www.lectulandia.cc{book['href']}"
            for book in self.browser.find_all("a", class_="card-click-target")
        ]

    def download_full_page(self, page):
        """Download every book on catalogue page *page*, logging progress."""
        print(f"Downloading page: {page} ")
        books = self.get_book_page_list(page)
        for book in books:
            time.sleep(2)
            download_url = self.get_download_link(book)
            print(f"Worker: {self.worker_num} on page: {page}",
                  self.download_book(download_url))
Exemplo n.º 14
0
def scrape_cs2040s(url, email, password):
    """Log into a Coursemology-style site and dump mission names/deadlines to JSON.

    Relies on the project helpers ``filterer`` and ``mapper`` (filter/map
    wrappers -- presumably returning lists; TODO confirm).
    """
    browser = RoboBrowser(parser='html.parser')
    browser.open(url)
    search = browser.get_form()
    search['user[email]'] = str(email)
    search['user[password]'] = str(password)
    browser.submit_form(search, submit=search.submit_fields['commit'])

    # main page -- assumes the 3rd anchor is the course link (fragile)
    browser.follow_link(browser.find_all('a')[2])

    # missions -- assumes the 12th anchor (fragile)
    browser.follow_link(browser.find_all('a')[11])

    # find names: keep <th> cells that have children and a colspan attribute
    reduced = filterer(lambda x: len(list(x.children)) >= 1,
                       browser.find_all('th'))
    reduced = filterer(lambda x: 'colspan' in x.attrs, reduced)
    # unsure of object structure so convert to list type and assess last element
    names = mapper(lambda x: list(list(x.children)[-1])[-1], reduced)

    # find deadlines: <td class="table-end-at"> cells; empty cell -> 'not yet'
    deadlines_tags = list(
        filter(lambda x: x['class'] == ['table-end-at'],
               browser.find_all('td')))
    deadlines = list(
        map(lambda x: (list(x))[0] if list(x) else 'not yet', deadlines_tags))
    curr_yr = datetime.now().year

    # returns a list of datetime strings; the site omits the year so the
    # current year is assumed (breaks across year boundaries -- TODO confirm)
    dates = mapper(
        lambda x: str(datetime.strptime(f"{curr_yr} {x}", '%Y %d %b %H:%M'))
        if x != 'not yet' else 'Not yet', deadlines)

    array = []
    for n, d in zip(names, dates):
        dic1 = {}
        dic1['title'] = n
        dic1['datetime'] = d
        array.append(dic1)

    dic = {}
    dic['data'] = array

    # scrape exam details
    # NOTE(review): hard-coded absolute output path; parameterize if reused.
    with open(
            '/Users/sherrywu1999/Desktop/untitled/callie/python/deadlines/data.json',
            'w') as json_file:
        json.dump(dic, json_file)
Exemplo n.º 15
0
def pdbfixretrieve(joblink):
	"""Poll a job page until its output links appear.

	Returns a tuple (stdout contents, output-PDB contents) as bytes.
	"""
	browser = RoboBrowser(history=True)
	browser.open(joblink)
	stdout = browser.get_links('stdout')
	# Poll every 5 s until the job publishes its stdout link.
	while not stdout:
		time.sleep(5)
		browser.open(joblink)
		stdout = browser.get_links('stdout')
	pdbout = browser.get_links('outpdb')
	browser.follow_link(stdout[0])
	stdcontent = browser.response.content
	browser.follow_link(pdbout[0])
	pdbcontent = browser.response.content
	# FIX: removed a stray ``pdb.set_trace()`` debugger breakpoint that halted
	# every call in production.
	return stdcontent, pdbcontent
Exemplo n.º 16
0
class infs_brsr:
    """This browser will have functions useful to someone
    browsing the Infusionsoft front end programatically.
    """

    def __init__(self, appname, username, password, *args, **kwargs):
        self.loggedin = False
        self.browser = RoboBrowser(history=True)
        self.appname = appname
        self.username = username
        self.password = password
        # No trailing slash -- paths appended later must supply their own '/'.
        self.baseurl = 'https://' + self.appname + '.infusionsoft.com'

    def openbase(self):
        """Open the app's landing page."""
        self.browser.open(self.baseurl)

    def login(self):
        """Submit the login form and mark the session as logged in."""
        self.openbase()
        loginform = self.browser.get_form()
        loginform.fields['username'].value = self.username
        loginform.fields['password'].value = self.password
        self.browser.submit_form(loginform)
        # This next step is probably a bad idea.  It needs
        # some form of control
        self.browser.follow_link(self.browser.get_links()[1])
        self.loggedin = True

    def getapikey(self):
        """Scrape and return the encrypted API key from the admin settings page."""
        if not self.loggedin:
            self.login()
        # FIX: ``self.baseurl`` has no trailing slash, so the original request
        # went to '...infusionsoft.comapp/...'; a '/' separator is required.
        self.browser.open(self.baseurl + '/app/miscSetting/itemWrapper?systemId=nav.admin&settingModuleName=Application&settingTabName=Application')
        pageSoup = BeautifulSoup(self.browser.response.content, 'html.parser')
        self.apikey = pageSoup.findAll(id='Application_Encrypted_Key:_data')[0].text
        return self.apikey

    def importContactCSV(self, pathToCSV='/home/jlmarks/importme.csv'):
        """Upload a contact CSV through the import wizard."""
        if not self.loggedin:
            self.login()
        importURL = "https://" + self.appname + ".infusionsoft.com/Import/jumpToWizard.jsp?update=false&profileClass=com.infusion.crm.db.importer.profiles.ContactProfile"
        self.browser.open(importURL)
        frms = self.browser.get_forms()
        # Pick the (last) form that carries an 'id' field; remember the id.
        for eachform in frms:
            if 'id' in eachform.fields.keys():
                self.thisimportid = eachform['id'].value
                correctform = eachform
        correctform.fields.pop('Back')
        correctform.fields['importFile'].value = open(pathToCSV, 'rb')
        self.browser.submit_form(correctform)
Exemplo n.º 17
0
def fetch():
    """Log in and download one result file per search hit for each actor query.

    NOTE(review): this mixes mechanize-style calls (``br.submit()``,
    ``br.title()``, ``br.links()``, ``follow_link(text_regex=...)``) with a
    RoboBrowser instance; confirm which browser library is actually intended.
    """
    USERNAME = '******'
    PASSWORD = '******'
    result_no = 0
    br = RoboBrowser()
    br.open(LOGIN_URL)
    print(br)
    br.get_form(id="fm1")
    br['username'].value = USERNAME
    br['password'].value = PASSWORD
    resp = br.submit()

    # Automatic redirect sometimes fails, follow manually when needed
    if 'Redirecting' in br.title():
        resp = br.follow_link(text_regex='click here')
        print(resp)

    # Loop through the searches, keeping fixed query parameters
    for actor in VARIABLE_QUERY:
        # FIX: converted a leftover Python 2 ``print >> sys.stderr`` statement
        # (a TypeError under Python 3) into a print() call.
        print('***', actor, file=sys.stderr)
        # Lets do the actual query now
        br.open(SEARCH_URL + FIXED_QUERY + actor)
        # The query actually gives us links to the content pages we like,
        # but there are some other links on the page that we ignore
        nice_links = [
            l for l in br.links()
            if 'good_path' in l.url and 'credential' in l.url
        ]
        if not nice_links:  # Maybe the relevant results are empty
            break
        for link in nice_links:

            response = br.follow_link(link)
            # FIX: was ``print(sys.stderr, br.title())`` which printed the
            # stream object instead of writing to stderr.
            print(br.title(), file=sys.stderr)
            # Increment output filenames, open and write the file
            result_no += 1
            # FIX: the original ``print(out, response.read())`` printed the
            # file object to stdout and never wrote the file; write the
            # payload through a context manager instead.
            with open('result%d' % result_no, 'w') as out:
                out.write(response.read())
            # Nothing ever goes perfectly, ignore if we do not get page
            #  except RoboBrowser:
            #     print(sys.stderr, "Response error (probably 404)")
            # Let's not hammer the site too much between fetches
            time.sleep(1)
Exemplo n.º 18
0
    def gather(self):
        """Walk up to ``self.max_pages`` pages, accumulating links in ``self.list``."""
        browser = RoboBrowser()
        browser.open(self.url)

        for page in range(self.max_pages):
            found = browser.get_links()

            if page == 0:
                # First page: take every link.
                self.list.extend(found)
            else:
                # Later pages: keep only links that pass the membership test.
                for candidate in found:
                    if self.isInTheList(candidate):
                        self.list.append(candidate)

            # Navigate using the page-th accumulated link.
            browser.follow_link(self.list[page])
Exemplo n.º 19
0
    def gather(self):
        """Crawl ``self.max_pages`` pages starting at ``self.url``, collecting links."""
        browser = RoboBrowser()
        current = 0
        browser.open(self.url)

        while current < self.max_pages:
            page_links = browser.get_links()

            if current == 0:
                # Seed the list with everything on the first page.
                for each in page_links:
                    self.list.append(each)
            else:
                # Afterwards only keep links that pass the membership check.
                for each in page_links:
                    if self.isInTheList(each):
                        self.list.append(each)

            browser.follow_link(self.list[current])
            current += 1
Exemplo n.º 20
0
def _login():
    """Interactively log into Lloyds online banking.

    Prompts for user ID, password, memorable information and an account name,
    then returns the RoboBrowser positioned on that account's export page.
    """

    username = input('User ID: ')
    # FIX(review): this region was corrupted in the original source
    # (``getpass('Password: '******'html.parser')``); reconstructed as a
    # password prompt plus a browser built with an explicit parser.
    password = getpass('Password: ')
    browser = RoboBrowser(parser='html.parser')

    browser.open('http://online.lloydsbank.co.uk/personal/logon.login.jsp')

    form = browser.get_form('frmLogin')
    form['frmLogin:strCustomerLogin_userID'] = username
    form['frmLogin:strCustomerLogin_pwd'] = password
    browser.submit_form(form)

    mem_info = getpass('Memorable information: ').lower()

    form_name = 'frmentermemorableinformation1'
    option_name = ':strEnterMemorableInformation_memInfo{}'
    form = browser.get_form(form_name)
    # The page asks for e.g. "Character 2 :" -- collect the requested indices.
    # FIX: raw string for the regex pattern.
    indices = re.findall(r'Character (\d+) :', form.parsed.text)
    indices = [int(x) for x in indices]

    for i, idx in enumerate(indices):
        form[form_name +
             option_name.format(i + 1)] = '&nbsp;' + mem_info[idx - 1]
    browser.submit_form(form)

    assert 'Lloyds Bank - Personal Account Overview' in browser.parsed.title

    # Map account display names to their overview links.
    accounts = {}
    for link in browser.get_links():
        if 'lnkAccName' in link.attrs.get('id', ''):
            accounts[link.text] = link

    print('Accounts:', list(accounts))

    account = input('Account: ')

    browser.follow_link(accounts[account])
    export_link = browser.get_link(title='Export')
    browser.follow_link(export_link)

    return browser
Exemplo n.º 21
0
    def test_add_valid_link__when_the_link_is_invalid__then_the_link_is_not_in_list(
            self):
        """A link whose follow raises RoboError must not end up in url_list."""
        finder = LinksFinder(ANY_NOT_SECURED_URL)
        bad_link = MagicMock()
        browser = RoboBrowser(parser=PARSER, history=True)
        # Simulate a navigation failure for any followed link.
        browser.follow_link = MagicMock(side_effect=RoboError)

        finder.add_valid_link(browser, bad_link)
        collected = finder.url_list

        self.assertTrue(bad_link not in collected)
Exemplo n.º 22
0
def download_internal(user_id, from_date, to_date):
    """Download the csv files for the transaction between the given dates"""
    # Create the browser and open the lloyds login page
    browser = RoboBrowser(parser='html5lib')
    browser.open(
        'https://online.lloydsbank.co.uk/personal/logon/login.jsp?WT.ac=hpIBlogon'
    )

    # Keep submitting credentials until the memorable-information page appears.
    while 'Enter Memorable Information' not in browser.parsed.title.text:
        print(browser.parsed.title.text)
        form = browser.get_form(id='frmLogin')
        form['frmLogin:strCustomerLogin_userID'] = str(user_id)
        # FIX(review): this region was corrupted in the original source
        # (``prompt('Enter password: '******'re logged in...``); reconstructed
        # as a password prompt followed by submitting the login form.
        form['frmLogin:strCustomerLogin_pwd'] = prompt('Enter password: ')
        browser.submit_form(form)

    # We're logged in, now enter memorable information
    print(browser.parsed.title.text)
    form = browser.get_form(id='frmentermemorableinformation1')
    field = 'frmentermemorableinformation1:strEnterMemorableInformation_memInfo{0}'

    for i in range(1, 4):
        label = browser.find("label", {"for": field.format(i)})
        form[field.format(i)] = '&nbsp;' + prompt(label.text.strip())
    browser.submit_form(form)

    # hopefully now we're logged in...
    print(browser.parsed.title.text)
    links = []
    for link in browser.get_links("View statement"):
        if link.text == "View statement":
            links.append(link)

    # loop through all accounts
    for index, link in enumerate(links):
        acc_name = link['data-wt-ac'].split(" resource")[0]
        print(acc_name)
        print(browser.parsed.title)
        browser.follow_link(link)
        yield acc_name, download_account_internal(browser, from_date, to_date)
        browser.back()
Exemplo n.º 23
0
def gatherData(user, password):
    """Log into FEUP's SIGARRA and collect (credits, grades) for passed courses."""
    baseURL = 'https://sigarra.up.pt/feup/pt/'
    browser = RoboBrowser(history=True, parser='html.parser')
    browser.open(baseURL + 'web_page.Inicial')

    # Gets the login form
    form = browser.get_form(action=re.compile(r'validacao'))

    # Updates the login form with the user credentials
    form['p_user'].value = 'up' + user
    form['p_pass'].value = password

    browser.submit_form(form)

    # Goes to the user profile
    browser.open(baseURL + 'fest_geral.cursos_list?pv_num_unico=' + user)

    # Opens the extended view
    extended = browser.find(title='Visualizar informações no contexto do curso')
    browser.follow_link(extended)

    credits = []
    grades = []

    # Grade rows come in three css classes ("i", "p", "o"); the original
    # repeated the identical loop three times -- collapsed into one.
    for css_class in ('i', 'p', 'o'):
        for row in browser.find_all(class_=css_class):
            if row.find(class_='n aprovado'):
                credits.append(row.find(class_='k n').text)
                grades.append(row.find(class_='n aprovado').text)

    return credits, grades
Exemplo n.º 24
0
def Revigo_ana(Go_string):
    """Submit *Go_string* (a GO term list) to REVIGO and save the exported CSV."""
    from robobrowser import RoboBrowser
    import re
    import lxml  # parser backend for RoboBrowser(parser="lxml")

    br = RoboBrowser(parser="lxml")
    br.open("http://revigo.irb.hr/")

    form = br.get_form()
    form["goList"].value = Go_string

    br.submit_form(form)

    download_csv_link = br.find("a", href=re.compile("export.jsp"))
    br.follow_link(download_csv_link)
    csv_content = br.response.content.decode("utf-8")
    # FIX: write through a context manager so the handle is closed even if the
    # write fails (original used a bare open/close pair).
    with open('Revigo_Analysis_results.csv', 'w') as f:
        f.write(csv_content)
    print("Revigo results written to file: Revigo_Analysis_results")
Exemplo n.º 25
0
def scrape_revigo_csv(input_GOstats_tsv,
                      out_file,
                      pvalue_cutoff=0.05,
                      fdr_cutoff=1.0):
    """Run GOstats terms through REVIGO and write the exported CSV to *out_file*.

    If no GO term passes the cutoffs, only the CSV header line is written.
    """
    # get input goterms from GOstats result
    goterms = GOstats2Revigo(input_GOstats_tsv,
                             pvalue_cutoff=pvalue_cutoff,
                             fdr_cutoff=fdr_cutoff,
                             output_column=3)
    # FIX: the output handle is now a context manager, so it is closed even
    # when the scrape raises midway.
    with open(out_file, "w") as oh:
        if goterms:
            br = RoboBrowser(parser="lxml")
            br.open("http://revigo.irb.hr/")

            form = br.get_form()
            form["goList"].value = goterms

            br.submit_form(form)

            # Visit the R-script export first and go back -- mirrors the
            # site's navigation flow (the R code itself is discarded).
            download_rsc_link = br.find("a", href=re.compile("toR.jsp"))
            br.follow_link(download_rsc_link)

            br.back()

            download_csv_link = br.find("a", href=re.compile("export.jsp"))
            br.follow_link(download_csv_link)
            oh.write(br.response.content.decode("utf-8"))
        else:
            # Header-only output keeps downstream CSV readers happy.
            oh.write(
                "term_ID,description,frequency,plot_X,plot_Y,plot_size,log10 p-value,userVal_2,uniqueness,dispensability,representative,eliminated"
            )
Exemplo n.º 26
0
def scrape_cosmo(url, email, password):
    """Log into the course site and return mission deadlines.

    Returns a list where each entry is a ``datetime`` (year assumed to be the
    current one) or the string 'Not yet' for unpublished deadlines. Relies on
    the project helper ``mapper``.
    """
    browser = RoboBrowser()
    # BUG FIX: the original opened an undefined global ``tlink`` and ignored
    # the ``url`` parameter.
    browser.open(url)
    search = browser.get_form()
    search['user[email]'] = str(email)
    search['user[password]'] = str(password)
    browser.submit_form(search, submit=search.submit_fields['commit'])

    # main page (assumes the 3rd anchor -- fragile)
    browser.follow_link(browser.find_all('a')[2])
    # missions (assumes the 18th anchor -- fragile)
    browser.follow_link(browser.find_all('a')[17])

    # find deadlines: <td class="table-end-at"> cells; empty cell -> 'not yet'
    deadlines_tags = list(filter(lambda x: x['class'] == ['table-end-at'],
                                 browser.find_all('td')))
    deadlines = list(map(lambda x: (list(x))[0] if list(x) else 'not yet',
                         deadlines_tags))

    curr_yr = datetime.now().year

    # returns a list of datetime objects
    return mapper(lambda x: datetime.strptime(f"{curr_yr} {x}", '%Y %d %b %H:%M')
                  if x != 'not yet' else 'Not yet', deadlines)
Exemplo n.º 27
0
def get_content(novel, author):
    """
    Fetch every page of *novel* and return the concatenated content string.
    Returns '' when the first page cannot be opened.
    """
    browser = RoboBrowser(parser="html.parser",
                          history=True,
                          timeout=30,
                          tries=5)
    novel_link = novel.find('td', class_='tal').a
    link = host + novel_link['href']
    time.sleep(random.randint(R_START, R_END))
    # browser.follow_link(novel_link)
    # FIX: narrowed two bare ``except:`` clauses to ``except Exception`` --
    # the bare form also swallowed KeyboardInterrupt/SystemExit.
    try:
        browser.open(link)
    except Exception:
        print('link failed', link)
        return ''
    else:
        print('novel link', browser.url)
    contents = list()
    # look all page in a novel
    while True:
        content = get_cell_content(browser, author)
        contents.append(content)
        if is_end_page(browser):
            break
        time.sleep(random.randint(R_START, R_END))
        next_page_link = next_page(browser)
        if next_page_link is None:
            break
        try:
            browser.follow_link(next_page_link)
        except Exception:
            # NOTE(review): on persistent failure this re-reads the same page;
            # consider a retry limit.
            print('link failed', browser.url)
            continue
        else:
            print('page link', browser.url)
    return "\n".join(contents)
Exemplo n.º 28
0
def GeneOntology(name):
    """Submit the gene list file *name* to geneontology.org and save the table.

    Output goes to ``GOoutput/<name>`` under the gene-list directory.
    """
    print("Starting GeneOntology for " + name)
    br = RoboBrowser(parser="html.parser")
    br.open("http://geneontology.org/")

    # The second form on the page is the enrichment-analysis form.
    form = br.get_forms()[1]

    form["species"].value = "IXOSC"

    # NOTE(review): hard-coded absolute path; parameterize if reused.
    os.chdir("/home/david/Documents/blast/Blastfiles/outputfiles/Genelists")
    # FIX: read the gene list through a context manager instead of an unclosed
    # handle plus manual line-by-line string concatenation; also removed the
    # unused ``geneinput``/``species`` locals and the no-op ``os.listdir``.
    with open(name, "r") as gene_file:
        form["input"] = gene_file.read()

    br.submit_form(form)

    table_link = br.find("a",
                         href=re.compile("/tools/compareToRefListTxt.jsp"))
    br.follow_link(table_link)
    csv_content = br.response.content.decode("utf-8")

    with open("GOoutput/" + name, "w") as savefile:
        savefile.write(csv_content)
    print("finished")
def get_webdav_urls(username, password):
    """Log into CTools and return WebDAV URLs for the user's course sites."""

    # log in

    browser = RoboBrowser(history=True)
    browser.open('http://ctools.umich.edu')
    browser.follow_link(browser.find(id='ctoolsLogin'))

    login_form = browser.get_form()
    login_form['login'].value = username
    login_form['password'].value = password
    browser.submit_form(login_form)

    # get the results

    browser.follow_link(browser.find(
        class_='toolMenuLink ',
        title='For creating, revising, and deleting course and project sites'
    ))
    browser.open(browser.find(class_='portletMainIframe').attrs['src'])

    results = []

    course_links = browser.select('#sitesForm td h4 a[target="_top"]')
    for course_link in course_links:

        if not course_link.attrs:
            continue
        href = course_link.attrs['href']
        # Skip personal ('~') workspaces.
        if '~' in href:
            continue

        # FIX: raw string for the regex -- '\/' is an invalid escape in a
        # normal string literal (DeprecationWarning); the pattern is the same.
        results.append(
            'https://ctools.umich.edu/dav' +
            findall(r'/[^/]+$', href)[0]
        )

    return results
Exemplo n.º 30
0
def autoRevigo(name):
    """Submit the GO terms in Gprofiler/<name> to REVIGO.

    Prints the generated R script and saves the exported CSV to
    Revigo/<name>.
    """
    os.chdir("/home/david/Documents/BenoitLab/RNA-seq/Gprofiler/")
    # readlines() keeps each line's trailing newline, so appending another
    # "\n" leaves a blank line between terms -- preserved from the original
    # behaviour (REVIGO tolerates blank lines in its input).
    with open(name, "r") as infile:
        goterms = "".join(line + "\n" for line in infile.readlines())

    br = RoboBrowser(parser="html")
    br.open("http://revigo.irb.hr/")

    form = br.get_form()
    form["goList"].value = goterms

    br.submit_form(form)

    # Fetch and print the generated R plotting script.
    download_rsc_link = br.find("a", href=re.compile("toR.jsp"))
    br.follow_link(download_rsc_link)
    r_code = br.response.content.decode("utf-8")
    print(r_code)

    br.back()

    # Export the results table as CSV and save it next to the other runs.
    download_csv_link = br.find("a", href=re.compile("export.jsp"))
    br.follow_link(download_csv_link)
    csv_content = br.response.content.decode("utf-8")

    with open("/home/david/Documents/BenoitLab/RNA-seq/Revigo/" + name,
              "w") as writefile:
        writefile.write(csv_content)
Exemplo n.º 31
0
    def pushedbutton(self, b):
        """Log in to the CMU student portal with the credentials from the
        line edits and auto-fill every pending course evaluation with '3'.

        Progress/status is reported through self.lineEdit_2.
        """
        account = self.lineEdit.text()
        pasw = self.lineEdit_3.text()
        #use robobrowser module to manipulate web page
        browser = RoboBrowser(history=True)
        browser.open('http://web1.cmu.edu.tw/stdinfo/login.asp')
        form1 = browser.get_form(id='form1')
        form1['f_id'].value = account
        form1['f_pwd'].value = pasw
        browser.submit_form(form1)
        # Failed logins are redirected to loginerr.asp.
        if browser.state.url == "http://web1.cmu.edu.tw/stdinfo/loginerr.asp":
            self.lineEdit_2.setText('帳號密碼錯了?')
            return
        self.lineEdit_2.setText('成功登入,填寫中....')
        browser.follow_link(browser.get_link(text='教師教學意見調查'))
        # Skip the first link -- presumably not a per-course form; TODO confirm.
        # (Renamed from `list`, which shadowed the builtin.)
        fill_links = list(browser.get_links(text='填寫'))[1:]
        for fill_link in fill_links:
            browser.follow_link(fill_link)
            form2 = browser.get_form(id='thisform')
            # Questions CH_1..CH_10 all get the neutral answer '3'.
            for i in range(1, 11):
                form2['CH_%d' % i].value = '3'
            browser.submit_form(form2)
        self.lineEdit_2.setText('Done!')
Exemplo n.º 32
0
 def pushedbutton(self,b):
     """Log in to the CMU student portal with the credentials from the
     line edits and auto-fill every pending course evaluation with '3'.

     Progress/status is reported through self.lineEdit_2.
     """
     account = self.lineEdit.text()
     pasw = self.lineEdit_3.text()
     #use robobrowser module to manipulate web page
     browser = RoboBrowser(history = True)
     browser.open('http://web1.cmu.edu.tw/stdinfo/login.asp')
     form1 = browser.get_form(id = 'form1')
     form1['f_id'].value = account
     form1['f_pwd'].value = pasw
     browser.submit_form(form1)
     # Failed logins are redirected to loginerr.asp.
     if browser.state.url == "http://web1.cmu.edu.tw/stdinfo/loginerr.asp":
         self.lineEdit_2.setText('帳號密碼錯了?')
         return
     self.lineEdit_2.setText('成功登入,填寫中....')
     browser.follow_link(browser.get_link(text = '教師教學意見調查'))
     # Skip the first link -- presumably not a per-course form; TODO confirm.
     # (Renamed from `list`, which shadowed the builtin.)
     fill_links = list(browser.get_links(text = '填寫'))[1:]
     for fill_link in fill_links:
         browser.follow_link(fill_link)
         form2 = browser.get_form(id = 'thisform')
         # Questions CH_1..CH_10 all get the neutral answer '3'.
         for i in range(1, 11):
             form2['CH_%d' % i].value = '3'
         browser.submit_form(form2)
     self.lineEdit_2.setText('Done!')
Exemplo n.º 33
0
    def getExcelFromWoS(self, url, mark, totalMarked, outputLocationPath):
        """Download one 500-record window of a Web of Science citation
        report as Excel and return it as a pandas DataFrame.

        Opens the citation-report summary at *url*, requests an XLS export
        for records [mark, mark+499] (clamped to *totalMarked*), downloads
        the file and parses it (header row 26). Returns None when the page
        has no summary-report link. Progress is reported via self.sres.
        """
        mark_from = str(mark)
        sres = self.sres

        # Clamp the window end to the total number of marked records.
        if mark + 499 > int(totalMarked):
            mark_to = totalMarked
        else:
            mark_to = str(mark + 499)

        sres.print(command='log', msg='[%s-%s레코드] 엑셀을 받을 준비를 합니다.'%(mark_from, mark_to))

        excelBrowser = RoboBrowser(history=True, parser='lxml')
        excelBrowser.open(url)
        reportLink = excelBrowser.select('a.citation-report-summary-link')

        if len(reportLink) == 0:
            sres.print(command='err', msg='[%s-%s레코드] 요약 보고서가 없습니다.'%(mark_from, mark_to))
            return None

        sres.print(command='log', msg='[%s-%s레코드] 요약 보고서를 엽니다.'%(mark_from, mark_to))
        excelBrowser.follow_link(reportLink[0])

        # Lift the hidden state fields out of the summary form; they have to
        # be echoed back to the export endpoints below.
        summary_records_form = excelBrowser.get_form(id='summary_records_form')
        qid = summary_records_form['qid'].value
        filters = summary_records_form['filters'].value
        sortBy = summary_records_form['sortBy'].value
        timeSpan = summary_records_form['timeSpan'].value
        endYear = summary_records_form['endYear'].value
        startYear = summary_records_form['startYear'].value
        rurl = summary_records_form['rurl'].value

        piChart = summary_records_form['piChart'].value
        # BUG FIX: both chart values were read from 'piChart' (copy-paste);
        # read the 'toChart' field for toChart -- TODO confirm the form
        # exposes a field of that name.
        toChart = summary_records_form['toChart'].value

        makeExcelURL = "http://apps.webofknowledge.com/OutboundService.do?"
        makeExcelParam = ""
        makeExcelParam += "action=go"
        makeExcelParam += "&save_options=xls"

        makeExcelURL += makeExcelParam

        sres.print(command='log', msg='[%s-%s레코드] 엑셀 데이터 제작을 요청합니다.'%(mark_from, mark_to))
        # Ask the server to materialise the export for our record window.
        excelBrowser.session.post(makeExcelURL, data={
            "selectedIds": "",
            "displayCitedRefs":"",
            "displayTimesCited":"",
            "displayUsageInfo":"true",
            "viewType":"summary",
            "product":"WOS",
            "rurl":rurl,
            "mark_id":"WOS",
            "colName":"WOS",
            "search_mode":"CitationReport",
            "view_name":"WOS-CitationReport-summary",
            "sortBy": sortBy,
            "mode":"OpenOutputService",
            "qid":qid,
            "SID":self.SID,
            "format":"crsaveToFile",
            "mark_to":mark_to,
            "mark_from":mark_from,
            "queryNatural":"",
            "count_new_items_marked":"0",
            "use_two_ets":"false",
            "IncitesEntitled":"no",
            "value(record_select_type)":"range",
            "markFrom":mark_from,
            "markTo":mark_to,
            "action":"recalulate",
            "start_year_val":"1900",
            "end_year_val":"2019",
            "viewAbstractUrl":"",
            "LinksAreAllowedRightClick": "full_record.do",
            "filters":filters,
            "timeSpan": timeSpan,
            "db_editions": "",
            "additional_qoutput_params": "cr_qid="+qid,
            "print_opt":"Html",
            "include_mark_from_in_url":"true",
            "endYear":endYear,
            "startYear":startYear,
            "piChart":piChart,
            "toChart":toChart,
            "fields":"DUMMY_VALUE"
        })

        # Build the download URL on the export-ticket server; note the export
        # qid is the POST qid + 1 (observed server behaviour).
        ExcelActionURL = "https://ets.webofknowledge.com"
        ExcelAction = "/ETS/ets.do?"

        ExcelParam = "mark_from=1"
        ExcelParam += "&product=UA"
        ExcelParam += "&colName=WOS"
        ExcelParam += "&displayUsageInfo=true"
        ExcelParam += "&parentQid=" + qid
        ExcelParam += "&rurl=" + requests.utils.quote(rurl)
        ExcelParam += "&startYear=" + startYear
        ExcelParam += "&mark_to=" + mark_to
        ExcelParam += "&filters=" + requests.utils.quote(filters)
        ExcelParam += "&qid=" + str(int(qid)+1)
        ExcelParam += "&endYear=" + endYear
        ExcelParam += "&SID=" + self.SID
        ExcelParam += "&totalMarked=" + totalMarked
        ExcelParam += "&action=crsaveToFile"
        ExcelParam += "&timeSpan=" + requests.utils.quote(timeSpan)
        ExcelParam += "&sortBy=" + sortBy
        ExcelParam += "&displayTimesCited=false"
        ExcelParam += "&displayCitedRefs=true"
        ExcelParam += "&fileOpt=xls"
        ExcelParam += "&UserIDForSaveToRID=null"

        ExcelActionURL += ExcelAction
        ExcelActionURL += ExcelParam

        sres.print(command='log', msg='[%s-%s레코드] 엑셀 데이터를 다운로드 받습니다.'%(mark_from, mark_to))
        res = requests.get(ExcelActionURL)
        # BUG FIX: str.find() returns 0 for a match at index 0, which the old
        # '> 0' tests silently missed; use substring membership instead.
        if "<html>" in res.text or "Error report</title>" in res.text:
            sres.print(command='err', msg='%s-%s 레코드, 서버가 에러를 반환'%(mark_from, mark_to))

        # Spool the response to a random temporary name, parse, then clean up.
        ofileName = "%X"%random.getrandbits(128)
        with open(ofileName, 'wb') as rsFile:
            rsFile.write(res.content)
        resPD = pd.read_excel(ofileName, header=26)
        os.remove(ofileName)

        return resPD
Exemplo n.º 34
0
class MarketPoster:
    """Automates posting trade offers on the Zybez RS2007 price forums.

    Call login() once; postItem() replaces any existing offers for an item
    before posting the new one.
    """

    def __init__(self):
        # IPB/Curse authentication endpoint for the forums.
        self.login_url = 'http://forums.zybez.net/index.php?' \
                         'app=curseauth&module=global&section=login'
        self.browser = RoboBrowser(history=False, parser='html.parser')
        self.logged_in = False

    def login(self, login_name, login_password):
        """Sign in; returns True (and sets self.logged_in) when the site
        redirects back to the forum index, i.e. the login succeeded."""
        self.browser.open(self.login_url)

        sign_in_form = self.browser.get_form(class_='authentication-box')
        sign_in_form['ips_username'].value = login_name
        sign_in_form['ips_password'].value = login_password
        self.browser.submit_form(sign_in_form)

        correct_url = 'http://forums.zybez.net/index.php'
        if self.browser.url == correct_url:
            self.logged_in = True
            return True
        else:
            return False

    def deleteItemPosts(self, post):
        """Delete all of our active offers for post.item_name."""
        item_url = self.getItemURL(post.item_name)
        self.browser.open(item_url)
        items_to_delete = self.browser.get_links(href=re.compile(
                "do=trade-delete"))
        for i in items_to_delete:
            self.browser.follow_link(i)
        # Reload the item page so the browser state is clean for the caller.
        self.browser.open(item_url)

    def deleteAllPosts(self):
        """Remove every active offer via the site's bulk-remove link."""
        self.browser.open('http://forums.zybez.net/runescape-2007-prices')
        delete_button = self.browser.get_link('Remove all active offers')
        if delete_button is not None:
            self.browser.follow_link(delete_button)

    def getItemURL(self, item_name):
        """Resolve an item name to its price-guide page URL via the JSON API."""
        item_name = item_name.split()
        item_name = '+'.join(item_name)

        item_data_url = 'http://forums.zybez.net' \
                        '/runescape-2007-prices/api/item/' + item_name

        item_data_dict = self.browser.session.get(item_data_url).json()
        item_id = item_data_dict['id']
        item_url = 'http://forums.zybez.net/runescape-2007-prices/' \
                   + str(item_id) + '-' + item_name
        return item_url

    def postItem(self, post):
        """Post one offer described by *post* (price, quantity, note,
        offer_type, contact_method), replacing any existing ones first."""
        price = post.price
        quantity = post.quantity
        note = post.note
        offer_type = post.offer_type
        contact_method = post.contact_method

        self.deleteItemPosts(post)

        post_item_form = self.browser.find(
            action='http://forums.zybez.net/index.php?app=priceguide&module='
                   'public&section=action&do=trade-add')
        post_item_form = self.browser.get_form(post_item_form)

        # Fill out and submit form
        post_item_form['type'].value = str(int(offer_type))
        post_item_form['qty'].value = quantity
        post_item_form['price'].value = price
        post_item_form['notes'].value = note
        post_item_form['contact'].value = str(int(contact_method))

        self.browser.submit_form(post_item_form)
    def attack(self):
        # NOTE(review): this method appears to belong to a different class
        # than MarketPoster above (it reads self.url, self.atr_form,
        # self.user_list, self.pass_list, self.log, self.log_csv), and the
        # body is corrupted: several string literals were redacted to
        # `"******"` and multiple statements were merged onto one line
        # below, so it does not parse. Code preserved byte-for-byte pending
        # recovery of the original source.
        #
        # Intent (as far as the code shows): brute-force a login form, then
        # probe for exposed session IDs and non-destroyed sessions.
        user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0'
        accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        accept_language = 'en-US,en;q=0.5'

        s = requests.Session()
        s.headers['User-Agent'] = user_agent
        s.headers['Accept'] = accept
        s.headers['Accept-Language'] = accept_language

        robo = RoboBrowser(session=s, history=True, parser='html.parser')
        robo.open(self.url)
        # NOTE(review): `is` compares identity, not equality -- these string
        # comparisons should use `==` (left unchanged in this doc-only pass).
        if self.atr_form is "id":
            form = robo.get_form(id=self.atr_value)
        elif self.atr_form is "class":
            form = robo.get_form(class_=self.atr_value)
        elif self.atr_form is "name":
            form = robo.get_form(name=self.atr_value)
        elif self.atr_form is "action":
            form = robo.get_form(action=self.atr_value)
        else:
            self.log.append("Tidak Menemukan Form Login")
            return None

        # Submit a deliberately wrong login to learn the "failed login" URL.
        form[self.name_input[0]].value = "xxxxx"
        form[self.name_input[1]].value = "xxxxx"
        robo.submit_form(form)
        urlFailed = str(robo.url)

        # Run the brute-force attempts.
        for username in self.user_list:
            for password in self.pass_list:
                robo.open(self.url)
                form[self.name_input[0]].value = username
                form[self.name_input[1]].value = password
                robo.submit_form(form)
                url = str(robo.url)
                if url != urlFailed:
                    self.log_csv.append(["Brute Force", self.url])
                    self.log.append("login is success or you has been locked out of attempts")
                    # NOTE(review): the next two lines are the corrupted span
                    # (redacted literals, merged statements).
                    self.log.append("Url after login : "******"Username        : "******"Password        : "******"Sensitive Data Exposed", self.url])
                            self.log.append("url contain sensitive data maybe have vulnerability")
                    try:
                        # Look for an exposed PHP session cookie.
                        sess = robo.session.cookies['PHPSESSID']
                        self.log_csv.append(["Session ID Exposed", self.url])
                        self.log.append("found PHPSESSID maybe have vulnerability fixation attack")
                        self.log.append("PHPSESSID : " + sess)

                        # Try logging out and navigating back to check whether
                        # the session was actually destroyed.
                        urlLog = robo.url
                        linkLogout = robo.get_link(text="logout")
                        if linkLogout is not None:
                            robo.follow_link(linkLogout)
                            robo.back(n=1)
                            if robo.url == urlLog:
                                self.log.append("session not destroyed maybe have vulnerabilty")
                    except:
                        pass
                    return
                time.sleep(5)
        self.log.append("Brute Force failed - Login not successfull")
        return
Exemplo n.º 36
0
class tat:
    global pw

    def __init__(self, appname=None):
        self.startingpath = os.path.abspath(os.curdir)
        if not appname:
            self.appname = self.getappname()
        else:
            self.appname = appname
        self.apppath = os.path.join(self.startingpath, self.appname)
        if not os.path.exists(self.apppath):
            os.mkdir(self.apppath)
        os.chdir(self.apppath)
        self.mapping = {}
        self.mapping["Contact"] = -1
        self.mapping["Affiliate"] = -3
        self.mapping["ContactAction"] = -5
        self.mapping["Company"] = -6
        self.mapping["OrderItem"] = -9

        self.menu()

    def menu(self, context="initial"):
        if context is "initial":
            self.baseurl = "https://" + self.appname + ".infusionsoft.com/"
            self.apikey = self.getapikey()
            self.svr = ISServer.ISServer(self.appname, self.apikey)
            if not os.path.exists(self.apppath):
                os.mkdir(self.apppath)
            os.chdir(self.apppath)
            if not os.path.exists("files"):
                os.mkdir("files")
            os.chdir("files")
            self.usermenu = {}
            self.usermenu["downloadAPITables"] = "apit"
            self.usermenu["play"] = "play"
            self.usermenu["reports"] = "rpts"
        # for eachitem in self.usermenu.keys():
        #     print eachitem + ":\t" + self.usermenu[eachitem]
        # thisChoice = raw_input('please make a choice: ').strip(' \n\t')
        thisChoice = "play"
        if thisChoice == "apit":
            self.handleAPItables()
        elif thisChoice == "play":
            self.play()
        elif thisChoice == "rpts":
            self.downloadAllReports()
        else:
            self.inchandlefiles()

    def handlefiles(self):
        os.chdir(self.startingpath)
        if not os.path.exists("files"):
            os.mkdir("files")
        os.chdir("files")
        allfiles = self.svr.getAllRecords("FileBox")
        for eachfile in allfiles:
            downloadurl = self.baseurl + "Download?Id=" + str(eachfile["Id"])
            self.browser.open(downloadurl)
            fileoutpath = os.path.join(self.startingpath, "files", eachfile["ContactId"], eachfile["FileName"])
            if not os.path.exists(os.path.dirname(fileoutpath)):
                os.makedirs(fileoutpath)
            fout = open(fileoutpath, "wb")
            fout.write(self.browser.response.content)
            fout.close()

    def inchandleAPItables(self):
        apidata = {}
        self.customfields = self.svr.getAllRecords("DataFormField")
        for eachtable in ISServer.tables.keys():
            if eachtable not in [
                "LeadSourceExpense",
                "DataFormTab",
                "GroupAssign",
                "AffResource",
                "InvoiceItem",
                "UserGroup",
                "CProgram",
                "ActionSequence",
                "Template",
                "LeadSource",
                "Status",
                "Campaignee",
                "DataFormField",
                "OrderItem",
                "DataFormGroup",
                "ProductOptValue",
                "ContactGroup",
                "Company",
                "TicketStage",
                "ProductCategoryAssign",
                "ContactGroupAssign",
            ]:
                print "starting " + eachtable
                if eachtable not in self.mapping.keys():
                    self.mapping[eachtable] = 99
                fields = ISServer.tables[eachtable] + [
                    "_" + fld["Name"] for fld in self.customfields if fld["FormId"] is self.mapping[eachtable]
                ]
                self.svr.incrementlyGetRecords(eachtable, interestingData=fields)
                print "done writing " + eachtable
            else:
                print "already completed " + eachtable
        self.apidata = apidata

    def inchandleAPItable(self, tablename):
        self.customfields = self.svr.getAllRecords("DataFormField")
        if tablename not in self.mapping.keys():
            self.mapping[tablename] = 99
        fields = ISServer.tables[tablename] + [
            "_" + fld["Name"] for fld in self.customfields if fld["FormId"] is self.mapping[tablename]
        ]
        self.svr.incrementlyGetRecords(tablename, interestingData=fields)
        print "done writing " + tablename

    def inchandlefiles(self):
        os.chdir(self.startingpath)
        self.svr.incgetfiles(self.browser)

    def downloadContact0files(self, numberofmostrecentfilestodownload):
        thesefiles = self.svr.getAllRecords("FileBox", searchCriteria={"ContactId": 0})
        for eachfile in thesefiles[-int(numberofmostrecentfilestodownload) :]:
            print "doing " + str(eachfile)
            self.svr.getfile(self.browser, eachfile)

    def play(self):
        print "she's all yours captain!"

    def downloadAReport(self, reportname):
        self.browser.open(self.baseurl + "Reports/exportResults.jsp?reportClass=" + reportname)
        reportForm = [eachform for eachform in self.browser.get_forms() if eachform.action == "qbExport.jsp"]
        if len(reportForm) > 0:
            self.browser.submit_form(reportForm[0], submit=reportForm[0].submit_fields["process"])
            with open(reportname + ".csv", "wb") as outfile:
                outfile.write(self.browser.response.content)
        else:
            print "no " + reportname

    def downloadAllReports(self):
        for reportname in [
            "AffiliateActivitySummary",
            "AffiliateLedger",
            "AffiliateRedirectActivity",
            "AffiliateReferral",
            "AffPayout",
            "AllOrders",
            "AllSales",
            "AllSalesItemized",
            "ARAgingReport",
            "CampaigneeBasic",
            "CampaigneeByDay",
            "CampaignProductConversion",
            "ClickThroughPercentage",
            "ClickThroughPercentageByEmail",
            "ContactDistributed",
            "CProgramRevenueSummary",
            "CreditCard",
            "CreditsIssued",
            "CustomerLifetimeValue",
            "DailyPayments",
            "DailyReceivables",
            "DailySalesTotals",
            "DashboardCampaign",
            "DashboardEmail",
            "DashboardLeads",
            "DashboardOrders",
            "DashboardUsers",
            "DigitalProductKey",
            "EmailBatchSearch",
            "EmailBroadcastConversionReport",
            "EmailConversion",
            "EmailSentSearch",
            "FailedCharge",
            "FaxBatchSearch",
            "FollowUpSequenceConversionReport",
            "FunnelFlowRecipient",
            "FunnelFlowRecipientWaiting",
            "FunnelGoalAchieved",
            "FunnelQueuedFlowItem",
            "FunnelUniqueContacts",
            "GroupAdds",
            "HabeasDetail",
            "InvoiceNetIncome",
            "LeadSourceConversion",
            "LeadSourceIncome",
            "LeadSourceROI",
            "LeadSourceROIByCategory",
            "MonthlyPayments",
            "MonthlyReceivables",
            "MonthlySalesTotals",
            "MonthlySalesTotalsByProduct",
            "OptOutSearch",
            "PaymentsReport",
            "PieceResponse",
            "ProductNetIncome",
            "Receivables",
            "RevenueForecastReport",
            "TaskSearch",
            "VoiceBatchSearch",
            "VoiceOptOutSearch",
            "WebformActivitySummary",
            "WebFormTracking",
        ]:
            self.downloadAReport(reportname)

    def getFilePath(self):
        return tkFileDialog.askopenfilename()

    def getFolderPath(self):
        return tkFileDialog.askdirectory()

    def getappname(self):
        return raw_input("Please enter appname:").strip("\n \t")

    def getapikey(self):
        global pw
        username = pw["username"]
        password = pw["password"]
        # Basically:
        #    #Add username and password to your global variables.
        self.browser = RoboBrowser(history=True)
        self.browser.open(self.baseurl)
        logform = self.browser.get_form()
        logform.fields["username"].value = username
        logform.fields["password"].value = password
        self.browser.submit_form(logform)
        self.browser.follow_link(self.browser.get_links()[1])
        self.browser.open(
            self.baseurl
            + "app/miscSetting/itemWrapper?systemId=nav.admin&settingModuleName=Application&settingTabName=Application"
        )
        pageSoup = BeautifulSoup(self.browser.response.content, "html.parser")
        return pageSoup.findAll(id="Application_Encrypted_Key:_data")[0].text

    def handleAPItables(self):
        apidata = {}
        self.customfields = self.svr.getAllRecords("DataFormField")
        for eachtable in ISServer.tables.keys():
            print "starting " + eachtable
            if eachtable not in self.mapping.keys():
                self.mapping[eachtable] = 99
            fields = ISServer.tables[eachtable] + [
                "_" + fld["Name"] for fld in self.customfields if fld["FormId"] is self.mapping[eachtable]
            ]
            apidata[eachtable] = self.svr.getAllRecords(eachtable, interestingData=fields)
            with open(eachtable + ".csv", "wb") as outfile:
                writer = csv.DictWriter(outfile, fields)
                writer.writeheader()
                writer.writerows(apidata[eachtable])
            print "done writing " + eachtable
        self.apidata = apidata

    def handlewebforms(self):
        # for eachid
        # webformsubmissionpath="https://" + self.appname + ".infusionsoft.com/app/webformSubmission/contactTabDetails?customFormWebResultId=" + str(x)
        pass

    def creditCardsToCSV(self):
        ccs = self.svr.getAllRecords(
            "CreditCard",
            interestingData=[
                "Id",
                "ContactId",
                "CardType",
                "Last4",
                "ExpirationMonth",
                "ExpirationYear",
                "Email",
                "StartDateMonth",
                "StartDateYear",
                "Status",
            ],
        )
        os.chdir(self.startingpath)
        if not os.path.exists("pyDatas"):
            os.mkdir("pyDatas")
        os.chdir("pyDatas")
        with open("ccs.csv", "wb") as outfile:
            thiswriter = csv.DictWriter(outfile, ccs[0].keys())
            thiswriter.writeheader()
            thiswriter.writerows(ccs)
        print "File written to " + str(os.path.abspath(os.curdir))
        os.chdir(self.startingpath)

    def contactsToCSV(self):
        os.chdir(self.startingpath)
        self.customfields = self.svr.getAllRecords("DataFormField")
        fields = ISServer.tables["Contact"] + ["_" + fld["Name"] for fld in self.customfields if fld["FormId"] == -1]
        cons = self.svr.getAllRecords("Contact", interestingData=fields)
        if not os.path.exists("pyDatas"):
            os.mkdir("pyDatas")
        os.chdir("pyDatas")
        with open("contacts.csv", "wb") as outfile:
            thiswriter = csv.DictWriter(outfile, cons[0].keys())
            thiswriter.writeheader()
            thiswriter.writerows(cons)
	print "Repository: " + link.select('td.repo')[0].text.encode("utf-8").strip()
	print "User: "******"utf-8").strip()
	print "Title: " + link.select('td.title')[0].select('a.execute')[0].text.encode("utf-8").strip()
	print "Updated " + link.select('td.date')[0].text.encode("utf-8").strip()
	print "\n----------------------"
#obtain links with beautifulSoup
links = browser.find_all('a')
for link in links:
	try:
		#print(link.get('href'))
		if not link['href'].startswith("https"):
			link['href']='https://bitbucket.org'+link['href'].encode("utf-8").strip()
			#link['href']='/odigeoteam/frontend-html5'
		print link['href']
		#print link
		browser.follow_link(link)
	
		branches = browser.select('li.branches')
		if len(branches)>0 :
			print 'branches '+ branches[0].select('span.value')[0].text
	
		tags = browser.select('li.tags')
		if len(tags)>0 :
			print 'tags' + tags[0].select('span.value')[0].text
	
		enlaces = browser.find_all('a')
		#print enlaces
		for enlace in enlaces:
			if enlace.get('href') == '#forks':
				print 'forks '+ enlace.select('span.value')[0].text
			if enlace.get('href') == '#tags':
Exemplo n.º 38
0
     # NOTE(review): fragment of an online-judge auto-submit routine; the
     # enclosing function header and the definitions of br, curProgram,
     # curByte and shift are outside this chunk -- preserved byte-for-byte.
     fp = br.parsed
     #f0 = open('f1.html', 'w')
     #f0.write(str(fp))
     
     #login
     form=br.get_form(id='mod_loginform')
     form['username'].value= 'pygather'
     form['passwd'].value= '1324354657687980'
     br.submit_form(form)
     sp = br.parsed
     #f2 = open('f2.html','w')
     #f2.write(str(sp))
 
     #navigate to quick submit
     for a in br.find_all('a', href=True, text = re.compile('Quick Submit')):
         br.follow_link(a)
     tp = br.parsed
 
 
 
     form = br.get_form(action = re.compile('Itemid=25'))
     # print(form)
     #form.new_control('text','code',{'value':''})
     #form.fixup()
     # Submits a generated Java program that throws iff a chosen bit of the
     # judge's input is 0 -- appears to probe the input one bit per
     # submission (language '2' presumably = Java; TODO confirm).
     form['localid'].value=str(curProgram)
     form['language'].value='2'
     form['code'].value='import java.util.*;class Main{public static void main(String[]args) throws Exception{Scanner in = new Scanner(System.in);StringBuilder sb = new StringBuilder();while(in.hasNextLine()){sb.append(in.nextLine());}byte b=(byte)sb.charAt('+str(curByte)+');if((b>>'+str(shift)+'&0x01)==0){throw new Exception("Error");}}}'
     br.submit_form(form)
     #f3 = open('f3.html','w')
     #f3.write(str(tp))
     #print(tp)
Exemplo n.º 39
0
# NOTE(review): fragment of a course-module file downloader; the code that
# defines browser, courseModulesUrl, courseTitle and args runs before this
# chunk, and the final statement below is truncated mid-call -- preserved
# byte-for-byte.
# Sanitise the course title into a filesystem-safe directory name.
courseTitle = "".join([x if x.isalnum() else "_" for x in courseTitle])
print('Course Url: ' + courseModulesUrl)
print('Course Title: ' + courseTitle)
print('Finding file links of type: ' + args.downloadOnly)
# Make output dir
outputDir = os.path.join('output/', courseTitle)
make_path(outputDir)
# Get modules links with lecture in title
moduleLinks = browser.find_all("a", {"class": "for-nvda"})

print('Found ' + str(len(moduleLinks)) + ' links, (not all will be valid)')

# Process each lecture link
for moduleLink in moduleLinks:
    print('Opening: ' + moduleLink['aria-label'])
    browser.follow_link(moduleLink)
    try:
        # Find link - containing words "download"
        downloadLinkRel = browser.find('a', href=re.compile(r'.*download*'))
        # If failed, find link - containing reference to file "****.XXX"
        if downloadLinkRel is None:
            downloadLinkRel = browser.find('a',
                                           href=re.compile(r'.*\.[a-z]{3,4}$'))
        fileNameWithExtension = downloadLinkRel.text.strip()
        # Check the link is the right filetype
        if args.downloadOnly != 'all' and not fileNameWithExtension.endswith(
                args.downloadOnly):
            print('   not processing (wrong extension): ' +
                  fileNameWithExtension)
            continue
        downloadLinkAbsolute = urlparse.urljoin(courseModulesUrl,
Exemplo n.º 40
0
from robobrowser import RoboBrowser

# Browse to Genius
browser = RoboBrowser(history=True)
browser.open('http://www.genius.com')

# Search for Porcupine Tree
#form = browser.get_form(action='/search')
#form = browser.get_form(class_='global_search global_search--giant')
#form = browser.get_forms()[0]
print form
form['q'].value = 'porcupine tree'
response = browser.submit_form(form)
print respo

# Look up the first song
songs = browser.select('.song_link')
browser.follow_link(songs[0])
lyrics = browser.select('.lyrics')
lyrics[0].text	

# Back to results page
browser.back()

# Look up my favorite song
song_link = browser.get_link('trains')
browser.follow_link(song_link)

# Can also search HTML using regex patterns
lyrics = browser.find(class_=re.compile(r'\blyrics\b'))
lyrics.text
Exemplo n.º 41
0
class StitchBot(object):
    """Scrape the daily free cross-stitch pattern from dailycrossstitch.com.

    Credentials are taken from the constructor arguments or, failing that,
    from the STITCHBOT_USERNAME / STITCHBOT_PASSWORD environment variables.
    Downloaded PDFs are written into ``output_path``.
    """

    def __init__(self, output_path=None, username=None, password=None):
        self.browser = RoboBrowser(history=True)
        # BUG FIX: the original used tempfile.TemporaryDirectory().name --
        # that directory is deleted as soon as the TemporaryDirectory object
        # is garbage-collected, leaving a dangling path.  mkdtemp() creates
        # a directory that persists until explicitly removed.
        self.output_path = output_path or tempfile.mkdtemp()

        self.username = username or os.environ['STITCHBOT_USERNAME']
        self.password = password or os.environ['STITCHBOT_PASSWORD']

        self.logger = logger.getChild('StitchBot')

    def log(self, level, method_name, message, *args, **kwargs):
        """Log ``message`` under a child logger named after ``method_name``."""
        child_logger = self.logger.getChild(method_name)
        child_logger.log(level, message, *args, **kwargs)

    def scrape(self):
        """Run the full scrape: log in, find and download today's pattern.

        Returns the list of local filenames that were written.
        """
        self.log(logging.INFO, 'scrape', 'Starting scrape')

        self.log_in()
        self.navigate_to_free_pattern()
        scraped_filenames = self.download_pattern()

        self.log(logging.INFO, 'scrape', 'Scrape complete')

        return scraped_filenames

    def log_in(self):
        """Submit the site login form with the stored credentials."""
        self.log(logging.INFO, 'log_in', 'Logging in')

        self.browser.open('http://dailycrossstitch.com/my-account/')
        form = self.browser.get_form(class_='login')
        form['username'] = self.username
        form['password'] = self.password
        self.browser.submit_form(form)

        self.log(logging.INFO, 'log_in', 'Logged in')

    def navigate_to_free_pattern(self):
        """Follow the "FREE" button on the front page to today's pattern."""
        self.log(
            logging.INFO, 'navigate_to_free_pattern', 'Finding free pattern')

        self.browser.open('http://dailycrossstitch.com/')
        free_button = self.browser.find('a', class_='button', string='FREE')
        self.browser.follow_link(free_button)

        self.log(
            logging.INFO, 'navigate_to_free_pattern', 'Found free pattern')

    def download_pattern(self):
        """Download every pattern file linked from the current page.

        Returns a list of local filenames; an entry is None when a download
        button did not resolve to a PDF.
        """
        self.log(logging.INFO, 'download_pattern', 'Downloading pattern')

        download_buttons = self.browser.find_all(
            'a', class_='single_add_to_cart_button')
        download_urls = list(map(itemgetter('href'), download_buttons))
        local_filenames = [
            self.download_pattern_file(url) for url in download_urls]

        self.log(logging.INFO, 'download_pattern', 'Downloaded pattern')

        return local_filenames

    def download_pattern_file(self, url):
        """Resolve ``url`` to its PDF and save it; return the local filename.

        Returns None when the page has no "startDownload" script or no PDF
        URL can be extracted from it.
        """
        self.log(
            logging.INFO, 'download_pattern_file',
            'Downloading pattern file at {0}'.format(url))

        self.browser.open(url)
        # The real PDF URL is buried in an inline "startDownload" script.
        download_script = self.browser.find(
            'script', string=re.compile(r'^\s*function startDownload'))
        if not download_script:
            return None

        pdf_url_match = re.search(r'(http.+\.pdf)', download_script.string)
        if not pdf_url_match:
            return None

        pdf_url = pdf_url_match.group(1)
        self.browser.open(pdf_url)

        output_filename = self.save_pattern(self.browser.response)

        self.log(
            logging.INFO, 'download_pattern_file',
            'Downloaded pattern file at {0}'.format(url))

        return output_filename

    def save_pattern(self, response):
        """Write ``response.content`` into the output directory.

        Returns the full path of the written file.
        """
        self.log(logging.INFO, 'save_pattern', 'Saving pattern')

        # exist_ok replaces the old `try/except OSError: pass`, which also
        # hid real failures such as permission errors.
        os.makedirs(self.output_path, exist_ok=True)

        filename = self.get_filename(response.headers)
        output_filename = os.path.join(self.output_path, filename)
        with open(output_filename, 'wb') as output_file:
            output_file.write(response.content)

        self.log(
            logging.INFO, 'save_pattern',
            'Saved pattern to {0}'.format(output_filename))

        return output_filename

    def get_filename(self, headers, default_filename='pattern.pdf'):
        """Extract the filename from a Content-Disposition header.

        Falls back to ``default_filename`` when the header is missing or
        carries no filename.
        """
        filename_match = re.search(
            r'filename="?([^"]+)"?', headers.get('Content-Disposition', ''))
        if not filename_match:
            return default_filename

        return filename_match.group(1)
Exemplo n.º 42
0
class Robot(object):
    """Grab matakuliah (course) data and each mahasiswa's KRS from the UGM
    academic site.  Requires a username and password for authorization.

    :param str username: username for login
    :param str password: password for login

    """
    def __init__(self, username, password):
        self.browser = RoboBrowser()
        self.username = username
        self.password = password
        self.matakuliah = []  # cached list of scraped course dicts

    def update_matakuliah(self):
        """Scrape the course list plus per-course schedules and persist them."""
        self.matakuliah = self._get_matakuliah()
        for obj in self.matakuliah:
            detail = self._get_matakuliah_detail(obj['link_detail'])
            obj['jadwal_kuliah'] = detail['jadwal_kuliah']
            # obj['jadwal_uts'] = detail['jadwal_uts']
            # obj['jadwal_uas'] = detail['jadwal_uas']
        self._persist_matakuliah()

    def _persist_matakuliah(self):
        """Upsert every scraped course into the Kelas table, keyed by class name."""
        for obj in self.matakuliah:
            try:
                kelas = (Kelas.select()
                         .where(Kelas.nama == obj['nama_kelas']).get())
            except Kelas.DoesNotExist:
                kelas = Kelas()

            kelas.kode_mk = obj['kode_mk']
            kelas.nama = obj['nama_kelas']
            kelas.matakuliah = obj['matakuliah']
            kelas.dosen = obj['dosen']
            kelas.sks = obj['sks']
            kelas.tipe = obj['tipe']
            kelas.jadwal_kuliah = obj['jadwal_kuliah']
            kelas.save()

    def __login(self):
        """Submit the login form on the academic portal."""
        self.browser.open('http://akademika.ugm.ac.id')
        login_form = self.browser.get_form(id='form-login')
        login_form['username'].value = self.username
        login_form['password'].value = self.password
        self.browser.submit_form(login_form)

    def _get_matakuliah(self):
        """Log in, open the 'informasi matakuliah' page and scrape its table.

        Returns a list of dicts, one per course row.
        """
        self.__login()

        # go to 'informasi matakuliah' page
        link_matakuliah = self.browser.select('#navigation li a')[3]
        self.browser.follow_link(link_matakuliah)

        marshal = []
        # BUG FIX: the original referenced a bare `browser` (NameError);
        # the instance browser is the one holding the page.
        matakuliah_raw = self.browser.select('.table-common > tr')[1:]
        for raw in matakuliah_raw:
            data = raw.select('td')

            obj = {}
            obj['kode_mk'] = data[1].contents[0]
            obj['matakuliah'] = data[2].contents[0]
            obj['dosen'] = data[3].contents[0]
            obj['link_detail'] = data[4].contents[0]
            obj['nama_kelas'] = data[4].contents[0].get_text()
            obj['tipe'] = data[5].contents[0]
            obj['sks'] = data[6].contents[0]
            marshal.append(obj)

        return marshal

    def _get_matakuliah_detail(self, link):
        """Follow a course's detail link and scrape its schedule rows.

        Returns a dict with 'jadwal_kuliah', 'jadwal_uts' and 'jadwal_uas'
        (only 'jadwal_kuliah' is currently populated).
        """
        self.browser.follow_link(link)
        jadwal_row = self.browser.select('table > tr')

        # for brevity
        obj = {}
        obj['jadwal_kuliah'] = ""
        obj['jadwal_uts'] = ""
        obj['jadwal_uas'] = ""

        jadwal_kuliah_row = jadwal_row[0].select('table tr')[1:]
        for row in jadwal_kuliah_row:
            contents = [x.contents[0] for x in row.select('td')]
            data_string = "$".join(contents)
            obj['jadwal_kuliah'] = "|".join([data_string])

        # TODO: find a way to get 'tanggal'
        # jadwal_uts_row = jadwal_row[1].select('table tr')[1:]
        # jadwal_uas_row = jadwal_row[2].select('table tr')[1:]

        return obj
Exemplo n.º 43
0
password = getpass()
challenge_count = 1

while (True):
    browser = RoboBrowser(parser='lxml')
    browser.open(SITE_URL)

    # loop forever
    #try catch this
    signin_form = browser.get_forms()[0]
    signin_form['login'].value = username
    signin_form['password'].value = password
    browser.submit_form(signin_form)

    #get the leaderboard list
    browser.follow_link(browser.get_link(text='Leaderboard'))
    bot_name_tags = browser.find_all('div', {'class': 'bot-name'});
    bot_name_extracter = lambda tag: tag.string.replace('\t', '').replace('\n', '').lower()
    bot_names = map(bot_name_extracter, bot_name_tags)
    no_bots = len(bot_names)

    our_rank = bot_names.index('cbteamname') + 1
    print("[INFO] CBTeamName is ranked " + str(our_rank))

    random.seed(os.urandom(8))
    opponent_queue = []
    #three bots with lower rank
    opponent_queue += ([bot_names[random.randint(our_rank + 1, no_bots - 1)],
                        bot_names[random.randint(our_rank + 1, no_bots - 1)],
                        bot_names[random.randint(our_rank + 1, no_bots - 1)]])
    #one bot with a higher rank
Exemplo n.º 44
0
class Dagr:
        """deviantArt gallery ripper.

        Crawls a deviant's gallery/favourites pages, extracts deviation
        links and downloads the full-size file for each.  Configure the
        public attributes (username, password, overwrite, reverse, testOnly,
        verbose, deviant) before calling start()/deviant_get()/group_get().
        """

        NAME = basename(__file__)
        __version__="0.60"
        MAX_DEVIATIONS = 1000000 # max deviations

        def __init__(self):
                # Internals
                self.browser = None
                self.errors_count = dict()  # error message -> occurrence count

                # Configuration
                self.username = ""
                self.password = ""
                self.overwrite = False
                self.reverse = False
                self.testOnly = False
                self.verbose = False

                # Current status
                self.deviant = ""

        def start(self):
                """Prepare the browser (if needed) and log in."""
                if not self.browser:
                        # Set up fake browser
                        self.set_browser()
                # Always run login
                self.login()

        def set_browser(self):
                """Create a RoboBrowser with a random desktop user agent."""
                USERAGENTS = (
                    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1',
                    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101',
                    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
                    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
                    'Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9',
                    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5',
                    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/6.0',
                    'Mozilla/5.0 (Windows; U; Windows NT 6.1; pl; rv:1.9.1) Gecko/20090624 Firefox/3.5 (.NET CLR 3.5.30729)'
                    )
                session = req_session()
                session.headers.update({'Referer': 'http://www.deviantart.com/'})

                self.browser = RoboBrowser(history=False, session=session, tries=3, user_agent=random.choice(USERAGENTS))

        def login(self):
                """Log in to deviantArt; on failure, continue anonymously."""
                if not (self.username and self.password):
                        return
                print("Attempting to log in to deviantArt...")
                self.browser.open('https://www.deviantart.com/users/login?ref=http%3A%2F%2Fwww.deviantart.com%2F&remember_me=1')
                form = self.browser.get_forms()[1]
                form['username'] = self.username
                form['password'] = self.password
                self.browser.submit_form(form)

                if self.browser.find(text=re.compile("The password you entered was incorrect")):
                        print("Wrong password or username. Attempting to download anyway.")
                elif self.browser.find(text=re.compile("\"loggedIn\":true")):
                        print("Logged in!")
                else:
                        print("Login unsuccessful. Attempting to download anyway.")

        def get(self, url, file_name = None):
                """Fetch url; return the HTML when file_name is None, else save to it."""
                if file_name is not None and (self.overwrite == False) and (path_exists(file_name)):
                        print(file_name + " exists - skipping")
                        return
                #TODO Test robobrowser retries and exceptions
                self.browser.open(url)

                if file_name is None:
                        return str(self.browser.parsed)
                else:
                        # BUG FIX: use a context manager so the file handle is
                        # closed even if the write raises.
                        with open(file_name, "wb") as local_file:
                                local_file.write(self.browser.response.content)

        def find_link(self, link):
                """Resolve a deviation page to (filename, file URL).

                Raises DagrException when no downloadable link can be found
                (e.g. behind the mature-content gate).
                """
                filelink = None
                mature_error = False
                self.browser.open(link)
                # Full image link (via download link)
                img_link = self.browser.get_link(text=re.compile("Download( (Image|File))?"))
                if img_link and img_link.get("href"):
                        self.browser.follow_link(img_link)
                        filelink = self.browser.url
                else:
                        if self.verbose:
                                print("Download link not found, falling back to direct image")
                        # Fallback 1: try meta (filtering blocked meta)
                        filesearch = self.browser.find("meta", {"name":"og:image"})
                        if filesearch:
                                filelink = filesearch['content']
                                if basename(filelink).startswith("noentrythumb-"):
                                        # Placeholder thumbnail => mature-gated deviation
                                        filelink = None
                                        mature_error = True
                        if not filelink:
                                # Fallback 2: try collect_rid, full
                                filesearch = self.browser.find("img", {"collect_rid":True, "class":re.compile(".*full")})
                                if not filesearch:
                                        # Fallback 3: try collect_rid, normal
                                        filesearch = self.browser.find("img", {"collect_rid":True, "class":re.compile(".*normal")})
                                if filesearch:
                                        filelink = filesearch['src']

                        if not filelink:
                                if mature_error:
                                        raise DagrException("probably a mature deviation")
                                else:
                                        raise DagrException("all attempts to find a link failed")

                filename = basename(filelink)
                return (filename, filelink)

        def handle_download_error(self, link, e):
                """Report a per-deviation failure and tally it by message."""
                error_string = str(e)
                print("Download error (" + link + ") : " + error_string)
                if error_string in self.errors_count:
                        self.errors_count[error_string] += 1
                else:
                        self.errors_count[error_string] = 1

        def deviant_get(self, mode):
                """Rip one deviant's pages for `mode` (gallery/favs/scraps/...).

                A mode may carry an argument after a colon, e.g. "album:123".
                """
                print("Ripping " + self.deviant + "'s " + mode + "...")
                pat = r"http://[a-zA-Z0-9_-]*\.deviantart\.com/art/[a-zA-Z0-9_-]*"
                modeArg = '_'
                if mode.find(':') != -1:
                        mode = mode.split(':',1)
                        modeArg = mode[1]
                        mode = mode[0]

                #DEPTH 1: collect deviation page URLs, 24 per listing page
                pages = []
                for i in range(0,int(Dagr.MAX_DEVIATIONS/24),24):
                        html = ""
                        url = ""

                        if mode == "favs":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/favourites/?catpath=/&offset=" + str(i)
                        elif mode == "collection":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/favourites/" + modeArg + "?offset=" + str(i)
                        elif mode == "scraps":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/?catpath=scraps&offset=" + str(i)
                        elif mode == "gallery":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/?catpath=/&offset=" + str(i)
                        elif mode == "album":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/" + modeArg + "?offset=" + str(i)
                        elif mode == "query":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/?q=" + modeArg + "&offset=" + str(i)
                        else:
                                continue

                        html = self.get(url)
                        prelim = re.findall(pat, html, re.IGNORECASE|re.DOTALL)

                        # Stop when a page yields no *new* links
                        c = len(prelim)
                        for match in prelim:
                                if match in pages:
                                        c -= 1
                                else:
                                        pages.append(match)

                        done = re.findall("(This section has no deviations yet!|This collection has no items yet!)", html, re.IGNORECASE|re.S)

                        if len(done) >= 1 or c <= 0:
                                break

                        print(self.deviant + "'s " +  mode + " page " + str(int((i/24)+1)) + " crawled...")

                if not self.reverse:
                        pages.reverse()

                if len(pages) == 0:
                        print(self.deviant + "'s " + mode + " had no deviations.")
                        return 0
                else:
                        try:
                                da_make_dirs(self.deviant + "/" + mode)
                                if (mode == "query") or (mode == "album") or (mode == "collection"):
                                    da_make_dirs(self.deviant + "/" + mode + "/" + modeArg)
                        except Exception as e:
                                print(str(e))
                        print("Total deviations in " + self.deviant + "'s gallery found: " + str(len(pages)))

                ##DEPTH 2: download every collected deviation
                counter2 = 0
                for link in pages:
                        counter2 += 1
                        if self.verbose:
                                print("Downloading " + str(counter2) + " of " + str(len(pages)) + " ( " + link + " )")
                        filename = ""
                        filelink = ""
                        try:
                                filename,filelink = self.find_link(link)
                        except (KeyboardInterrupt, SystemExit):
                                raise
                        except Exception as e:
                                self.handle_download_error(link, e)
                                continue

                        if self.testOnly == False:
                                if (mode == "query") or (mode=="album") or (mode == "collection"):
                                        self.get(filelink, self.deviant + "/" + mode + "/" + modeArg + "/" + filename)
                                else:
                                        self.get(filelink, self.deviant + "/" + mode + "/" + filename)
                        else:
                                print(filelink)

                print(self.deviant + "'s gallery successfully ripped.")

        def group_get(self, mode):
                """Rip a group's gallery or favourites, folder by folder."""
                if mode == "favs":
                        strmode  = "favby"
                        strmode2 = "favourites"
                        strmode3 = "favs gallery"
                elif mode == "gallery":
                        strmode  = "gallery"
                        strmode2 = "gallery"
                        strmode3 = "gallery"
                else:
                        print("?")
                        sys.exit()
                print("Ripping " + self.deviant + "'s " + strmode2 + "...")

                folders = []

                insideFolder = False
                #are we inside a gallery folder?
                html = self.get('http://' + self.deviant + '.deviantart.com/' + strmode2 + '/')
                if re.search(strmode2 + "/\?set=.+&offset=", html, re.IGNORECASE|re.S):
                        insideFolder = True
                        folders = re.findall(strmode + ":.+ label=\"[^\"]*\"", html, re.IGNORECASE)

                #no repeats
                folders = list(set(folders))

                i = 0
                while not insideFolder:
                        html = self.get('http://' + self.deviant + '.deviantart.com/' + strmode2 + '/?offset=' + str(i))
                        k = re.findall(strmode + ":" + self.deviant + "/\d+\"\ +label=\"[^\"]*\"", html, re.IGNORECASE)
                        if k == []:
                                break
                        flag = False
                        for match in k:
                                if match in folders:
                                        flag = True
                                else:
                                        folders+=k
                        if self.verbose:
                                print("Gallery page " + str(int((i/10) + 1)) + " crawled...")
                        if flag:
                                break
                        i += 10

                #no repeats
                folders = list(set(folders))

                if len(folders) == 0:
                        print(self.deviant + "'s " +  strmode3 + " is empty.")
                        return 0
                else:
                        print("Total folders in " + self.deviant + "'s " + strmode3 + " found: " + str(len(folders)))

                if self.reverse:
                        folders.reverse()

                pat = "http:\\/\\/[a-zA-Z0-9_-]*\.deviantart\.com\\/art\\/[a-zA-Z0-9_-]*"
                pages = []
                for folder in folders:
                        try:
                                folderid = re.search("[0-9]+",folder,re.IGNORECASE).group(0)
                                label = re.search("label=\"([^\"]*)",folder,re.IGNORECASE).group(1)
                        except:
                                continue
                        for i in range(0,int(Dagr.MAX_DEVIATIONS/24),24):
                                html = self.get("http://" + self.deviant.lower() + ".deviantart.com/" + strmode2 + "/?set=" + folderid + "&offset=" + str(i - 24))
                                prelim = re.findall(pat, html, re.IGNORECASE)
                                if not prelim:
                                        break
                                for x in prelim:
                                        p = str(re.sub(r'\\/','/',x))
                                        if p not in pages:
                                                pages.append(p)
                                if self.verbose:
                                        print("Page " + str(int((i/24) + 1)) + " in folder " + label + " crawled...")

                        if not self.reverse:
                                pages.reverse()

                        try:
                                if mode == "favs":
                                        da_make_dirs(self.deviant + "/favs/" + label)
                                elif mode == "gallery":
                                        da_make_dirs(self.deviant + "/" + label)
                        except Exception as err:
                                print(err)
                        counter = 0
                        for link in pages:
                                counter += 1
                                if self.verbose:
                                        print("Downloading " +  str(counter) +  " of " + str(len(pages)) +  " ( " + link + " )")
                                filename = ""
                                filelink = ""
                                try:
                                        filename,filelink = self.find_link(link)
                                except (KeyboardInterrupt, SystemExit):
                                        raise
                                except Exception as e:
                                        self.handle_download_error(link, e)
                                        continue

                                if self.testOnly == False:
                                        if mode == "favs":
                                                # BUG FIX: was `self.devianti` (AttributeError)
                                                self.get(filelink, self.deviant + "/favs/" + label + "/" + filename)
                                        elif mode == "gallery":
                                                self.get(filelink, self.deviant + "/" + label + "/" + filename)
                                else:
                                        print(filelink)

                print(self.deviant + "'s " + strmode3 + " successfully ripped.")

        def print_errors(self):
                """Print a summary of download-error counts."""
                if len(self.errors_count):
                        print("Download errors count:")
                        # BUG FIX: was dict.iteritems() (Python 2 only) in a
                        # class that otherwise uses Python 3 print functions.
                        for error, count in self.errors_count.items():
                                print("* " + error + " : " + str(count))
Exemplo n.º 45
0
class ISServer:
    # Thin wrapper around the Infusionsoft XML-RPC DataService API.
    # Scrapes the encrypted API key from the web UI (via RoboBrowser)
    # instead of asking the user for it.
    # NOTE(review): Python 2 only -- uses print statements,
    # `except Exception, e`, raw_input, xmlrpclib and dict.has_key.
    def __init__(self):
        global pw
        # NOTE(review): `pw` is a module-level dict with 'username' and
        # 'password' keys (read in getapikey) -- presumably loaded
        # elsewhere in this file; verify against the surrounding module.
        self.pw = pw
        self.startingpath = os.path.abspath(os.curdir)
        self.infusionsoftapp=self.getappname()
        self.baseurl = 'https://' + self.infusionsoftapp + '.infusionsoft.com/'
        self.infusionsoftAPIKey=self.getapikey()
        self.appurl = "https://" + self.infusionsoftapp + ".infusionsoft.com:443/api/xmlrpc"
        self.connection = xmlrpclib.ServerProxy(self.appurl)

    def getappname(self):
        # Prompt for the Infusionsoft application (subdomain) name.
        return raw_input("Please enter appname:").strip('\n \t')
    def getapikey(self):
        # Log in to the web UI and scrape the encrypted API key from the
        # application-settings page.
        global pw
        username = self.pw['username']
        password = self.pw['password']
        #Basically:
        #    #Add username and password to your global variables.
        self.browser = RoboBrowser(history=True)
        self.browser.open(self.baseurl)
        logform = self.browser.get_form()
        logform.fields['username'].value = username
        logform.fields['password'].value = password
        self.browser.submit_form(logform)
        # NOTE(review): assumes the second link on the post-login page leads
        # into the app -- fragile; confirm against the actual page layout.
        self.browser.follow_link(self.browser.get_links()[1])
        self.browser.open(self.baseurl + 'app/miscSetting/itemWrapper?systemId=nav.admin&settingModuleName=Application&settingTabName=Application')
        pageSoup = BeautifulSoup(self.browser.response.content, 'html.parser')
        return pageSoup.findAll(id='Application_Encrypted_Key:_data')[0].text
    ########################################################
    ## Methods to get records from various tables
    ##
    ##
    def getMatchingRecords(self, tableName, criteria, desiredFields=None, orderedBy=None):
        """Return all records of `tableName` matching `criteria`.

        Convenience wrapper over getAllRecords with a search filter.
        """
        return self.getAllRecords(tableName, searchCriteria=criteria, interestingData=desiredFields, orderedBy=orderedBy)
    def getTagCats(self):
        # All tag categories.
        return self.getAllRecords("ContactGroupCategory")
    def getAllTags(self):
        # All tags.
        return self.getAllRecords("ContactGroup")
    def getAllProductCats(self):
        # All product categories.
        return self.getAllRecords("ProductCategory")
    def getAllProducts(self):
        # All products.
        return self.getAllRecords("Product")
    def getAllRecords(self, tableName, interestingData=None, searchCriteria=None, orderedBy=None):
        # Page through DataService.query (1000 rows per page) and return
        # every row as a dict of the requested fields (missing fields are
        # filled with None).
        if interestingData is None:
            # NOTE(review): `tables` maps table name -> default field list;
            # it is defined elsewhere in this module.
            interestingData = tables[tableName]
        if searchCriteria is None:
            searchCriteria={}
        if orderedBy is None:
            orderedBy = interestingData[0]
        records = []
        p=0
        while True:
            listOfDicts = self.connection.DataService.query(self.infusionsoftAPIKey, tableName, 1000, p, searchCriteria, interestingData, orderedBy, True)
            for each in listOfDicts:
                thisRecord={}
                for eachbit in interestingData:   # this should be records.append(zip(interestingData, each)) perhaps
                    if not each.has_key(eachbit):   # TODO: research THIS
                        each[eachbit]=None
                    thisRecord[eachbit] = each[eachbit]
                records.append(thisRecord)
            # A short page means we've reached the last one.
            if not(len(listOfDicts)==1000):
                break
            p+=1
        return records
    def incrementlyGetRecords(self, tableName, interestingData=None, searchCriteria=None, orderedBy=None):
        # Like getAllRecords, but flushes accumulated rows to a CSV file
        # every 10 pages to bound memory usage on very large tables.
        if interestingData is None:
            interestingData = tables[tableName]
        if searchCriteria is None:
            searchCriteria={}
        if orderedBy is None:
            orderedBy = interestingData[0]
        records = []
        p=0
        while True:
            print tableName, p
            print "trying!"
            try:
                listOfDicts = self.connection.DataService.query(self.infusionsoftAPIKey, tableName, 1000, p, searchCriteria, interestingData, orderedBy, True)
            except Exception, e:
                # NOTE(review): deliberately best-effort, but if the query
                # fails on the FIRST page `listOfDicts` is undefined below;
                # on later pages the previous page's rows are reprocessed.
                print e ,p
            for each in listOfDicts:
                thisRecord={}
                for eachbit in interestingData:   # this should be records.append(zip(interestingData, each)) perhaps
                    if not each.has_key(eachbit):   # TODO: research THIS
                        each[eachbit]=None
                    thisRecord[eachbit] = each[eachbit]
                records.append(thisRecord)
            if not(len(listOfDicts)==1000):
                break
            p+=1
            if p%10==0:
                # Flush the buffered rows to a numbered CSV and start fresh.
                fname = tableName + "%010d" %(p) + ".csv"
                print 'writing', p, fname
                with open(fname, 'wb') as outfile:
                    thisWriter = csv.DictWriter(outfile, records[0])
                    thisWriter.writeheader()
                    thisWriter.writerows(records)
                records=[]
        # Write whatever remains after the final (short) page.
        fname = tableName + "%010d" %(p) + ".csv"
        print 'writing', p, fname
        with open(fname, 'wb') as outfile:
            thisWriter = csv.DictWriter(outfile, records[0])
            thisWriter.writeheader()
            thisWriter.writerows(records)
Exemplo n.º 46
0
# -*- coding: utf-8 -*-
import re
import sys
from robobrowser import RoboBrowser
from getpass import getpass

# Log in to the CMU student-information portal.
account = input('account:')
# NOTE(review): the original line was corrupted
# ("getpass('password:'******'http://...')") -- reconstructed as the
# password prompt followed by creating the browser and opening the
# login page.
password = getpass('password:')
browser = RoboBrowser(history=True)
browser.open('http://web1.cmu.edu.tw/stdinfo/login.asp')
form1 = browser.get_form(id='form1')
form1['f_id'].value = account
form1['f_pwd'].value = password
browser.submit_form(form1)

# Open the mid-term course-evaluation survey list.
link_one = browser.get_link(text='期中網路教學意見調查')
browser.follow_link(link_one)

# Collect the "fill in" links; skip the first entry (renamed from `list`,
# which shadowed the builtin).
survey_links = [l for l in browser.get_links(text=re.compile('填寫'))]
survey_links.pop(0)

# Submit each survey with the first rating option selected.
for survey_link in survey_links:
    browser.follow_link(survey_link)
    form2 = browser.get_form(id='thisform')
    form2['Cos_Q1'].value = '1'
    browser.submit_form(form2)
Exemplo n.º 47
0
# coding: utf-8
# Scrape the "today's topic" (今日话题) article from the QQ front page.
# NOTE: Python 2 syntax (print statements).
import re
from robobrowser import RoboBrowser

url = "http://www.qq.com/"
b = RoboBrowser(history=True)
b.open(url)

# Grab the "today's topic" link (the anchor inside the #todaytop element)
today_top = b.find(id="todaytop").a
print today_top["href"]

b.follow_link(today_top)

# The browser has now navigated to the topic's detail page

# Print the article title
title = b.select(".hd h1")[0]
print "*************************************"
print title.text
print "*************************************"

# Print the article body
print b.find(id="articleContent").text
class fullexporter():
    """Interactive exporter for an Infusionsoft application.

    Logs in with the module-level ``pw`` credentials, scrapes the encrypted
    API key from the admin settings page, and offers a small text menu for
    dumping API tables, built-in reports, and FileBox attachments to disk.
    """
    global pw  # no-op at class scope; kept from the original to signal the pw dependency

    def __init__(self):
        # Remember the launch directory so per-app output folders are created
        # relative to it.
        self.startingpath = os.path.abspath(os.curdir)
        self.appname = self.getappname()
        # Map table name -> DataFormField.FormId that holds its custom fields.
        self.mapping = {}
        self.mapping['Contact'] = -1
        self.mapping['Affiliate'] = -3
        self.mapping['ContactAction'] = -5
        self.mapping['Company'] = -6
        self.mapping['OrderItem'] = -9

        self.menu()

    def menu(self, context="initial"):
        """Show the main menu; login/setup runs only on the first call."""
        # == instead of `is`: identity comparison on a string literal only
        # worked by CPython's interning accident.
        if context == "initial":
            self.baseurl = 'https://' + self.appname + '.infusionsoft.com/'
            self.apikey = self.getapikey()
            self.svr = ISServer.ISServer(self.appname, self.apikey)
            self.apppath = os.path.join(self.startingpath, self.appname)
            if not os.path.exists(self.apppath):
                os.mkdir(self.apppath)
            os.chdir(self.apppath)
            if not os.path.exists('files'):
                os.mkdir('files')
            os.chdir('files')
            self.usermenu = {}
            self.usermenu['downloadAPITables'] = 'apit'
            self.usermenu['play'] = 'play'
            self.usermenu['reports'] = 'rpts'
        for eachitem in self.usermenu.keys():
            print(eachitem + ":\t" + self.usermenu[eachitem])
        thisChoice = raw_input('please make a choice: ').strip(' \n\t')
        if thisChoice == 'apit':
            self.handleAPItables()
        elif thisChoice == 'play':
            self.play()
        elif thisChoice == 'rpts':
            self.downloadAllReports()
        else:
            # Unrecognized input: re-show the menu WITHOUT redoing the
            # login/API-key setup (the original recursed with the default
            # context and re-ran the whole initial block every time).
            self.menu(context="again")

    def handlefiles(self):
        """Download every FileBox attachment into files/<ContactId>/<FileName>."""
        os.chdir(self.startingpath)
        if not os.path.exists('files'):
            os.mkdir('files')
        os.chdir('files')
        allfiles = self.svr.getAllRecords('FileBox')
        for eachfile in allfiles:
            downloadurl = self.baseurl + "Download?Id=" + str(eachfile['Id'])
            self.browser.open(downloadurl)
            # str() in case ContactId comes back as an int from the API —
            # os.path.join rejects non-string components.
            fileoutpath = os.path.join(self.startingpath, 'files', str(eachfile['ContactId']), eachfile['FileName'])
            # Create the CONTAINING directory, not the file path itself: the
            # original called makedirs(fileoutpath), which made a directory
            # with the file's name and broke the open() below.
            if not os.path.exists(os.path.dirname(fileoutpath)):
                os.makedirs(os.path.dirname(fileoutpath))
            # `with` guarantees the handle is closed even if write() raises.
            with open(fileoutpath, 'wb') as fout:
                fout.write(self.browser.response.content)

    def play(self):
        """Placeholder hook for interactive experimentation."""
        print("she's all yours captain!")

    def downloadAReport(self, reportname):
        """Export one built-in report to <reportname>.csv, if it exists."""
        self.browser.open(self.baseurl + "Reports/exportResults.jsp?reportClass=" + reportname)
        reportForm = [eachform for eachform in self.browser.get_forms() if eachform.action == 'qbExport.jsp']
        if len(reportForm) > 0:
            self.browser.submit_form(reportForm[0], submit=reportForm[0].submit_fields['process'])
            with open(reportname + ".csv", 'wb') as outfile:
                outfile.write(self.browser.response.content)
        else:
            print("no " + reportname)

    def downloadAllReports(self):
        """Export every known built-in report."""
        for reportname in [ "AffiliateActivitySummary", "AffiliateLedger", "AffiliateRedirectActivity", "AffiliateReferral", "AffPayout", "AllOrders", "AllSales", "AllSalesItemized", "ARAgingReport", "CampaigneeBasic", "CampaigneeByDay", "CampaignProductConversion", "ClickThroughPercentage", "ClickThroughPercentageByEmail", "ContactDistributed", "CProgramRevenueSummary", "CreditCard", "CreditsIssued", "CustomerLifetimeValue", "DailyPayments", "DailyReceivables", "DailySalesTotals", "DashboardCampaign", "DashboardEmail", "DashboardLeads", "DashboardOrders", "DashboardUsers", "DigitalProductKey", "EmailBatchSearch", "EmailBroadcastConversionReport", "EmailConversion", "EmailSentSearch", "FailedCharge", "FaxBatchSearch", "FollowUpSequenceConversionReport", "FunnelFlowRecipient", "FunnelFlowRecipientWaiting", "FunnelGoalAchieved", "FunnelQueuedFlowItem", "FunnelUniqueContacts", "GroupAdds", "HabeasDetail", "InvoiceNetIncome", "LeadSourceConversion", "LeadSourceIncome", "LeadSourceROI", "LeadSourceROIByCategory", "MonthlyPayments", "MonthlyReceivables", "MonthlySalesTotals", "MonthlySalesTotalsByProduct", "OptOutSearch", "PaymentsReport", "PieceResponse", "ProductNetIncome", "Receivables", "RevenueForecastReport", "TaskSearch", "VoiceBatchSearch", "VoiceOptOutSearch", "WebformActivitySummary", "WebFormTracking" ]:
            self.downloadAReport(reportname)

    def getFilePath(self):
        """Ask the user to pick a file via a Tk dialog."""
        return tkFileDialog.askopenfilename()

    def getFolderPath(self):
        """Ask the user to pick a directory via a Tk dialog."""
        return tkFileDialog.askdirectory()

    def getappname(self):
        """Prompt for the Infusionsoft application name."""
        return raw_input("Please enter appname:").strip('\n \t')

    def getapikey(self):
        """Log in with RoboBrowser and scrape the encrypted API key from the
        application settings page.

        Requires the global ``pw`` dict to hold 'username' and 'password'.
        """
        global pw
        username = pw['username']
        password = pw['password']
        self.browser = RoboBrowser(history=True)
        self.browser.open(self.baseurl)
        logform = self.browser.get_form()
        logform.fields['username'].value = username
        logform.fields['password'].value = password
        self.browser.submit_form(logform)
        # Second link on the post-login page leads into the app — TODO confirm
        # this index is stable across Infusionsoft UI versions.
        self.browser.follow_link(self.browser.get_links()[1])
        self.browser.open(self.baseurl + 'app/miscSetting/itemWrapper?systemId=nav.admin&settingModuleName=Application&settingTabName=Application')
        pageSoup = BeautifulSoup(self.browser.response.content, 'html.parser')
        return pageSoup.findAll(id='Application_Encrypted_Key:_data')[0].text

    def handleAPItables(self):
        """Dump every API table (plus its custom fields) to <table>.csv."""
        apidata = {}
        self.customfields = self.svr.getAllRecords('DataFormField')
        for eachtable in ISServer.tables.keys():
            print("starting " + eachtable)
            if eachtable not in self.mapping.keys():
                self.mapping[eachtable] = 99
            # == instead of `is`: int identity only holds for CPython's cached
            # small ints (-5..256), so Company (-6) and OrderItem (-9) custom
            # fields were silently skipped by the original.
            fields = ISServer.tables[eachtable] + ['_' + fld['Name'] for fld in self.customfields if fld['FormId'] == self.mapping[eachtable]]
            apidata[eachtable] = self.svr.getAllRecords(eachtable, interestingData=fields)
            with open(eachtable + ".csv", 'wb') as outfile:
                writer = csv.DictWriter(outfile, fields)
                writer.writeheader()
                writer.writerows(apidata[eachtable])
            print("done writing " + eachtable)
        self.apidata = apidata

    def handlewebforms(self):
        """Not implemented: would fetch web-form submissions per result id."""
        # for eachid
        # webformsubmissionpath="https://" + self.appname + ".infusionsoft.com/app/webformSubmission/contactTabDetails?customFormWebResultId=" + str(x)
        pass
Exemplo n.º 49
0
# Vote bot: log in to heroes-wow, then walk the topg.org voting redirect chain.
#br = mechanize.Browser()
UA = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.2 (KHTML, like Gecko) Chrome/22.0.1216.0 Safari/537.2'
br = RoboBrowser(history=True, user_agent=UA)

# Open the login page; this also establishes the site session.
br.open('http://heroes-wow.com/wotlk/index.php?page=login')

# Locate the login form by its action URL and fill in the credentials.
signin = br.get_form(action="http://heroes-wow.com/wotlk/execute.php?take=login")
signin['username'].value = 'anathk2'
signin['password'].value = 'wow123456'
signin['rememberme'].value = '1'

br.submit_form(signin)

# Jump to the server's topg.org listing and follow the 23rd anchor on the page.
br.open('http://topg.org/server-heroes-wow-id347987')
anchors = br.find_all('a', href=True)
br.follow_link(anchors[22])
result = br.parsed

# On the redirected page, follow the second anchor to finish the vote.
followups = br.find_all('a', href=True)
br.follow_link(followups[1])
Exemplo n.º 50
0
import re
from robobrowser import RoboBrowser

# Start a browsing session on Rap Genius.
browser = RoboBrowser(history=True)
browser.open('http://rapgenius.com/')

# Submit the site search form with the query "queen".
search_form = browser.get_form(action='/search')
search_form  # <RoboForm q=>
search_form['q'].value = 'queen'
browser.submit_form(search_form)

# Open the first search hit.
hits = browser.select('.song_name')
browser.follow_link(hits[0])
song_text = browser.select('.lyrics')
song_text[0].text  # \n[Intro]\nIs this the real life...

# Return to the results page via the browser history.
browser.back()

# Jump to a result by its link text.
browser.follow_link('death on two legs')

# The parsed HTML can also be searched with regex patterns.
song_text = browser.find(class_=re.compile(r'\blyrics\b'))
song_text.text
Exemplo n.º 51
0
courseTitle = "".join([x if x.isalnum() else "_" for x in courseTitle])
print('Course Url: ' + courseModulesUrl)
print('Course Title: ' + courseTitle)
print('Finding file links of type: ' + args.downloadOnly)
# Make output dir
outputDir = os.path.join('output/', courseTitle)
make_path(outputDir)
# Get modules links with lecture in title
moduleLinks = browser.find_all("a", { "class" : "for-nvda" })

print('Found ' + str(len(moduleLinks)) + ' links, (not all will be valid)')

# Process each lecture link
for moduleLink in moduleLinks:
    print('Opening: ' + moduleLink['aria-label'])
    browser.follow_link(moduleLink)
    try:
        # Find link - containing words "download"
        downloadLinkRel = browser.find('a', href = re.compile(r'.*download*'))
        # If failed, find link - containing reference to file "****.XXX"
        if downloadLinkRel is None: 
            downloadLinkRel = browser.find('a', href = re.compile(r'.*\.[a-z]{3,4}$'))
        fileNameWithExtension = downloadLinkRel.text.strip()
        # Check the link is the right filetype
        if args.downloadOnly != 'all' and not fileNameWithExtension.endswith(args.downloadOnly):
            print('   not processing (wrong extension): ' + fileNameWithExtension)
            continue
        downloadLinkAbsolute = urlparse.urljoin(courseModulesUrl, downloadLinkRel['href'])
        pdfOutputPath = os.path.join(outputDir, fileNameWithExtension)
        # Check if file already download (incase the tool was interrupted)
        if os.path.isfile(pdfOutputPath):