Exemplo n.º 1
0
def getContent(url):
    """Fetch *url* and return the page's main content table as HTML.

    Prefers the centered table inside .ruContentPage and falls back to a
    table directly under it; inline <style> tags are stripped first.
    """
    response = s.get(url)
    page = pq(response.content)
    content = page(".ruContentPage>center>table")
    if not content:
        content = page(".ruContentPage>table")
    return content.remove("style").outer_html()
Exemplo n.º 2
0
def getAssignments(url):
    """Scrape the assignment tables from *url*.

    Returns a dict mapping each non-empty table title to a list of rows,
    where each row is a dict keyed by the table's column headers.  Cells
    that contain a single link become (text, url) tuples; everything else
    becomes plain text.
    """
    host = stripQuery(url)
    data = s.get(url)
    doc = pq(data.content)
    tableQ = ".ruContentPage > center > .ruTable"
    titles = [
        el.text() for el in doc(tableQ + " > .ruTableTitle").items()
        if el.text() != ""
    ]
    tables = doc(tableQ + " .ruTable").items()
    assignments = []
    for table in tables:
        title = [el.text() for el in table.find("tr.ruTableTitle th").items()]
        assign = []
        for el in table.find("tr").not_(".ruTableTitle").items():
            if el.text().strip() == "":
                continue
            d = []
            for item in el.find("td").items():
                if len(item.children()) == 1 and len(item.find("a[href]")) == 1:
                    link = item.find("a").attr("href")
                    # Only query-relative hrefs need the host prefix
                    # (matches getLectures); absolute links pass through
                    # unchanged instead of being blindly prefixed.
                    if link.startswith("?"):
                        link = host + link
                    d.append((item.find("a").text(), link))
                else:
                    # Plain-text cell, or anything unexpected: fall back
                    # to the cell text.  The original asserted here, which
                    # crashes on malformed rows and is stripped under -O.
                    d.append(item.text())
            assign.append(dict(zip(title, d)))

        assignments.append(assign)
    return dict(zip(titles, assignments))
Exemplo n.º 3
0
def getContent(url):
    """Fetch *url* and return the main content table's outer HTML.

    Tries the centered table first, then falls back to a table placed
    directly under .ruContentPage; inline <style> tags are removed.
    """
    data = s.get(url)
    doc = pq(data.content)
    d = doc(".ruContentPage>center>table")
    if len(d) == 0:
        d = doc(".ruContentPage>table")
    return d.remove("style").outer_html()
Exemplo n.º 4
0
def test_room():
    """
    https://www.uoko.com/api/room/list?CommunityIds=2ed6cfaf18de4d149ea999c0bf03b233&source=uoko&cityid=278&PageIndex=1

    :return:
    """

    # Build a {community-id: first-seen-name} map from mongo, then page
    # through the room-list API per community until an empty page comes back.
    for _id, Name in {AreaId["_id"]: AreaId["name"] for AreaId in mongo.collection.aggregate(pipeline=[
        {"$group": {
            "_id": "$Id",
            "name": {"$first": "$Name"},
        }},
    ])}.items():

        i = 1
        while i:
            print(_id, Name, i)
            resp = session.get(
                f"https://www.uoko.com/api/room/list?CommunityIds={_id}&source=uoko&cityid=278&PageIndex={i}")
            j = resp.json()

            # Empty Items means we have walked past the last page.
            if len(j["Items"]) == 0:
                break

            # NOTE(review): items are inserted as-is; duplicates across runs
            # are not deduplicated here -- confirm that's acceptable.
            mongo.collection.insert_many(j['Items'])

            i += 1
Exemplo n.º 5
0
    def get_bad_people(self):
        """Populate self.bad_people with unfollowers whose following/followers
        ratio exceeds 0.1, scraping each profile's embedded sharedData JSON.

        Prints each name as it is checked and a summary at the end.
        """
        self.bad_people = []

        for name in self.unfollowers:
            print(name)
            time.sleep(1)  # throttle so Instagram doesn't rate-limit us
            r = session.get('https://instagram.com/{}/'.format(name),
                            headers={'User-agent': 'your bot 0.1'})
            try:
                soup = BeautifulSoup(r.text, 'html.parser')
                scripts = soup.findAll('script', {'type': 'text/javascript'})
                # [21:-1] strips the "window._sharedData = " prefix and the
                # trailing ";" -- assumes the 4th script holds sharedData.
                shared_data = scripts[3].text[21:-1]
                main_json = json.loads(shared_data)
                user = main_json['entry_data']['ProfilePage'][0]['graphql']['user']
                followers = float(user['edge_followed_by']['count'])
                following = float(user['edge_follow']['count'])
                try:
                    if following / followers > 0.1:
                        self.bad_people.append('@' + name)
                except ZeroDivisionError:
                    # zero-follower accounts can't be scored; skip them
                    pass
            except (IndexError, KeyError, ValueError):
                # page layout changed, profile private, or JSON malformed;
                # surface the HTTP status instead of silently swallowing
                print(r.status_code)
        print("Bad People Extracted : ", len(self.bad_people))
        for name in self.bad_people:
            print(name)
Exemplo n.º 6
0
def getLectures(url):
    """Scrape the lecture tables from *url*.

    Returns a dict mapping each non-empty table title to a list of rows;
    each row is a dict keyed by column header.  A cell holding exactly one
    link becomes a (text, url) tuple, with query-relative hrefs prefixed
    by the host; all other cells are plain text.
    """
    host = stripQuery(url)
    page = pq(s.get(url).content)
    selector = ".ruContentPage > center > form > .ruTable"
    headings = [
        t.text() for t in page(selector + " > .ruTableTitle").items()
        if t.text() != ""
    ]
    results = []
    for tbl in page(selector + " .ruTable").items():
        columns = [h.text() for h in tbl.find("tr.ruTableTitle th").items()]
        rows = []
        for row in tbl.find("tr").not_(".ruTableTitle").items():
            if not row.text().strip():
                continue
            values = []
            for cell in row.find("td").items():
                if len(cell.children()) == 1 and len(cell.find("a[href]")) == 1:
                    link = cell.find("a").attr("href")
                    if link.startswith("?"):
                        link = host + link
                    values.append((cell.find("a").text(), link))
                else:
                    values.append(cell.text())
            rows.append(dict(zip(columns, values)))
        results.append(rows)
    return dict(zip(headings, results))
Exemplo n.º 7
0
def auth_with_ptc(username, password):
    """Log in to Pokemon Trainer Club and return an OAuth access token.

    Returns None on any failure (network error, parse error, or bad
    credentials).  NOTE: Python-2-era code -- response.content is used
    as a str throughout.
    """
    try:
        headers = {'User-Agent': config.mobile_UA}
        # The first request returns the login-form parameters (lt /
        # execution) as JSON; the real login URL comes from the redirect
        # history.
        response = session.get(ptc_login_url, headers=headers)
        jdata = json.loads(response.content)

        ptc_url_two = response.history[0].headers['Location']
        data = OrderedDict(
            [('lt', jdata['lt']), ('execution', jdata['execution']), ('_eventId', 'submit'), ('username', username),
             ('password', password)])

        response = session.post(ptc_url_two, data=data, headers=headers, allow_redirects=False)
        if 'errors' in response.content:
            # Bugfix: the error text contains HTML-escaped apostrophes.
            # The original line had a bare ''' here, which starts an
            # unterminated triple-quoted string (SyntaxError).
            print(json.loads(response.content)['errors'][0].replace('&#039;', '\''))
            return None
        # A successful login redirects with a ?ticket=... parameter.
        raw_ticket = response.headers['Location']
        ticket = re.sub('.*ticket=', '', raw_ticket)

        data = OrderedDict(
            [('client_id', 'mobile-app_pokemon-go'), ('redirect_uri', 'https://www.nianticlabs.com/pokemongo/error'),
             ('client_secret', 'w8ScCUXJQc6kXKw8FiOhd8Fixzht18Dq3PEVkUCP5ZPxtgyWsbTvWHFLm2wNY0JR'),
             ('grant_type', 'refresh_token'), ('code', ticket)])

        response = session.post(ptc_oauth_url, data=data)
        # Crude token extraction: strip everything up to "en=" and
        # everything after ".com".
        raw_token = re.sub('.*en=', '', response.content)
        access_token = re.sub('.com.*', '.com', raw_token)
        return access_token

    except Exception as error:
        if config.debug:
            print("[+] Failed Pokemon Trainer Club Auth:", error)
        return None
Exemplo n.º 8
0
def getAssignments(url):
    """Scrape the assignment tables from *url*.

    Returns a dict mapping each non-empty table title to a list of rows;
    each row is a dict keyed by the table's column headers.  Link cells
    become (text, host + href) tuples; plain cells become text.
    """
    host = stripQuery(url)
    data = s.get(url)
    doc = pq(data.content)
    tableQ = ".ruContentPage > center > .ruTable"
    titles = [el.text() for el in doc(tableQ + " > .ruTableTitle").items() if el.text() != ""]
    tables = doc(tableQ + " .ruTable").items()
    assignments = []
    for table in tables:
        title = [el.text() for el in table.find("tr.ruTableTitle th").items()]
        assign = []
        for el in table.find("tr").not_(".ruTableTitle").items():
            if el.text().strip() == "": continue
            d = []
            for item in el.find("td").items():
                if len(item.children()) == 0:
                    d.append(item.text())
                else:
                    # Assert that this is a link if it has children
                    # NOTE(review): assert is stripped under -O and crashes
                    # on malformed rows; also host is prepended even to
                    # absolute hrefs -- compare with getLectures.
                    assert len(item.children()) == 1 and len(item.find("a")) == 1
                    d.append((item.find("a").text(), host + item.find("a").attr("href")))
            assign.append(dict(zip(title, d)))

        assignments.append(assign)
    return dict(zip(titles,assignments))
Exemplo n.º 9
0
def getLectures(url):
    """Scrape the lecture tables from *url*.

    Returns a dict mapping each non-empty table title to a list of rows;
    each row is a dict keyed by column header.  A cell with exactly one
    link becomes a (text, url) tuple (host-prefixed when query-relative);
    everything else is plain text.
    """
    host = stripQuery(url)
    data = s.get(url)
    doc = pq(data.content)
    tableQ = ".ruContentPage > center > form > .ruTable"
    titles = [
        el.text() for el in doc(tableQ + " > .ruTableTitle").items()
        if el.text() != ""
    ]
    tables = doc(tableQ + " .ruTable").items()
    items = []
    for table in tables:
        title = [el.text() for el in table.find("tr.ruTableTitle th").items()]
        stuff = []
        for el in table.find("tr").not_(".ruTableTitle").items():
            if el.text().strip() == "": continue
            d = []
            for item in el.find("td").items():
                if len(item.children()) == 0:
                    d.append(item.text())
                elif len(item.children()) == 1 and len(
                        item.find("a[href]")) == 1:
                    # NOTE: rebinding `url` shadows the parameter, which is
                    # safe only because host was computed above.
                    url = item.find("a").attr("href")
                    if url.startswith("?"): url = host + url
                    d.append((item.find("a").text(), url))
                else:
                    d.append(item.text())
            stuff.append(dict(zip(title, d)))
        items.append(stuff)
    return dict(zip(titles, items))
Exemplo n.º 10
0
def current_user(request):
    """Resolve the username for *request* from its session cookie.

    Reads the 'user' cookie as a session id and looks it up in the
    session store; falls back to '游客' (guest) when the cookie is
    missing or the session id is unknown.
    """
    sid = request.cookies.get('user', '')
    return session.get(sid, '游客')
Exemplo n.º 11
0
    def _loadCoverArt(self):
        """ Utility function that gets the cover art from the movie """
        cover = self['Cover']
        # Extract the first ptpimg.me jpg URL (http or https, 6-char id)
        # from the Cover field; raises AttributeError if no match.
        cover = re.search(r'(https*:\/\/ptpimg.me\/[0-9a-z]{6}.jpg)',
                          cover).group(1)

        req = session.get(cover)
        # Raw image bytes; no content-type or status validation here.
        self.coverArt = req.content
Exemplo n.º 12
0
def save(path, url):
    """Download *url* and write it to *path*.

    If *path* is an existing directory, the filename is taken from the
    Content-Disposition header (sanitized via cleanFile) and the file is
    written inside it; otherwise *path* is used as the file name itself.
    """
    response = s.get(url)
    if os.path.isdir(path):
        _, params = parse_header(response.headers["Content-Disposition"])
        target = os.path.join(path, cleanFile(params["filename"]))
    else:
        target = path
    with open(target, "wb") as out:
        out.write(response.content)
Exemplo n.º 13
0
 def downloadTorrent(self, ID, dest=None, name=None):
     """Download torrent *ID* from the tracker and save it to disk.

     dest defaults to the current working directory; name defaults to the
     filename from the Content-Disposition header.  Returns the full path
     of the written file.
     """
     if not dest:
         dest = os.getcwd()
     r = session.get(self.baseURL + "/down.php/%s/file.torrent" % ID)
     if not name:
         # Raises AttributeError if the header lacks filename="..."
         name = re.search(r'filename="(.*)"', r.headers["Content-Disposition"]).group(1)
     with open(os.path.join(dest, name), "wb") as fh:
         fh.write(r.content)
     return os.path.join(dest, name)
Exemplo n.º 14
0
def getSideMenu(url):
    """Scrape the side-menu links from *url*.

    Returns {link text: absolute url} for every link in the .ruRight
    tables, skipping the last table; later entries overwrite earlier
    duplicates, matching dict-comprehension semantics.
    """
    response = s.get(url)
    page = pq(response.text)
    host = stripQuery(url)
    menu = {}
    for table in list(page(".ruRight table").items())[:-1]:
        for link in table.find("a[href]").items():
            menu[trim(link.text())] = host + link.attr("href")
    return menu
Exemplo n.º 15
0
def save(path, url):
    """Download *url* and write it to *path*.

    If *path* is a directory, the filename comes from the
    Content-Disposition header (sanitized via cleanFile); otherwise
    *path* itself is the destination file.
    """
    data = s.get(url)
    if os.path.isdir(path):
        _, params = parse_header(data.headers["Content-Disposition"])
        with open(os.path.join(path, cleanFile(params["filename"])),
                  "wb") as f:
            f.write(data.content)
    else:
        with open(path, 'wb') as f:
            f.write(data.content)
Exemplo n.º 16
0
def main():
    """Fetch the water-zone forum page, confirm we are logged in, and
    scan it for new topics."""
    rsp = session.get(WATER_ZONE_URL)

    # s.cookies.save(ignore_discard=True, ignore_expires=True)
    rsp.raise_for_status()

    # The board title only appears when logged in; prints "login OK".
    if '开心灌水' in rsp.text:
        print('登录成功')

    check_new_topic(rsp.text)
def get_speaker_articles(record):
    """Fetch and print the article list for *record* (a speaker).

    The listing URL is built from the speaker's slugified name and id;
    each <li class="paper"> entry is printed.  Network failures are
    caught and printed rather than raised.
    """
    print("get speaker articles: " + record.id)
    print(article_list_url.format(record.name.replace(' ', '-').lower(), record.id))
    try:
        r = session.get(article_list_url.format(record.name.replace(' ', '-').lower(), record.id))
        soup = BeautifulSoup(r.text, features="html.parser")
        tags = soup.find_all('li', class_="paper")
        for li in tags:
            print(li)
    except requests.exceptions.RequestException as e:  # This is the correct syntax
        print(e)
Exemplo n.º 18
0
def getAbout(url):
    """Scrape the course 'about' page at *url*.

    Returns a pair (ret, ret2): ret holds title/subtitle and teachers
    (parsed by the getTitle/getTeachers helpers); ret2 holds the second
    teachers value plus the raw HTML of the main content cell.
    """
    data = s.get(url)
    doc = pq(data.content)

    table = doc(".ruContentPage>table")
    ret = {}
    ret2 = {}
    ret["title"], ret["subtitle"] = getTitle(table.find("tr").eq(0))
    # Main content lives in the second cell of the second row.
    main = table.find("tr").eq(1).find("td").eq(1)
    ret["teachers"], ret2["teachers"] = getTeachers(main, stripQuery(url))
    ret2["main"] = main.html().strip()
    return ret, ret2
Exemplo n.º 19
0
def getAbout(url):
    """Scrape the course 'about' page at *url*.

    Returns (ret, ret2): ret has title/subtitle/teachers; ret2 has the
    teachers' secondary data and the raw HTML of the main content cell.
    """
    data = s.get(url)
    doc = pq(data.content)

    table = doc(".ruContentPage>table")
    ret = {}
    ret2 = {}
    ret["title"], ret["subtitle"] = getTitle(table.find("tr").eq(0))
    # Main content is the second cell of the second row.
    main = table.find("tr").eq(1).find("td").eq(1)
    ret["teachers"], ret2["teachers"] = getTeachers(main, stripQuery(url))
    ret2["main"] = main.html().strip()
    return ret, ret2
def get_speakers_detail(record):
    """Fill *record* with the speaker's name and organization, scraped
    from the speaker-detail page; prints (not raises) network errors."""
    print("get speaker detail: " + record.id)
    try:
        r = session.get(speaker_detail_url.format(record.id))
        soup = BeautifulSoup(r.text, features="html.parser")
        tags = soup.find_all('div', class_="maincard")
        # If several maincards match, the last one wins.
        for t in tags:
            record.name = t.h3.text
            record.organization = t.h4.text
    except requests.exceptions.RequestException as e:  # This is the correct syntax
        print(e)
    # Pause briefly to avoid overloading the server.
    time.sleep(Configs.sleep_interval)
Exemplo n.º 21
0
def getAssignment(url):
    """Scrape a single assignment page.

    Always returns the description; when the page has at least three
    form tables it also includes grade, hand-in, and statistics sections,
    each parsed by its dedicated helper.
    """
    page = pq(s.get(url).content)
    tables = page(".ruContentPage>center>form>table")
    host = stripQuery(url)
    result = {"description": getAssignmentDescription(tables.eq(0), host)}
    if len(tables) >= 3:
        result["grade"] = getAssignmentGrade(tables.eq(1), host)
        result["handin"] = getAssignmentHandin(tables.eq(2), host)
        result["statistics"] = getAssignmentStatistics(
            page(".ruContentPage>table"), host)
    return result
Exemplo n.º 22
0
def download(url):
    """Go to the thread page and follow the link to download the torrent"""
    # Open the resource (thread) page.
    log.info('Going to resource page {}'.format(url))
    r = s.get(url)

    soup = BeautifulSoup(r.text)
    a = soup.find('dl', class_='tattl').a
    # Extract the torrent download link; returns None when absent.
    href = a['href']
    if not href:
        return
    log.info('Found torrent url: {}'.format(href))

    # Either the torrent downloads directly, or we land on a waiting page.
    r = s.get(urljoin(url, href))
    r.raise_for_status()

    # Direct download: the response itself is the torrent file.
    if 'Content-Disposition' in r.headers:
        filename = get_filename_from_response(r)
        content = r.content
        log.info('Downloading torrent file {} directly'.format(filename))
    # Waiting page: the real torrent URL is inside the alert button.
    else:
        soup = BeautifulSoup(r.text)
        torrent_url = soup.find('p', class_='alert_btnleft').a['href']
        log.info(
            'Going to the page waiting for downloading, torrent real url is {}'
            .format(torrent_url))
        r = s.get(urljoin(url, torrent_url))
        r.raise_for_status()

        filename = get_filename_from_response(r)
        log.info('Downloading torrent file {}'.format(filename))
        content = r.content

    # Callers must handle the None early-return above.
    return filename, content
Exemplo n.º 23
0
def getAssignment(url):
    """Scrape a single assignment page.

    Always includes "description"; when at least three form tables are
    present, also includes "grade", "handin" and "statistics", each built
    by its dedicated helper.
    """
    data = s.get(url)
    doc = pq(data.content)
    tables = doc(".ruContentPage>center>form>table")
    host = stripQuery(url)
    ret = {}
    ret["description"] = getAssignmentDescription(tables.eq(0), host)
    if len(tables) >= 3:
        ret["grade"] = getAssignmentGrade(tables.eq(1), host)
        ret["handin"] = getAssignmentHandin(tables.eq(2), host)
        ret["statistics"] = getAssignmentStatistics(
            doc(".ruContentPage>table"), host)

    return ret
def get_year_posters(year):
    """Collect Poster(id, title) entries for conference year *year*.

    Network errors are printed, not raised; an empty list is returned
    in that case.
    """
    print("get poster of year: " + year)
    poster_list = []
    try:
        r = session.get(poster_url.format(year))
        soup = BeautifulSoup(r.text, features="html.parser")
        # Posters are divs whose onclick handler calls showDetail(<id>...).
        tags = soup.find_all('div', {"onclick": re.compile(r"showDetail.*")})
        for a in tags:
            # NOTE(review): this class matches spaces and '|' as well as
            # digits/dash; relies on the first hit being the id -- confirm.
            ids = re.compile(r"[\d | -]+").findall(a["onclick"])
            title = a.find('div', class_='maincardBody')
            if len(ids)>0:
                poster_list.append(Poster(ids[0], title.text))
    except requests.exceptions.RequestException as e:  # This is the correct syntax
        print(e)
    return poster_list
def get_speakers(poster, speaker_dict = {}):
    """Accumulate speaker Records for *poster* into speaker_dict.

    NOTE(review): the mutable default dict is shared across calls, so
    successive calls without an explicit speaker_dict accumulate into
    one dict.  That may be intentional (cross-poster dedup via the
    `id not in speaker_dict` check) -- confirm before changing.
    """
    print("get speakers by poster: " + poster.id)
    try:
        r = session.get(poster_detail_url.format(poster.id))
        soup = BeautifulSoup(r.text, features="html.parser")
        # Speakers are buttons whose onclick calls showSpeaker(<id>...).
        tags = soup.find_all('button', {"onclick": re.compile(r"showSpeaker.*")})
        for i, a in enumerate(tags):
            ids = re.compile(r"[\d | -]+").findall(a["onclick"])
            if len(ids)>0 :
                id = ids[0]
                if (id not in speaker_dict):
                    speaker_dict[id] = Record(ids[0]) 
                record = speaker_dict[id]
                # First listed speaker counts as "first"; the rest as "other".
                if i == 0 and (poster.title not in record.first):
                    record.add_first(poster.title)
                if i > 0 and (poster.title not in record.other):
                    record.add_other(poster.title)
                get_speakers_detail(record)
    except requests.exceptions.RequestException as e:  # This is the correct syntax
        print(e)
    return speaker_dict
Exemplo n.º 26
0
def test_SubArea():
    """
db.getCollection('uoko').aggregate([
    { $group: {
        _id: "$AreaId",
        name: {$first:"$Name"},
        count: {$first:"$Count"},
    }}
 ])

    """

    # Group stored docs by AreaId, then fetch the sub-area map data for
    # each area id and insert the results back into the collection.
    # NOTE(review): AreaId=3515 is hard-coded in the URL -- confirm.
    for _id, Name in {AreaId["_id"]: AreaId["name"] for AreaId in mongo.collection.aggregate(pipeline=[
        {"$group": {
            "_id": "$AreaId",
            "name": {"$first": "$Name"},
            "count": {"$first": "$Count"},
        }},
    ])}.items():
        print(_id, Name)
        resp = session.get(f"https://www.uoko.com/api/room/map?cityid=278&level=3&AreaId=3515&SubAreaId={_id}")
        j = resp.json()
        mongo.collection.insert_many(j["data"])
Exemplo n.º 27
0
def fetch_new(filter=None):
    """Refresh the resource index page and yield new url that has not been recorded"""

    # NOTE(review): `filter` shadows the builtin but is part of the public
    # signature, so it is kept.  Default policy keeps free, low-heat
    # threads under 50 * 1024 (units depend on convert_size -- confirm).
    if filter is None:
        filter = lambda heat, size, is_free: size < 50 * 1024 and is_free and heat <= 3  # return True to be kept

    log.info('Fetching new thread from resource index page')
    r = s.get(INDEX_PAGE)
    r.raise_for_status()

    soup = BeautifulSoup(r.text)

    for thread in soup.find('div', id='threadlist').find_all('tr'):
        # Heat level is encoded in the signal_<n>.png icon name.
        img = thread.find_all('td')[1].img
        heat = int(re.search(r'signal_(\d)\.png$', img['src']).group(1))

        try:
            size = convert_size(thread.find_all('td')[2].text)
        except ValueError as e:
            log.error(str(e))
            continue

        a = thread.find_all('td')[3].a
        href = a['href']
        title = a.text
        # "Free" threads carry a free.gif icon right after the link.
        is_free = a.next_sibling is not None and a.next_sibling[
            'src'].endswith('free.gif')

        url = urljoin(INDEX_PAGE, href)

        # Rejected threads are recorded too, so they are never revisited.
        if not filter(heat, size, is_free):
            record.add(url)

        elif url not in record:
            log.info('Found new thread {} : {}'.format(title, href))
            record.add(url)
            yield url
Exemplo n.º 28
0
 def downloadTorrent(self, tID, name=None):
     """Download torrent *tID* to the current directory.

     name defaults to "<tID>.torrent"; slashes in the name are replaced
     so it is a valid filename.  Returns None.
     """
     r = session.get(self.baseURL + '/download.php', params={'id': tID})
     if not name:
         name = str(tID) + '.torrent'
     with open(name.replace('/', '_'), 'wb') as fh:
         fh.write(r.content)
Exemplo n.º 29
0
 def download(self, ID):
     """Fetch torrent *ID* and return (filename, raw bytes).

     Filename is parsed from Content-Disposition; raises AttributeError
     if the header has no filename="..." part.
     """
     r = session.get(self.baseURL + "/down.php/%s/file.torrent" % ID)
     downloadName = re.search(r'filename="(.*)"', r.headers["Content-Disposition"]).group(1)
     return (downloadName, r.content)
Exemplo n.º 30
0
def test_area():
    """Fetch the level-2 map data for one hard-coded area and store it."""
    resp = session.get("https://www.uoko.com/api/room/map?AreaId=3515&level=2&cityid=278")
    j = resp.json()
    mongo.collection.insert_many(j["data"])
Exemplo n.º 31
0
 def __request(self, url, data=None):
     """GET *url* and return the response body as text.

     NOTE(review): passing ``data`` with a GET is unusual (requests puts
     it in the request body) -- confirm the API expects that.
     """
     return session.get(url, data=data).text
Exemplo n.º 32
0
def getSideMenu(url):
    """Scrape the side-menu links from *url*.

    Returns {link text: host + href} for every link in the .ruRight
    tables, excluding the last table.
    """
    data = s.get(url)
    doc = pq(data.text)
    host = stripQuery(url)
    return {trim(a.text()): host + a.attr("href") for el in list(doc(".ruRight table").items())[:-1] for a in el.find("a[href]").items()}
Exemplo n.º 33
0
def saveRedirect(path, url):
    """Resolve one redirect hop of *url* and save its target via saveLink.

    Raises KeyError if the response has no Location header (not a redirect).
    """
    data = s.get(url, allow_redirects=False)
    saveLink(path, data.headers["Location"])
Exemplo n.º 34
0
def current_user(request):
    """Resolve the user id for *request* from its 'user_id' query param.

    Returns -1 when the param is absent or the session id is unknown.
    """
    session_id = request.query.get('user_id', '')
    user_id = session.get(session_id, -1)
    return user_id
Exemplo n.º 35
0
def auth_with_google(username, password):
    """Authenticate with Google via scripted form submission and return
    the OAuth id_token, or None on any failure.

    NOTE: Python-2-era code (response.content used as str).  Fragile by
    design: every step screen-scrapes hidden form fields out of Google's
    login HTML with regexes, so any markup change breaks it.
    """
    try:
        headers = {'User-Agent': config.mobile_UA}
        response = session.get(google_oauth_url, headers=headers)

        # Pull hidden anti-forgery fields (GALX, gxf, continue) out of
        # the login form.
        GALX = re.search('<input type="hidden" name="GALX" value=".*">', response.content)
        GALX = re.sub('.*value="', '', GALX.group(0))
        GALX = re.sub('".*', '', GALX)

        gxf = re.search('<input type="hidden" name="gxf" value=".*:.*">', response.content)
        gxf = re.sub('.*value="', '', gxf.group(0))
        gxf = re.sub('".*', '', gxf)

        cont = re.search('<input type="hidden" name="continue" value=".*">', response.content)
        cont = re.sub('.*value="', '', cont.group(0))
        cont = re.sub('".*', '', cont)

        # Step 1: submit the email to get the password page.
        data = {'Page': 'PasswordSeparationSignIn',
                'GALX': GALX,
                'gxf': gxf,
                'continue': cont,
                'ltmpl': 'embedded',
                'scc': '1',
                'sarp': '1',
                'oauth': '1',
                'ProfileInformation': '',
                '_utf8': '?',
                'bgresponse': 'js_disabled',
                'Email': username,
                'signIn': 'Next'}
        response = session.post(google_login_url, data=data)

        profile = re.search('<input id="profile-information" name="ProfileInformation" type="hidden" value=".*">',
                            response.content)
        profile = re.sub('.*value="', '', profile.group(0))
        profile = re.sub('".*', '', profile)

        gxf = re.search('<input type="hidden" name="gxf" value=".*:.*">', response.content)
        gxf = re.sub('.*value="', '', gxf.group(0))
        gxf = re.sub('".*', '', gxf)

        # Step 2: submit the password with the refreshed tokens.
        data = {'Page': 'PasswordSeparationSignIn',
                'GALX': GALX,
                'gxf': gxf,
                'continue': cont,
                'ltmpl': 'embedded',
                'scc': '1',
                'sarp': '1',
                'oauth': '1',
                'ProfileInformation': profile,
                '_utf8': '?',
                'bgresponse': 'js_disabled',
                'Email': username,
                'Passwd': password,
                'signIn': 'Sign in',
                'PersistentCookie': 'yes'}
        response = session.post(google_challenge_url, data=data)

        # Follow the final redirect to the OAuth consent page, undoing
        # HTML-entity escaping in the Location URL.
        google_oauth_url_two = response.history[len(response.history)-1].headers['Location'].replace('amp%3B', '').replace('amp;', '')
        response = session.get(google_oauth_url_two)

        client_id = re.search('client_id=.*&from_login', google_oauth_url_two)
        client_id = re.sub('.*_id=', '', client_id.group(0))
        client_id = re.sub('&from.*', '', client_id)

        state_wrapper = re.search('<input id="state_wrapper" type="hidden" name="state_wrapper" value=".*">',
                                  response.content)
        state_wrapper = re.sub('.*state_wrapper" value="', '', state_wrapper.group(0))
        state_wrapper = re.sub('"><input type="hidden" .*', '', state_wrapper)

        connect_approve = re.search('<form id="connect-approve" action=".*" method="POST" style="display: inline;">',
                                    response.content)
        connect_approve = re.sub('.*action="', '', connect_approve.group(0))
        connect_approve = re.sub('" me.*', '', connect_approve)

        # Step 3: approve the OAuth grant and scrape the one-time code.
        data = OrderedDict([('bgresponse', 'js_disabled'), ('_utf8', '☃'), ('state_wrapper', state_wrapper),
                             ('submit_access', 'true')])
        response = session.post(connect_approve.replace('amp;', ''), data=data)

        code = re.search('<input id="code" type="text" readonly="readonly" value=".*" style=".*" onclick=".*;" />',
                         response.content)
        code = re.sub('.*value="', '', code.group(0))
        code = re.sub('" style.*', '', code)

        # Step 4: exchange the code for tokens.
        data = {'client_id': client_id,
                'client_secret': 'NCjF1TLi2CcY6t5mt0ZveuL7',
                'code': code,
                'grant_type': 'authorization_code',
                'redirect_uri': 'urn:ietf:wg:oauth:2.0:oob',
                'scope': 'openid email https://www.googleapis.com/auth/userinfo.email'}
        response = session.post(google_token_url, data=data)

        jdata = json.loads(response.content)
        access_token = jdata['id_token']
        return access_token

    except Exception as error:
        if config.debug:
            print("[+] Failed Google Auth:", error)
        return None
Exemplo n.º 36
0
 def __jsonRequest(self, url, data=None):
     """GET *url* and return the parsed JSON body, or None if not logged in.

     NOTE(review): passing ``data`` with a GET is unusual -- confirm the
     API expects a request body.
     """
     if not self.loggedIn:
         # Bugfix: was a Python-2 print statement; the parenthesized
         # form behaves identically on both Python 2 and 3.
         print("Not logged in")
         return None
     return session.get(url, data=data).json()
Exemplo n.º 37
0
def saveRedirect(path, url):
    """Resolve one redirect hop of *url* and save its target via saveLink.

    Raises KeyError if the response carries no Location header.
    """
    data = s.get(url, allow_redirects=False)
    saveLink(path, data.headers["Location"])