Пример #1
0
        #self.hrefs[self.url] = self.soup.find_all('a')
        self.hrefs[self.url] = self.soup.find_all(href=re.compile("showforum"))
    def setLinks(self):
        """Store the ``href`` value of every entry in ``self.hrefs[self.url]``.

        The resulting list is written to ``self.links[self.url]``; an entry
        whose ``get('href')`` finds nothing contributes whatever ``get``
        returns for a missing key.
        """
        self.links[self.url] = [
            anchor.get('href') for anchor in self.hrefs[self.url]
        ]
    def getLinks(self):
        """Return the entry of ``self.links`` stored under ``self.url``."""
        page_url = self.url
        return self.links[page_url]
    
if __name__ == "__main__":
    # Manual driver: open the forum index through the project's connection
    # wrapper, print the page <title> text, then let ParentWebPage collect
    # its hrefs and links.
    FORUM_URL = "http://seahawkshuddle.com/forum/"

    # dbs/cursor are not used in the visible part of this script; the call is
    # kept because connectDB() may have side effects -- TODO confirm.
    dbs, cursor = connectDB()

    myhost = URL()
    myhost.setUrls(FORUM_URL)
    conn = connection(myhost.getUrls())
    conn.createConnection()

    myparent = ParentWebPage(conn)
    myparent.setContent()

    # NOTE(review): lxml.html.parse is handed the URL itself, so it loads the
    # page independently of `conn`.
    tree = lxml.html.parse(FORUM_URL)
    elements = tree.xpath('//title//text()')
    # Single-argument parenthesised form: same output under Python 2 and,
    # unlike the bare print statement, also valid Python 3 syntax.
    print(elements)

    myparent.sethrefs()
    myparent.setLinks()
Пример #2
0
    def setLinks(self):
        """Collect the ``href`` of each item found for the current URL.

        Reads ``self.hrefs[self.url]`` and saves the list of ``get('href')``
        results under the same key in ``self.links``.
        """
        found = self.hrefs[self.url]
        hrefs_only = [item.get('href') for item in found]
        self.links[self.url] = hrefs_only

    def getLinks(self):
        """Look up ``self.url`` in ``self.links`` and return what is stored."""
        stored = self.links[self.url]
        return stored


if __name__ == "__main__":
    # Script entry point: connect to the forum index, show its <title> text
    # and run ParentWebPage's href/link collection on it.
    forum_index = "http://seahawkshuddle.com/forum/"

    # The returned handles are not referenced in the visible part of this
    # script; connectDB() is still called in case it has side effects --
    # TODO confirm.
    dbs, cursor = connectDB()

    myhost = URL()
    myhost.setUrls(forum_index)
    conn = connection(myhost.getUrls())
    conn.createConnection()

    myparent = ParentWebPage(conn)
    myparent.setContent()

    # NOTE(review): the URL (not the content held by `conn`) is passed to
    # lxml, so the page is loaded a second time here.
    tree = lxml.html.parse(forum_index)
    elements = tree.xpath('//title//text()')
    # print(...) with one argument behaves the same on Python 2 and parses
    # on Python 3, where the statement form is a SyntaxError.
    print(elements)

    myparent.sethrefs()
    myparent.setLinks()
Пример #3
0
    def createConnection(self):
        """Download ``self.host`` and keep the response body in ``self.htmlPage``.

        Any exception raised while opening or reading the URL is printed and
        swallowed; in that case this call does not assign ``self.htmlPage``.
        """
        try:
            request = urllib2.Request(self.host)
            response = urllib2.urlopen(request)
            try:
                self.htmlPage = response.read()
            finally:
                # Release the underlying socket even when read() fails.
                response.close()
        except Exception as e:
            # Failure is non-fatal: the message is printed and the caller
            # carries on (a bare `exit` expression never terminated anything).
            print(e)
    def gethtmlPage(self):
        """Return the page body held in ``self.htmlPage``."""
        page_body = self.htmlPage
        return page_body
    def getHost(self):
        """Return the value of ``self.host``, which createConnection requests."""
        target = self.host
        return target


if __name__ == "__main__":
    # Manual driver: download the board index, print its <title> text and
    # queue the forum links found in the "col_c_forum" table cells.
    myhost = URL()
    myhost.setUrls("http://boards.baltimoreravens.com/")
    conn = connection(myhost.getUrls())
    conn.createConnection()

    # Parse the already-downloaded markup rather than fetching it again.
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(conn.gethtmlPage()), parser)
    elements = tree.xpath('//title//text()')
    # Single-argument parenthesised form: same output under Python 2 and,
    # unlike the bare print statement, also valid Python 3 syntax.
    print(elements)

    urls = tree.xpath('.//td[@class="col_c_forum"]//h4//a/@href')
    # `p` is not used in the visible part of the script; kept in case code
    # further down relies on it -- TODO confirm.
    p = re.compile('#')
    # Independent copy of the XPath result, used as the work queue.
    urlQ = list(urls)
    forum = forumPage()
Пример #4
0
            openConxn = urllib2.Request(self.host)
            response = urllib2.urlopen(openConxn)
            self.htmlPage = response.read()
        except Exception as e:
            print e
            exit

    def gethtmlPage(self):
        """Give back the content currently stored in ``self.htmlPage``."""
        html = self.htmlPage
        return html

    def getHost(self):
        """Give back the value stored in ``self.host``."""
        host = self.host
        return host


if __name__ == "__main__":
    # Script entry point: fetch the board index through the connection
    # wrapper, show the <title> text and build a queue of forum URLs.
    myhost = URL()
    myhost.setUrls("http://boards.baltimoreravens.com/")
    conn = connection(myhost.getUrls())
    conn.createConnection()

    # The HTML held by `conn` is wrapped in a file-like object for lxml.
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(conn.gethtmlPage()), parser)
    elements = tree.xpath('//title//text()')
    # print(...) with one argument behaves the same on Python 2 and parses
    # on Python 3, where the statement form is a SyntaxError.
    print(elements)

    urls = tree.xpath('.//td[@class="col_c_forum"]//h4//a/@href')
    # Compiled pattern is unused in the visible lines; left in place because
    # the script may continue beyond this excerpt -- TODO confirm.
    p = re.compile('#')
    # Copy the hrefs into a separate list that serves as the URL queue.
    urlQ = list(urls)
    forum = forumPage()