Пример #1
0
    def scrape_bid_description(self, bid):
        """Fill in ``bid.description`` from the document linked on the bid page.

        Opens ``bid.url``, follows the first anchor whose href ends in
        ``showdocument?id=<digits>``, passes the downloaded content through
        ``pdftohtml`` and stores ``get_all_text`` of the resulting
        ``<body>`` on the bid, which is then saved.
        """
        browser = self.br
        browser.open(bid.url)
        page = soupify(browser.response().read())

        # The description is taken from a separate document, not this page.
        doc_link = page.find('a', href=re.compile(r'showdocument\?id=\d+$'))
        doc_url = urlparse.urljoin(browser.geturl(), doc_link['href'])

        browser.open(doc_url)
        converted = soupify(pdftohtml(browser.response().read()))

        bid.description = get_all_text(converted.html.body)
        bid.save()
Пример #2
0
    def scrape_bid_description(self, bid):
        """Populate ``bid.description`` from the linked ``showdocument`` file.

        The bid page is opened, the first anchor whose href ends in
        ``showdocument?id=<digits>`` is followed, and the downloaded content
        is converted with ``pdftohtml``.  ``get_all_text`` of the converted
        ``<body>`` becomes the description and the bid is saved.
        """
        self.br.open(bid.url)
        listing = soupify(self.br.response().read())

        href_pattern = re.compile(r'showdocument\?id=\d+$')
        anchor = listing.find('a', href=href_pattern)
        target = urlparse.urljoin(self.br.geturl(), anchor['href'])

        # Second request: fetch the document itself.
        self.br.open(target)
        raw_document = self.br.response().read()
        document = soupify(pdftohtml(raw_document))

        bid.description = get_all_text(document.html.body)
        bid.save()
Пример #3
0
    def scrape_bids(self):
        """Rebuild this organisation's bids from its bids page.

        All existing bids of ``self.org`` are deleted, then one ``Bid`` is
        saved for every ``<td>`` whose text starts with "Solicitation"
        (leading whitespace allowed).  Title and description come from the
        first two cells of that cell's row; the due date is set only when
        ``self.date_regex`` matches the third-from-last cell, with the
        groups read as month, day, year.
        """
        self.br.open(self.org.bids_page_url)
        page = soupify(self.br.response().read())
        marker = re.compile(r'^\s*Solicitation')

        def is_solicitation_cell(node):
            return node.name == 'td' and re.search(marker, node.text)

        # Old bids are dropped before the freshly scraped ones are saved.
        self.org.bid_set.all().delete()

        for cell in page.findAll(is_solicitation_cell):
            cells = cell.findParent('tr').findAll('td')

            bid = Bid(org=self.org)
            bid.title = cells[0].text
            bid.url = self.br.geturl()
            bid.description = get_all_text(cells[1])

            found = re.search(self.date_regex, cells[-3].text)
            if found:
                month, day, year = found.groups()
                bid.due_date = datetime.date(day=int(day),
                                             month=int(month),
                                             year=int(year))

            bid.save()
Пример #4
0
    def scrape_bid_links(self, url):
        """Collect open bids from the procurement listing at ``url``.

        Each anchor whose href is exactly ``index.php?m=procurement&id=<digits>``
        is examined together with the cells of its table row.  Rows whose
        last cell is not the text ``OPEN`` are ignored.  The due date is
        searched with ``self.date_regex`` in the third child of the row's
        second cell and is set only when the pattern matches.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        link_pattern = re.compile(r'^index\.php\?m=procurement&id=\d+$')

        collected = []
        for link in page.findAll('a', href=link_pattern):
            cells = link.findParent('tr').findAll('td')

            # Verify open status
            if cells[-1].text != 'OPEN':
                continue

            date_match = re.search(self.date_regex, cells[1].contents[2])

            bid = Bid(org=self.org)
            bid.title = link.text
            bid.url = urlparse.urljoin(self.br.geturl(), link['href'])

            if date_match:
                month, day, year = date_match.groups()
                bid.due_date = datetime.date(day=int(day),
                                             month=int(month),
                                             year=int(year))

            bid.location = self.org.location
            collected.append(bid)

        return collected
Пример #5
0
    def scrape_bid_links(self, url):
        """Collect bids linked from the purchasing page at ``url``.

        Every anchor whose href starts with
        ``/purchasing/bid-form?bidnumb=<digits>`` becomes a ``Bid`` titled
        with the link text.  The due date is read from the first following
        text node containing "Due:" and parsed as ``"%B %d, %Y"``.  When
        that text is missing or does not parse, the bid is still returned,
        just without a due date.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())
        link_re = re.compile(r'^/purchasing/bid-form\?bidnumb=\d+')
        due_re = re.compile(r'Due:')

        for a in s.findAll('a', href=link_re):
            bid = Bid(org=self.org)
            bid.title = a.text
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            due = a.findNext(text=due_re)
            if due is not None:
                # The matched text contains "Due:", so index 1 always exists.
                due = due.split(':')[1].strip()

                try:
                    parsed = time.strptime(due, "%B %d, %Y")
                except ValueError:
                    # Unexpected date format: the due date is best-effort.
                    pass
                else:
                    bid.due_date = datetime.date(day=parsed.tm_mday,
                                                 month=parsed.tm_mon,
                                                 year=parsed.tm_year)

            bids.append(bid)

        return bids
Пример #6
0
    def scrape_bid_links(self, url):
        """Collect bids from ``bids.aspx?bidID=<digits>`` links on ``url``.

        Anchors with a non-empty ``style`` attribute are skipped.  For the
        others the due date is searched, as ``M/D/YYYY``, in the last
        ``<span>`` of the last cell of the anchor's row and is set only
        when found.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        link_pattern = re.compile(r'bids\.aspx\?bidID=\d+$')
        date_pattern = re.compile(r'(\d{1,2})/(\d{1,2})/(\d{4})') # date regex

        collected = []
        for link in page.findAll('a', href=link_pattern):
            if link.get('style', False):
                continue

            cells = link.findParent('tr').findAll('td')
            spans = cells[-1].findAll('span')
            date_match = re.search(date_pattern, spans[-1].text)

            bid = Bid(org=self.org)
            bid.title = link.text
            bid.url = urlparse.urljoin(self.br.geturl(), link['href'])

            if date_match:
                month, day, year = date_match.groups()
                bid.due_date = datetime.date(day=int(day),
                                             month=int(month),
                                             year=int(year))

            bid.location = self.org.location
            collected.append(bid)

        return collected
Пример #7
0
    def scrape_bid_links(self, url):
        """Collect bids from ``viewbid.aspx?bid_id=<digits>`` links on ``url``.

        For each matching anchor the cells of its table row are read: the
        second cell supplies the title, and the second-to-last cell is
        searched with ``self.date_regex`` for the due date, which is set
        only when the pattern matches.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        link_pattern = re.compile(r'viewbid\.aspx\?bid_id=\d+$')

        collected = []
        for link in page.findAll('a', href=link_pattern):
            cells = link.findParent('tr').findAll('td')
            date_match = re.search(self.date_regex, cells[-2].text)

            bid = Bid(org=self.org)
            bid.title = cells[1].text
            bid.url = urlparse.urljoin(self.br.geturl(), link['href'])

            if date_match:
                month, day, year = date_match.groups()
                bid.due_date = datetime.date(day=int(day),
                                             month=int(month),
                                             year=int(year))

            bid.location = self.org.location
            collected.append(bid)

        return collected
Пример #8
0
    def scrape_bid_links(self, url):
        """Collect bids from the ``files/<name>.pdf`` links on ``url``.

        Every anchor whose href is ``files/`` followed by a dot-free name
        and ``.pdf`` becomes a ``Bid`` titled with the link text.  The due
        date is read from the next ``<span>`` whose id contains
        ``lblSubDate`` and is set only when ``self.date_regex`` matches its
        text (groups read as month, day, year).

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())
        link_re = re.compile(r'^files/[^.]+\.pdf$')
        date_span_re = re.compile(r'lblSubDate')

        for a in s.findAll('a', href=link_re):
            # Pass the organisation by keyword, as the other scrapers do.
            # NOTE(review): passed positionally it is presumably bound to a
            # different model field -- verify against the Bid model.
            bid = Bid(org=self.org)
            bid.title = a.text
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            p = a.findNext('span', id=date_span_re)

            if p:
                z = re.search(self.date_regex, p.text)
                if z:
                    m, d, y = z.groups()
                    bid.due_date = datetime.date(day=int(d),
                                                 month=int(m),
                                                 year=int(y))

            bids.append(bid)

        return bids
Пример #9
0
    def scrape_bid_links(self, url):
        """Collect bids from ``viewbid?id=<digits>`` links on ``url``.

        For each matching anchor the cells of its table row are read: the
        ``<span>`` in the second cell supplies the title, and the last cell
        is searched with ``self.date_regex`` for the due date, which is set
        only when the pattern matches.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        link_pattern = re.compile(r'viewbid\?id=\d+$')

        collected = []
        for link in page.findAll('a', href=link_pattern):
            cells = link.findParent('tr').findAll('td')
            date_match = re.search(self.date_regex, cells[-1].text)

            bid = Bid(org=self.org)
            bid.title = cells[1].span.text
            bid.url = urlparse.urljoin(self.br.geturl(), link['href'])

            if date_match:
                month, day, year = date_match.groups()
                bid.due_date = datetime.date(day=int(day),
                                             month=int(month),
                                             year=int(year))

            bid.location = self.org.location
            collected.append(bid)

        return collected
Пример #10
0
    def scrape_bid_links(self, url):
        """Collect "RFP -" PDF links on ``url`` as bids.

        An anchor qualifies when its href ends in ``.pdf`` and its text
        starts with "RFP -".  The title is the text of the anchor's
        ``<strong>`` child.  The due date is carved out of the text that
        follows that ``<strong>``: whitespace is collapsed, everything up
        to the first comma and the last space-separated word are dropped,
        and the remainder is parsed as ``"%b %d, %Y"``.  A remainder that
        does not parse leaves the bid without a due date.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())
        pdf_re = re.compile(r'\.pdf$')
        rfp_re = re.compile(r'^RFP -')
        f = lambda x: (x.name == 'a' and re.search(pdf_re, x.get('href', ''))
                       and re.search(rfp_re, x.text))

        for a in s.findAll(f):
            bid = Bid(org=self.org)
            bid.title = a.strong.text
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            d = a.strong.nextSibling
            d = ' '.join(d.split())
            d = d.split(',', 1)[1].strip()
            d = d.rsplit(' ', 1)[0].strip()

            try:
                # Use a dedicated name: the filter above closes over its
                # regexes, so they must not be rebound inside the loop.
                parsed = time.strptime(d, "%b %d, %Y")
            except ValueError:
                # Unexpected date format: the due date is best-effort.
                pass
            else:
                bid.due_date = datetime.date(day=parsed.tm_mday,
                                             month=parsed.tm_mon,
                                             year=parsed.tm_year)

            bids.append(bid)

        return bids
Пример #11
0
    def scrape_bid_links(self, url):
        """Collect bids from the "Current Bid Postings " table on ``url``.

        The table is located by its ``summary`` attribute (the value ends
        with a space).  Each anchor inside it whose href contains ``/Bids/``
        becomes a ``Bid`` titled with the link text.  The second-to-last
        cell of the anchor's row is searched with ``self.date_regex`` and
        the due date is set only when it matches.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        postings = page.find('table',
                             attrs={'summary': 'Current Bid Postings '})
        link_pattern = re.compile(r'/Bids/')

        collected = []
        for link in postings.findAll('a', href=link_pattern):
            cells = link.findParent('tr').findAll('td')
            date_match = re.search(self.date_regex, cells[-2].text)

            bid = Bid(org=self.org)
            bid.title = link.text
            bid.url = urlparse.urljoin(self.br.geturl(), link['href'])

            if date_match:
                month, day, year = date_match.groups()
                bid.due_date = datetime.date(day=int(day),
                                             month=int(month),
                                             year=int(year))

            bid.location = self.org.location
            collected.append(bid)

        return collected
Пример #12
0
    def scrape_bid_links(self, url):
        """Collect bids from "Invitation for Bid" / "Summary" links on ``url``.

        Only anchors whose text is exactly one of those two labels are
        used.  The title is the node three siblings before the anchor.  The
        due date is read from the first text node in the anchor's row that
        contains "Due:": the part after the first colon, cut at "at", is
        parsed as ``"%B %d, %Y"``.  Missing or unparseable date text leaves
        the bid without a due date.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())
        labels = ('Invitation for Bid', 'Summary')
        # The name test must apply to both labels; without the grouping any
        # tag whose text is 'Summary' would match and fail on a['href'].
        f = lambda x: x.name == 'a' and x.text in labels
        due_re = re.compile(r'\bDue:')

        for a in s.findAll(f):
            t = a.previousSibling.previousSibling.previousSibling
            bid = Bid(org=self.org)
            bid.title = t
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            tr = a.findParent('tr')
            d = tr.find(text=due_re)

            if d:
                d = d.split(':')[1].strip()
                d = d.split('at')[0].strip()

                try:
                    parsed = time.strptime(d, "%B %d, %Y")
                except ValueError:
                    # Unexpected date format: the due date is best-effort.
                    pass
                else:
                    bid.due_date = datetime.date(day=parsed.tm_mday,
                                                 month=parsed.tm_mon,
                                                 year=parsed.tm_year)

            bids.append(bid)

        return bids
Пример #13
0
    def scrape_bid_links(self, url):
        """Gather bids from anchors ending in ``bids.aspx?bidID=<digits>``.

        Anchors that carry a non-empty ``style`` attribute are ignored.  The
        due date is looked for, in ``M/D/YYYY`` form, in the final
        ``<span>`` of the final cell of the anchor's row; it is assigned
        only when that search succeeds.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        soup = soupify(self.br.response().read())
        href_re = re.compile(r'bids\.aspx\?bidID=\d+$')
        date_re = re.compile(r'(\d{1,2})/(\d{1,2})/(\d{4})')  # date regex

        results = []
        for anchor in soup.findAll('a', href=href_re):
            if anchor.get('style', False):
                continue

            row_cells = anchor.findParent('tr').findAll('td')
            last_cell_spans = row_cells[-1].findAll('span')
            hit = re.search(date_re, last_cell_spans[-1].text)

            bid = Bid(org=self.org)
            bid.title = anchor.text
            bid.url = urlparse.urljoin(self.br.geturl(), anchor['href'])

            if hit:
                month, day, year = hit.groups()
                bid.due_date = datetime.date(day=int(day),
                                             month=int(month),
                                             year=int(year))

            bid.location = self.org.location
            results.append(bid)

        return results
Пример #14
0
    def scrape_bid_description(self, bid):
        """Fill in contact, email and description from the "Bid Details" table.

        The table is located by ``summary="Bid Details"``.  Inside it, the
        first ``<span>`` with class ``BidListHeader`` whose text starts with
        "Contact" (case-insensitive) is looked up; when present, the text of
        the next ``<tr>`` becomes ``bid.contact`` and the first e-mail
        address found in that row becomes ``bid.email``.  The description
        is ``get_all_text`` of the whole table.  The bid is saved at the
        end.
        """
        self.br.open(bid.url)
        page = soupify(self.br.response().read())
        details = page.find('table', attrs={'summary': 'Bid Details'})

        contact_re = re.compile(r'^Contact', re.IGNORECASE)
        email_re = re.compile(r'\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b',
                              re.IGNORECASE)  # email regex

        def is_contact_header(node):
            return (node.name == 'span'
                    and 'BidListHeader' in node.attrs.get('class', [])
                    and re.search(contact_re, node.text))

        header = details.find(is_contact_header)

        if header:
            contact_row = header.findNext('tr')
            bid.contact = contact_row.text.strip()

            email_text = contact_row.find(text=email_re)
            if email_text:
                bid.email = re.search(email_re, email_text).group(0)

        bid.description = get_all_text(details)
        bid.save()
Пример #15
0
    def scrape_bid_links(self, url):
        """Collect bids from RFP PDF links on ``url``.

        An anchor qualifies when its href ends in ``RFP<dot-free text>.pdf``.
        The due date comes from the last cell of the anchor's row: the text
        after the first comma, cut at "at", is parsed as ``"%B %d, %Y"``.
        Text that does not parse leaves the bid without a due date.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())
        link_re = re.compile(r'\bRFP[^.]+\.pdf$')

        for a in s.findAll('a', href=link_re):
            tr = a.findParent('tr')
            td = tr.findAll('td')

            bid = Bid(org=self.org)
            bid.title = a.text
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            d = td[-1].text.split(',', 1)[1]
            d = d.split('at')[0].strip()

            try:
                parsed = time.strptime(d, "%B %d, %Y")
            except ValueError:
                # Unexpected date format: the due date is best-effort.
                pass
            else:
                bid.due_date = datetime.date(day=parsed.tm_mday,
                                             month=parsed.tm_mon,
                                             year=parsed.tm_year)

            bids.append(bid)

        return bids
Пример #16
0
    def scrape_bid_links(self, url):
        """Collect bids from RFP PDF links on ``url``.

        Anchors whose href ends in ``RFP<dot-free text>.pdf`` each become a
        ``Bid`` titled with the link text.  The last cell of the anchor's
        row supplies the due date: the text after the first comma, cut at
        "at", is parsed as ``"%B %d, %Y"``.  If it does not parse, the due
        date is left unset.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())
        link_re = re.compile(r'\bRFP[^.]+\.pdf$')

        for a in s.findAll('a', href=link_re):
            tr = a.findParent('tr')
            td = tr.findAll('td')

            bid = Bid(org=self.org)
            bid.title = a.text
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            d = td[-1].text.split(',', 1)[1]
            d = d.split('at')[0].strip()

            try:
                # Keep the parsed time separate from the link regex.
                parsed = time.strptime(d, "%B %d, %Y")
            except ValueError:
                # Only a date-format mismatch is tolerated here.
                pass
            else:
                bid.due_date = datetime.date(day=parsed.tm_mday,
                                             month=parsed.tm_mon,
                                             year=parsed.tm_year)

            bids.append(bid)

        return bids
Пример #17
0
    def scrape_bid_links(self, url):
        """Gather bids listed in the "Current Bid Postings " table.

        The table is found through its ``summary`` attribute (note the
        trailing space in the value).  Anchors in it whose href contains
        ``/Bids/`` each yield a ``Bid`` titled with the link text.  The due
        date is set when ``self.date_regex`` matches the second-to-last
        cell of the anchor's row.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        soup = soupify(self.br.response().read())
        table_attrs = {'summary': 'Current Bid Postings '}
        table = soup.find('table', attrs=table_attrs)
        href_re = re.compile(r'/Bids/')

        results = []
        for anchor in table.findAll('a', href=href_re):
            row_cells = anchor.findParent('tr').findAll('td')
            hit = re.search(self.date_regex, row_cells[-2].text)

            bid = Bid(org=self.org)
            bid.title = anchor.text
            bid.url = urlparse.urljoin(self.br.geturl(), anchor['href'])

            if hit:
                month, day, year = hit.groups()
                bid.due_date = datetime.date(day=int(day),
                                             month=int(month),
                                             year=int(year))

            bid.location = self.org.location
            results.append(bid)

        return results
Пример #18
0
    def scrape_bid_links(self, url):
        """Collect bids from the ``files/<name>.pdf`` links on ``url``.

        Each anchor whose href is ``files/`` plus a dot-free name plus
        ``.pdf`` yields a ``Bid`` titled with the link text.  The next
        ``<span>`` whose id contains ``lblSubDate`` is searched with
        ``self.date_regex``; the due date is set only when both the span
        and a match exist (groups read as month, day, year).

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())
        link_re = re.compile(r'^files/[^.]+\.pdf$')
        date_span_re = re.compile(r'lblSubDate')

        for a in s.findAll('a', href=link_re):
            # Keyword argument, matching the other scrapers in this file.
            # NOTE(review): a positional argument is presumably bound to a
            # different model field -- verify against the Bid model.
            bid = Bid(org=self.org)
            bid.title = a.text
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            p = a.findNext('span', id=date_span_re)

            if p:
                z = re.search(self.date_regex, p.text)
                if z:
                    m, d, y = z.groups()
                    bid.due_date = datetime.date(day=int(d),
                                                 month=int(m),
                                                 year=int(y))

            bids.append(bid)

        return bids
Пример #19
0
    def scrape_bid_links(self, url):
        """Collect bids linked from the purchasing page at ``url``.

        Anchors whose href starts with
        ``/purchasing/bid-form?bidnumb=<digits>`` each yield a ``Bid``
        titled with the link text.  The first following text node that
        contains "Due:" supplies the due date, parsed as ``"%B %d, %Y"``
        from the part after the first colon.  A missing or unparseable
        date leaves the due date unset instead of aborting the scrape.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())
        link_re = re.compile(r'^/purchasing/bid-form\?bidnumb=\d+')
        due_re = re.compile(r'Due:')

        for a in s.findAll('a', href=link_re):
            bid = Bid(org=self.org)
            bid.title = a.text
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            due = a.findNext(text=due_re)
            if due is not None:
                # "Due:" is part of the match, so a second piece exists.
                due = due.split(':')[1].strip()

                try:
                    parsed = time.strptime(due, "%B %d, %Y")
                except ValueError:
                    # Only a date-format mismatch is tolerated here.
                    pass
                else:
                    bid.due_date = datetime.date(day=parsed.tm_mday,
                                                 month=parsed.tm_mon,
                                                 year=parsed.tm_year)

            bids.append(bid)

        return bids
Пример #20
0
    def scrape_bid_links(self, url):
        """Collect bids from ``BidBoard.cfm?BidID=<digits>`` links on ``url``.

        Each matching anchor yields a ``Bid`` titled with the link text.
        The fourth-from-last cell of the anchor's row is searched for a
        date written with ``/`` or ``.`` separators (groups read as month,
        day, year).  Years below 100 are treated as 20xx so that a
        two-digit year is not stored as a first-century date.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())
        link_re = re.compile(r'^BidBoard\.cfm\?BidID=\d+$')
        date_re = re.compile(
            r'(\d{1,2})[/.](\d{1,2})[/.](\d{2,4})')  # date regex

        for a in s.findAll('a', href=link_re):
            tr = a.findParent('tr')
            td = tr.findAll('td')

            bid = Bid(org=self.org)
            bid.title = a.text
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            z = re.search(date_re, td[-4].text)

            if z:
                month, day, year = [int(g) for g in z.groups()]
                # The pattern accepts two-digit years; taken literally "14"
                # would become the year 14.
                if year < 100:
                    year += 2000
                bid.due_date = datetime.date(day=day,
                                             month=month,
                                             year=year)

            bids.append(bid)

        return bids
Пример #21
0
    def scrape_bid_links(self, url):
        """Collect bids from "Invitation for Bid" / "Summary" links on ``url``.

        Only anchors whose text equals one of those two labels are used.
        The node three siblings before the anchor becomes the title.  The
        first text node in the anchor's row containing "Due:" supplies the
        due date: the part after the first colon, cut at "at", is parsed as
        ``"%B %d, %Y"``.  Missing or unparseable date text leaves the due
        date unset.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())
        labels = ('Invitation for Bid', 'Summary')
        # Require an anchor for both labels; the ungrouped and/or form also
        # matched non-anchor tags whose text is 'Summary'.
        f = lambda x: x.name == 'a' and x.text in labels
        due_re = re.compile(r'\bDue:')

        for a in s.findAll(f):
            t = a.previousSibling.previousSibling.previousSibling
            bid = Bid(org=self.org)
            bid.title = t
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            tr = a.findParent('tr')
            d = tr.find(text=due_re)

            if d:
                d = d.split(':')[1].strip()
                d = d.split('at')[0].strip()

                try:
                    parsed = time.strptime(d, "%B %d, %Y")
                except ValueError:
                    # Only a date-format mismatch is tolerated here.
                    pass
                else:
                    bid.due_date = datetime.date(day=parsed.tm_mday,
                                                 month=parsed.tm_mon,
                                                 year=parsed.tm_year)

            bids.append(bid)

        return bids
Пример #22
0
    def scrape_bid_links(self, url):
        """Collect bids from "Bid:" ``/LinkClick.aspx`` links on ``url``.

        An anchor qualifies when its href starts with ``/LinkClick.aspx?``
        and its text starts with "Bid:".  The title is the node following
        the parent of the "Title:" text in the anchor's cell.  The bid URL
        is the absolute link passed through ``urllib.quote`` with ``:/``
        kept as safe characters.  The due date is the node following the
        parent of the next "Close Date:" text, parsed as ``"%B %d, %Y"``;
        when the label is absent or the date does not parse, the due date
        is left unset.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())
        href_re = re.compile(r'^/LinkClick\.aspx\?')
        text_re = re.compile(r'^Bid:')
        close_re = re.compile(r'^Close Date:')
        f = lambda x: (x.name == 'a'
                       and re.search(href_re, x.get('href', ''))
                       and re.search(text_re, x.text))

        for a in s.findAll(f):
            td = a.findParent('td')
            t = td.find(text='Title:')
            t = t.parent.nextSibling

            bid = Bid(org=self.org)
            bid.title = t
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.url = urllib.quote(bid.url, ':/')
            bid.location = self.org.location

            label = a.findNext(text=close_re)
            if label is not None:
                d = label.parent.nextSibling.strip()

                try:
                    parsed = time.strptime(d, "%B %d, %Y")
                except ValueError:
                    # Unexpected date format: the due date is best-effort.
                    pass
                else:
                    bid.due_date = datetime.date(day=parsed.tm_mday,
                                                 month=parsed.tm_mon,
                                                 year=parsed.tm_year)

            bids.append(bid)

        return bids
Пример #23
0
    def scrape_bid_description(self, bid):
        """Set ``bid.description`` from the document at ``bid.url`` and save.

        The downloaded content is converted with ``doctohtml``, parsed with
        ``soupify``, and ``get_all_text`` of the resulting ``<body>`` is
        stored as the description.
        """
        self.br.open(bid.url)
        raw_document = self.br.response().read()

        document = soupify(doctohtml(raw_document))
        body_text = get_all_text(document.html.body)

        bid.description = body_text
        bid.save()
Пример #24
0
    def scrape_bid_description(self, bid):
        """Set ``bid.description`` from the document at ``bid.url`` and save.

        The downloaded content is converted with ``pdftohtml``, parsed with
        ``soupify``, and ``get_all_text`` of the resulting ``<body>`` is
        stored as the description.
        """
        self.br.open(bid.url)
        raw_document = self.br.response().read()

        document = soupify(pdftohtml(raw_document))
        body_text = get_all_text(document.html.body)

        bid.description = body_text
        bid.save()
Пример #25
0
    def scrape_bid_description(self, bid):
        """Set ``bid.description`` from the table around the page's ``<h1>``.

        Opens ``bid.url``, takes the first ``<h1>`` and its enclosing
        ``<table>``, stores ``get_all_text`` of that table as the
        description and saves the bid.
        """
        self.br.open(bid.url)
        page = soupify(self.br.response().read())

        heading = page.find('h1')
        enclosing_table = heading.findParent('table')

        bid.description = get_all_text(enclosing_table)
        bid.save()
Пример #26
0
    def scrape_bid_description(self, bid):
        """Set ``bid.description`` from the ``ms-formtable`` table and save.

        Opens ``bid.url``, finds the first ``<table>`` with class
        ``ms-formtable`` and stores ``get_all_text`` of it as the
        description.
        """
        self.br.open(bid.url)
        page = soupify(self.br.response().read())

        form_table = page.find('table', attrs={'class': 'ms-formtable'})

        bid.description = get_all_text(form_table)
        bid.save()
Пример #27
0
    def scrape_bid_description(self, bid):
        """Set ``bid.description`` from the ``PurchasingBids`` table and save.

        Opens ``bid.url``, finds the first ``<table>`` whose id contains
        ``PurchasingBids`` and stores ``get_all_text`` of it as the
        description.
        """
        self.br.open(bid.url)
        page = soupify(self.br.response().read())

        table_id = re.compile(r'PurchasingBids')
        bids_table = page.find('table', id=table_id)

        bid.description = get_all_text(bids_table)
        bid.save()
Пример #28
0
    def scrape_bid_description(self, bid):
        """Store the text of the page's ``ms-formtable`` table on the bid.

        The page at ``bid.url`` is parsed, the first ``<table>`` with class
        ``ms-formtable`` is handed to ``get_all_text``, and the result is
        saved as ``bid.description``.
        """
        self.br.open(bid.url)
        html = self.br.response().read()
        soup = soupify(html)

        wanted = {'class': 'ms-formtable'}
        table = soup.find('table', attrs=wanted)
        bid.description = get_all_text(table)

        bid.save()
Пример #29
0
    def scrape_bid_description(self, bid):
        """Store the text of the table enclosing the first ``<h1>`` on the bid.

        The page at ``bid.url`` is parsed, the ``<table>`` that is a parent
        of the first ``<h1>`` is handed to ``get_all_text``, and the result
        is saved as ``bid.description``.
        """
        self.br.open(bid.url)
        html = self.br.response().read()
        soup = soupify(html)

        table = soup.find('h1').findParent('table')
        bid.description = get_all_text(table)

        bid.save()
Пример #30
0
    def scrape_bid_description(self, bid):
        """Set ``bid.description`` from the ``bidViewResultsLeft`` cell and save.

        Opens ``bid.url``, finds the first ``<td>`` with class
        ``bidViewResultsLeft`` and stores ``get_all_text`` of it as the
        description.
        """
        self.br.open(bid.url)
        page = soupify(self.br.response().read())

        results_cell = page.find('td', attrs={'class': 'bidViewResultsLeft'})

        bid.description = get_all_text(results_cell)
        bid.save()
Пример #31
0
    def scrape_bid_description(self, bid):
        """Set ``bid.description`` from the page's ``content`` div and save.

        Opens ``bid.url``, finds the first ``<div>`` with class ``content``
        and stores ``get_all_text`` of it as the description.
        """
        self.br.open(bid.url)
        page = soupify(self.br.response().read())

        content_div = page.find('div', attrs={'class': 'content'})

        bid.description = get_all_text(content_div)
        bid.save()
Пример #32
0
    def scrape_bid_description(self, bid):
        """Store the text of the page's ``content`` div on the bid.

        The page at ``bid.url`` is parsed, the first ``<div>`` with class
        ``content`` is handed to ``get_all_text``, and the result is saved
        as ``bid.description``.
        """
        self.br.open(bid.url)
        html = self.br.response().read()
        soup = soupify(html)

        wanted = {'class': 'content'}
        container = soup.find('div', attrs=wanted)
        bid.description = get_all_text(container)

        bid.save()
Пример #33
0
    def scrape_bid_description(self, bid):
        """Store the text of the ``bidViewResultsLeft`` cell on the bid.

        The page at ``bid.url`` is parsed, the first ``<td>`` with class
        ``bidViewResultsLeft`` is handed to ``get_all_text``, and the
        result is saved as ``bid.description``.
        """
        self.br.open(bid.url)
        html = self.br.response().read()
        soup = soupify(html)

        wanted = {'class': 'bidViewResultsLeft'}
        cell = soup.find('td', attrs=wanted)
        bid.description = get_all_text(cell)

        bid.save()
Пример #34
0
    def scrape_bid_description(self, bid):
        """Fill in contact and description for ``bid`` and save it.

        On the bid page, the first text node starting with "Contact:" (if
        any) is located and ``get_all_text`` of its enclosing ``<p>``
        becomes ``bid.contact``.  The "Download Bid Package" link is then
        followed; its content is converted with ``pdftohtml`` and
        ``get_all_text`` of the resulting ``<body>`` becomes the
        description.
        """
        self.br.open(bid.url)
        page = soupify(self.br.response().read())

        contact_text = page.find(text=re.compile(r'^Contact:'))
        if contact_text:
            bid.contact = get_all_text(contact_text.findParent('p'))

        def is_package_link(node):
            return node.name == 'a' and node.text == 'Download Bid Package'

        package_link = page.find(is_package_link)
        package_url = urlparse.urljoin(self.br.geturl(), package_link['href'])

        # Second request: fetch the bid package itself.
        self.br.open(package_url)
        package = soupify(pdftohtml(self.br.response().read()))

        bid.description = get_all_text(package.html.body)
        bid.save()
Пример #35
0
    def scrape_bid_links(self, url):
        """Collect not-yet-closed bids from the paginated listing at ``url``.

        On each page, every anchor whose href is ``/node/<digits>`` is read
        together with its enclosing ``<li>``: the title comes from the
        ``views-field-title`` div and the closing date (``MM/DD/YYYY``)
        from the ``views-field-field-closing-date-value`` div.  Scraping
        stops at the first bid whose closing date is before today.
        Otherwise the link whose text is the next page number (starting at
        2) is followed until no such link exists.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        # Loop invariants, built once instead of once per link.
        link_re = re.compile(r'^/node/\d+$')
        date_re = re.compile(r'(\d{2})/(\d{2})/(\d{4})')
        title_attrs = {'class': 'views-field-title'}
        closing_attrs = {'class': 'views-field-field-closing-date-value'}
        today = datetime.date.today()

        pageno = 2
        done = False

        while True:
            s = soupify(self.br.response().read())

            for a in s.findAll('a', href=link_re):
                li = a.findParent('li')
                title_div = li.find('div', attrs=title_attrs)
                closing_date_div = li.find('div', attrs=closing_attrs)

                found = re.search(date_re, closing_date_div.span.text)
                month, day, year = [int(n) for n in found.groups()]
                closing_date = datetime.date(year, month, day)

                # Stop at the first bid that has already closed.
                # NOTE(review): this presumes the listing is ordered so that
                # everything after it is closed as well -- confirm.
                if today > closing_date:
                    done = True
                    break

                bid = Bid(org=self.org)
                bid.title = title_div.span.text
                bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
                bid.location = self.org.location
                # Keep the parsed closing date, as the other scrapers do
                # with the dates they parse.
                bid.due_date = closing_date
                bids.append(bid)

            if done:
                break

            try:
                self.br.follow_link(self.br.find_link(text='%d' % pageno))
                pageno += 1
            except mechanize.LinkNotFoundError:
                # No link for the next page number: last page reached.
                break

        return bids
Пример #36
0
    def scrape_bid_links(self, url):
        """Collect one bid per link inside the ``<ul id="toplevel">`` on ``url``.

        Every anchor found in that list becomes a ``Bid`` titled with the
        anchor text and pointing at the anchor's href resolved against the
        current page URL.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        menu = page.find('ul', id='toplevel')

        results = []
        for link in menu.findAll('a'):
            entry = Bid(org=self.org)
            entry.title = link.text
            entry.url = urlparse.urljoin(self.br.geturl(), link['href'])
            entry.location = self.org.location
            results.append(entry)

        return results
Пример #37
0
    def scrape_bid_links(self, url):
        """Collect one bid per ``bid….pdf`` link on ``url``.

        An anchor qualifies when its href ends in ``bid``, at least one
        non-dot character, and ``.pdf``.  Each becomes a ``Bid`` titled with
        the anchor text and pointing at the href resolved against the
        current page URL.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        pdf_pattern = re.compile(r'bid[^.]+\.pdf$')

        results = []
        for link in page.findAll('a', href=pdf_pattern):
            entry = Bid(org=self.org)
            entry.title = link.text
            entry.url = urlparse.urljoin(self.br.geturl(), link['href'])
            entry.location = self.org.location
            results.append(entry)

        return results
Пример #38
0
    def scrape_bid_links(self, url):
        """Turn every link in the ``toplevel`` list on ``url`` into a bid.

        The ``<ul>`` with id ``toplevel`` is located and each anchor inside
        it yields a ``Bid`` whose title is the anchor text and whose URL is
        the href made absolute against the current page URL.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        soup = soupify(self.br.response().read())
        top_list = soup.find('ul', id='toplevel')

        found = []
        for anchor in top_list.findAll('a'):
            item = Bid(org=self.org)
            item.title = anchor.text
            item.url = urlparse.urljoin(self.br.geturl(), anchor['href'])
            item.location = self.org.location
            found.append(item)

        return found
Пример #39
0
    def scrape_bid_description(self, bid):
        """Fill in contact and description for ``bid`` and save it.

        When the page has a ``<b>`` whose text starts with "Agent",
        ``get_all_text`` of its enclosing ``<table>`` becomes
        ``bid.contact``.  The description is ``get_all_text`` of the first
        ``<blockquote>`` on the page.
        """
        self.br.open(bid.url)
        page = soupify(self.br.response().read())
        agent_re = re.compile(r'^Agent')

        def is_agent_label(node):
            return node.name == 'b' and re.search(agent_re, node.text)

        agent_label = page.find(is_agent_label)
        if agent_label:
            bid.contact = get_all_text(agent_label.findParent('table'))

        quote = page.find('blockquote')

        bid.description = get_all_text(quote)
        bid.save()
Пример #40
0
    def scrape_bid_links(self, url):
        """Turn every ``bid….pdf`` link on ``url`` into a bid.

        Anchors whose href ends in ``bid``, one or more non-dot characters,
        and ``.pdf`` each yield a ``Bid`` whose title is the anchor text and
        whose URL is the href made absolute against the current page URL.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        soup = soupify(self.br.response().read())
        href_re = re.compile(r'bid[^.]+\.pdf$')

        found = []
        for anchor in soup.findAll('a', href=href_re):
            item = Bid(org=self.org)
            item.title = anchor.text
            item.url = urlparse.urljoin(self.br.geturl(), anchor['href'])
            item.location = self.org.location
            found.append(item)

        return found
Пример #41
0
    def scrape_bid_links(self, url):
        """Collect bids from ``ShowDocument`` links in the content div.

        The ``<div>`` whose id is ``_ctl<digits>_content`` is located; each
        anchor inside it whose href is exactly
        ``Modules/ShowDocument.aspx?documentid=<digits>`` becomes a ``Bid``
        titled with the anchor text.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        content = page.find('div', id=re.compile(r'^_ctl\d+_content$'))
        doc_pattern = re.compile(
            r'^Modules/ShowDocument\.aspx\?documentid=\d+$')

        results = []
        for link in content.findAll('a', href=doc_pattern):
            entry = Bid(org=self.org)
            entry.title = link.text
            entry.url = urlparse.urljoin(self.br.geturl(), link['href'])
            entry.location = self.org.location
            results.append(entry)

        return results
Пример #42
0
    def scrape_bid_links(self, url):
        """Collect bids from ``DocumentView`` hyperlinks on ``url``.

        An anchor qualifies when its href is exactly
        ``/DocumentView.aspx?DID=<digits>``, its class is ``Hyperlink`` and
        it has a ``title`` attribute.  The bid title is taken from that
        ``title`` attribute rather than from the link text.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        wanted = {
            'href': re.compile(r'^/DocumentView\.aspx\?DID=\d+$'),
            'class': 'Hyperlink',
            'title': True,
        }

        results = []
        for link in page.findAll('a', attrs=wanted):
            entry = Bid(org=self.org)
            entry.title = link['title']
            entry.url = urlparse.urljoin(self.br.geturl(), link['href'])
            entry.location = self.org.location
            results.append(entry)

        return results
Пример #43
0
    def scrape_bid_links(self, url):
        """Turn ``DocumentView`` hyperlinks on ``url`` into bids.

        Anchors are selected by three attributes together: an href that is
        exactly ``/DocumentView.aspx?DID=<digits>``, the class
        ``Hyperlink``, and the presence of a ``title`` attribute.  That
        ``title`` attribute supplies the bid title.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        soup = soupify(self.br.response().read())
        href_re = re.compile(r'^/DocumentView\.aspx\?DID=\d+$')
        selector = {'href': href_re, 'class': 'Hyperlink', 'title': True}

        found = []
        for anchor in soup.findAll('a', attrs=selector):
            item = Bid(org=self.org)
            item.title = anchor['title']
            item.url = urlparse.urljoin(self.br.geturl(), anchor['href'])
            item.location = self.org.location
            found.append(item)

        return found
Пример #44
0
    def scrape_bid_links(self, url):
        """Turn ``ShowDocument`` links in the content div into bids.

        Within the ``<div>`` whose id is ``_ctl<digits>_content``, anchors
        whose href is exactly
        ``Modules/ShowDocument.aspx?documentid=<digits>`` each yield a
        ``Bid`` whose title is the anchor text.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        soup = soupify(self.br.response().read())
        container_id = re.compile(r'^_ctl\d+_content$')
        container = soup.find('div', id=container_id)
        href_re = re.compile(r'^Modules/ShowDocument\.aspx\?documentid=\d+$')

        found = []
        for anchor in container.findAll('a', href=href_re):
            item = Bid(org=self.org)
            item.title = anchor.text
            item.url = urlparse.urljoin(self.br.geturl(), anchor['href'])
            item.location = self.org.location
            found.append(item)

        return found
Пример #45
0
    def scrape_bid_links(self, url):
        """Collect bids from "Bid Number:" PDF pop-up links on ``url``.

        An anchor qualifies when its href ends in ``.pdf``, its ``onclick``
        contains ``window.open`` and the node just before it is the text
        "Bid Number:".  The title is the nearest preceding text containing
        "Title:" with that label removed.  The due date comes from the next
        text starting with "Due Date/Time:": the part after the first
        colon, cut at "@", is parsed as ``"%B %d, %Y"``; if that text is
        absent or does not parse, the due date is left unset.  When the
        anchor's row has text containing "Project Manager:", the text of
        its parent element becomes ``bid.contact``.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())

        r1 = re.compile(r'\.pdf$')
        r2 = re.compile(r'window\.open')
        title_re = re.compile(r'Title:')
        due_re = re.compile(r'^Due Date/Time:')
        manager_re = re.compile(r'Project Manager:')

        x = {'href': r1, 'onclick': r2}

        for a in s.findAll('a', attrs=x):
            if not a.previous.strip() == 'Bid Number:':
                continue

            title = a.findPrevious(text=title_re)
            title = re.sub(r'Title:', '', title)

            bid = Bid(org=self.org)
            bid.title = title
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            d = a.findNext(text=due_re)
            if d is not None:
                d = d.split(':')[1].strip()
                d = d.split('@')[0].strip()

                try:
                    parsed = time.strptime(d, "%B %d, %Y")
                except ValueError:
                    # Unexpected date format: the due date is best-effort.
                    pass
                else:
                    bid.due_date = datetime.date(day=parsed.tm_mday,
                                                 month=parsed.tm_mon,
                                                 year=parsed.tm_year)

            tr = a.findParent('tr')
            p = tr.find(text=manager_re)

            if p:
                bid.contact = p.parent.text

            bids.append(bid)

        return bids
Пример #46
0
    def scrape_bid_links(self, url):
        """Collect bids from PDF links on ``url`` whose text starts with "RFP".

        An anchor qualifies when its href ends in ``.pdf`` and its text
        begins with "RFP".  Each becomes a ``Bid`` titled with the anchor
        text.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        title_pattern = re.compile(r'^RFP')
        pdf_pattern = re.compile(r'\.pdf$')

        def is_rfp_pdf(node):
            return (node.name == 'a'
                    and re.search(pdf_pattern, node.get('href', ''))
                    and re.search(title_pattern, node.text))

        results = []
        for link in page.findAll(is_rfp_pdf):
            entry = Bid(org=self.org)
            entry.title = link.text
            entry.url = urlparse.urljoin(self.br.geturl(), link['href'])
            entry.location = self.org.location
            results.append(entry)

        return results
Пример #47
0
    def scrape_bid_links(self, url):
        """Collect bids from record links in the ``listDataGrid`` table.

        The ``<table>`` whose id is ``_ctl<digits>_listDataGrid`` is
        located; each anchor in it whose href starts with
        ``index.aspx?recordid=<digits>`` becomes a ``Bid`` titled with the
        anchor text.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        grid = page.find('table', id=re.compile(r'^_ctl\d+_listDataGrid$'))
        record_pattern = re.compile(r'^index\.aspx\?recordid=\d+')

        results = []
        for link in grid.findAll('a', href=record_pattern):
            entry = Bid(org=self.org)
            entry.title = link.text
            entry.url = urlparse.urljoin(self.br.geturl(), link['href'])
            entry.location = self.org.location
            results.append(entry)

        return results
Пример #48
0
    def scrape_bid_description(self, bid):
        """Fill in contact, email and description for ``bid`` and save it.

        The text of the first ``<section class="promo">`` becomes
        ``bid.contact``; the first match of ``self.email_regex`` in that
        text, if any, becomes ``bid.email``.  The description is
        ``get_all_text`` of the first ``<div class="body-content">``.
        """
        self.br.open(bid.url)

        s = soupify(self.br.response().read())
        promo = s.find('section', attrs={'class': 'promo'})

        bid.contact = promo.text

        m = re.search(self.email_regex, promo.text)
        if m:
            bid.email = m.group(0)

        body = s.find('div', attrs={'class': 'body-content'})

        # Store the text under ``description``, the attribute every other
        # scraper in this file fills; the original wrote to ``desc``.
        # NOTE(review): confirm that Bid has no separate ``desc`` field.
        bid.description = get_all_text(body)
        bid.save()
Пример #49
0
    def scrape_bid_links(self, url):
        """Collect bids from "Read more" PDF links on ``url``.

        An anchor qualifies when its href ends in ``.pdf`` and its text is
        exactly "Read more".  The bid title is the text of the nearest
        preceding ``<h3>``.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        pdf_pattern = re.compile(r'\.pdf$')

        def is_read_more_pdf(node):
            return (node.name == 'a'
                    and re.search(pdf_pattern, node.get('href', ''))
                    and node.text == 'Read more')

        results = []
        for link in page.findAll(is_read_more_pdf):
            heading = link.findPrevious('h3')
            entry = Bid(org=self.org)
            entry.title = heading.text
            entry.url = urlparse.urljoin(self.br.geturl(), link['href'])
            entry.location = self.org.location
            results.append(entry)

        return results
Пример #50
0
    def scrape_bid_links(self, url):
        """Collect bids from PDF links inside the ``mainContent`` div.

        Each anchor in ``<div id="mainContent">`` whose href ends in
        ``.pdf`` becomes a ``Bid`` titled with the anchor text.  The
        absolute URL is passed through ``urllib.quote`` with ``:/`` kept
        as safe characters.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        main = page.find('div', id='mainContent')
        pdf_pattern = re.compile(r'\.pdf$')

        results = []
        for link in main.findAll('a', href=pdf_pattern):
            absolute = urlparse.urljoin(self.br.geturl(), link['href'])

            entry = Bid(org=self.org)
            entry.title = link.text
            entry.url = urllib.quote(absolute, ':/')
            entry.location = self.org.location
            results.append(entry)

        return results
Пример #51
0
    def scrape_bid_links(self, url):
        """Turn record links in the ``listDataGrid`` table into bids.

        Inside the ``<table>`` whose id is ``_ctl<digits>_listDataGrid``,
        anchors whose href starts with ``index.aspx?recordid=<digits>``
        each yield a ``Bid`` whose title is the anchor text.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        soup = soupify(self.br.response().read())
        grid_id = re.compile(r'^_ctl\d+_listDataGrid$')
        data_grid = soup.find('table', id=grid_id)
        href_re = re.compile(r'^index\.aspx\?recordid=\d+')

        found = []
        for anchor in data_grid.findAll('a', href=href_re):
            item = Bid(org=self.org)
            item.title = anchor.text
            item.url = urlparse.urljoin(self.br.geturl(), anchor['href'])
            item.location = self.org.location
            found.append(item)

        return found
Пример #52
0
    def scrape_bid_links(self, url):
        """Collect bids from ``center.egov`` document links on ``url``.

        Anchors whose href contains
        ``center.egov?path=doc&id=<digits>&id2=<digits>&linked=0`` are
        used, except those with empty text.  Each becomes a ``Bid`` titled
        with the anchor text.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        doc_pattern = re.compile(
            r'center\.egov\?path=doc&id=\d+&id2=\d+&linked=0')

        results = []
        for link in page.findAll('a', href=doc_pattern):
            # Anchors without any text are skipped.
            if not link.text:
                continue

            entry = Bid(org=self.org)
            entry.title = link.text
            entry.url = urlparse.urljoin(self.br.geturl(), link['href'])
            entry.location = self.org.location
            results.append(entry)

        return results
Пример #53
0
    def scrape_bid_links(self, url):
        """Collect bids from ``/Currentbids/`` PDF links on ``url``.

        The first ``<div class="learn-more-content">`` is extracted from
        the parsed page, and each anchor inside it whose href ends in
        ``/Currentbids/<dot-free name>.pdf`` becomes a ``Bid`` titled with
        the anchor text.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        self.br.open(url)
        page = soupify(self.br.response().read())
        pdf_pattern = re.compile(r'/Currentbids/[^.]+\.pdf$')

        section = page.find('div', attrs={'class': 'learn-more-content'})
        # Detach the div from the page before searching inside it.
        section.extract()

        results = []
        for link in section.findAll('a', href=pdf_pattern):
            entry = Bid(org=self.org)
            entry.title = link.text
            entry.url = urlparse.urljoin(self.br.geturl(), link['href'])
            entry.location = self.org.location
            results.append(entry)

        return results
Пример #54
0
    def scrape_bid_links(self, url):
        """Collect bids from "Bid Number:" PDF pop-up links on ``url``.

        Anchors are selected by an href ending in ``.pdf`` and an
        ``onclick`` containing ``window.open``; those not directly preceded
        by the text "Bid Number:" are skipped.  The title is the nearest
        preceding text containing "Title:" with the label stripped.  The
        next text starting with "Due Date/Time:" supplies the due date
        (part after the first colon, cut at "@", parsed as
        ``"%B %d, %Y"``); a missing or unparseable date leaves it unset.
        If the anchor's row has text containing "Project Manager:", the
        text of its parent element becomes ``bid.contact``.

        Returns a list of new ``Bid`` objects; nothing is saved here.
        """
        bids = []

        self.br.open(url)

        s = soupify(self.br.response().read())

        r1 = re.compile(r'\.pdf$')
        r2 = re.compile(r'window\.open')
        title_re = re.compile(r'Title:')
        due_re = re.compile(r'^Due Date/Time:')
        manager_re = re.compile(r'Project Manager:')

        x = {'href': r1, 'onclick': r2}

        for a in s.findAll('a', attrs=x):
            if not a.previous.strip() == 'Bid Number:':
                continue

            title = a.findPrevious(text=title_re)
            title = re.sub(r'Title:', '', title)

            bid = Bid(org=self.org)
            bid.title = title
            bid.url = urlparse.urljoin(self.br.geturl(), a['href'])
            bid.location = self.org.location

            d = a.findNext(text=due_re)
            if d is not None:
                d = d.split(':')[1].strip()
                d = d.split('@')[0].strip()

                try:
                    parsed = time.strptime(d, "%B %d, %Y")
                except ValueError:
                    # Only a date-format mismatch is tolerated here.
                    pass
                else:
                    bid.due_date = datetime.date(day=parsed.tm_mday,
                                                 month=parsed.tm_mon,
                                                 year=parsed.tm_year)

            tr = a.findParent('tr')
            p = tr.find(text=manager_re)

            if p:
                bid.contact = p.parent.text

            bids.append(bid)

        return bids