def _format_data(self, fee, list):
    """Split raw result items into apartment listings and follow-up URLs.

    Items containing a ``'NumPosts'`` key are not converted; their
    ``'url'`` value is collected instead (presumably these are grouped
    results that need another fetch -- confirm against the caller).
    Every other item is kept only when its ``'Bedrooms'`` value parses to
    exactly 1 and its ``'PostingTitle'`` does not contain "studio"
    (case-insensitive).

    Args:
        fee: Compared against ``Fees.FEE`` to set each apartment's fee flag.
        list: Iterable of dict-like result items. The name shadows the
            builtin but is kept so existing keyword callers keep working.

    Returns:
        A ``(listings, urls)`` tuple: the ``Apartment`` objects built from
        the accepted items, and the URLs taken from ``'NumPosts'`` items.
    """
    urls = []
    listings = []
    for item in list:
        if 'NumPosts' in item:
            urls.append(item['url'])
            continue

        # Values such as "1.0" are accepted, hence float() before int().
        bedrooms = int(float(item['Bedrooms']))
        if bedrooms != 1 or 'studio' in item['PostingTitle'].lower():
            continue

        apartment = Apartment(
            SOURCE,
            item['PostingTitle'],
            int(float(item['Ask'])),
            BASE_URL + item['PostingURL'])
        apartment.set_location(item['Latitude'], item['Longitude'])
        apartment.set_posting_timestamp(item['PostedDate'])
        apartment.set_has_fee(fee == Fees.FEE)
        listings.append(apartment)

    # Explicit loop instead of map(): on Python 3 map() is lazy, so a
    # discarded map object would never call _load_more_data at all.
    for listing in listings:
        self._load_more_data(listing)

    return (listings, urls)
def _find_listing(self, s):
    """Parse the next listing out of the HTML text ``s``.

    The ``html_helper`` calls each return a ``(value, remaining_text)``
    pair; the remaining text is fed into the next call, so the fields are
    read in document order: URL, title, price, then recency.

    Args:
        s: HTML text to scan.

    Returns:
        A ``(listing, rest)`` tuple. ``listing`` is an ``Apartment`` built
        from the parsed title, price and URL, or ``None`` when no URL or
        title was found. ``rest`` is the text returned by the last helper
        call that ran.
    """
    url, s = html_helper.advance_and_find(
        s, TITLE_PLACE_MARKER, 'href="', '"')
    title, s = html_helper.find_in_between(s, '>', '<')
    # Identity comparison is the correct test for the None singleton.
    if url is None or title is None:
        return (None, s)
    title = html_helper.strip_tags(title)

    # NOTE(review): price and recency are not checked for None below; if
    # the helper can fail here the .strip() / strip_tags() calls will
    # raise. Confirm whether the markup guarantees these fields.
    price, s = html_helper.advance_and_find(s, 'color-fg-green', '$', '<')
    # Drop thousands separators before converting, e.g. "2,500" -> 2500.
    price = int(float(price.strip().replace(',', '')))

    # The matched value is discarded; only the updated remainder is kept
    # (presumably this skips a table cell before the recency -- confirm).
    _, s = html_helper.advance_and_find(s, '<td', '', '<div')
    recency, s = html_helper.advance_and_find(
        s, '"bold font-size-100"', '>', '</div')
    recency = html_helper.strip_tags(recency).lower()
    dt = self._understand_recency(recency, url)

    listing = Apartment(SOURCE, title, price, url)
    # NOTE(review): '%s' is a platform-specific strftime directive that is
    # not among Python's documented format codes -- confirm target platforms.
    listing.set_posting_timestamp(dt.strftime('%s'))
    return (listing, s)