def get_data_projects(url):
    try:
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=20)
        session.mount('https://', adapter)
        session.mount('http://', adapter)
        session.proxies.update(get_proxy())
        # r = session.get(url, headers=headers_xml, timeout=30)
        r = session.get(url, headers=headers, timeout=20)
        if r.status_code != 200:
            # logger.debug('status_code {}'.format(r.status_code))
            return None
        soup = BeautifulSoup(r.content, 'html.parser')
        project_links = soup.find('div', {'id': 'search-results'}).find_all('a')
        result = set()
        for link in project_links:
            if link['href']:
                result.add(link['href'])
        # logger.debug('{} {}'.format(result, len(result)))
        return result
    except requests.exceptions.RequestException as e:
        # logger.debug(e)
        return None
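
# The Session/HTTPAdapter/proxy setup above is repeated verbatim in every
# function below. A minimal sketch of a shared factory that could replace it;
# `get_proxy` is assumed (as its use in the original code suggests) to return
# a requests-style proxies dict such as {'https': 'http://host:port'}. This is
# an illustration, not part of the original project.
def make_session(max_retries=20):
    session = requests.Session()
    adapter = requests.adapters.HTTPAdapter(max_retries=max_retries)
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    session.proxies.update(get_proxy())
    return session
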
def get_data_graph(url, month_target_1, month_target_2):
    """
    13.07 add: Area's median sale price/sqm 1 year ago,
    Area's median sale price/sqm 8 months ago
    """
    try:
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=20)
        session.mount('https://', adapter)
        session.mount('http://', adapter)
        session.proxies.update(get_proxy())
        # r = session.get(url, headers=headers_xml, timeout=30)
        # print(url)
        r = session.get(url, headers=headers_xml, timeout=35)
        if r.status_code != 200:
            logger.debug('status_code {}'.format(r.status_code))
            return None
        data = r.json()['msg']
        # print(r.text)
        try:
            median_sale_prices_sqm = data.split("'sqmSale': {")[1].split(
                "data: [")[1].split("],")[0].split(',')
        except Exception:
            median_sale_prices_sqm = []
        try:
            months = data.split("labels: [")[1].split("],")[0].split(",")
            months = [x.replace('"', '') for x in months]
            logger.debug("months {}".format(len(months)))
            target_month_position = months.index(month_target_1)
            logger.debug(target_month_position)
            result1 = median_sale_prices_sqm[target_month_position]
        except Exception:
            result1 = ''
        try:
            median_rent_prices_sqm = data.split("'sqmRent': {")[1].split(
                "data:[")[1].split("],")[0].split(',')
            target_month_position_2 = months.index(month_target_2)
            result2 = median_rent_prices_sqm[target_month_position_2]
        except Exception:
            result2 = ''
        try:
            # 13th value from the end of the monthly series = 1 year ago
            area_median_sale_price_sqm_1_year_ago = median_sale_prices_sqm[-13]
            # print(area_median_sale_price_sqm_1_year_ago)
        except Exception:
            area_median_sale_price_sqm_1_year_ago = ''
        try:
            # 9th value from the end = 8 months ago
            area_median_sale_price_sqm_8_month_ago = median_sale_prices_sqm[-9]
            # print(area_median_sale_price_sqm_8_month_ago)
        except Exception:
            area_median_sale_price_sqm_8_month_ago = ''
        # try:
        #     proj_media_sale_price = \
        #         data.split('borderColor: "rgb(215, 219, 221)",')[1].split(
        #             "label: htmlDecode('Average enquiry sale price/sqm.'")[0].replace(
        #             "data: [", "").replace("],", "").replace("\n", "").replace("\t", "").split(',')
        #     proj_media_sale_price = [d.strip(" ") for d in proj_media_sale_price]
        #     # print(proj_media_sale_price)
        # except:
        #     proj_media_sale_price = []
        #
        # try:
        #     proj_media_sale_price_sqm_1_year_ago = proj_media_sale_price[-12]
        # except:
        #     proj_media_sale_price_sqm_1_year_ago = ''
        #
        # try:
        #     proj_media_sale_price_sqm_8_month_ago = proj_media_sale_price[-8]
        # except:
        #     proj_media_sale_price_sqm_8_month_ago = ''
        # logger.debug(f'{r.url}:\n\t Result: {result1}; {result2}; '
        #              f'{area_median_sale_price_sqm_1_year_ago}; '
        #              f'{area_median_sale_price_sqm_8_month_ago};')
        # NOTE: the data in these columns should be interchanged, so that the
        # 3-digit numbers land here and the 5-6 digit numbers land in column CC.
        return (result2, result1, area_median_sale_price_sqm_1_year_ago,
                area_median_sale_price_sqm_8_month_ago)
        # proj_media_sale_price_sqm_1_year_ago, proj_media_sale_price_sqm_8_month_ago
    except Exception:
        return '', '', '', ''
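
# Example call site for the function above. The month targets must match the
# chart's `labels: [...]` entries exactly; the "Jul 2021" format shown here is
# an assumption, since the real label format depends on the dotproperty chart
# payload. Each returned value is a string, or '' when the month is missing.
# rent, sale, sale_1y_ago, sale_8m_ago = get_data_graph(
#     graph_url, month_target_1="Jul 2021", month_target_2="Jun 2021")
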
def get_data_projects(urls):
    links_to_save = []
    column_i = 2
    # result = []
    for url in urls:
        result = []
        # print(url)
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=20)
        session.mount('https://', adapter)
        session.mount('http://', adapter)
        session.proxies.update(get_proxy())
        # r = session.get(url, headers=headers_xml, timeout=30)
        r = session.get(url, headers=headers, timeout=20)
        if r.status_code != 200:
            print('status_code', r.status_code)
            return None
        soup = BeautifulSoup(r.content, 'html.parser')
        try:
            number_of_projects = int(
                soup.find('span', {'id': 'properties_total'}).text)
        except Exception:
            number_of_projects = 0
        # print('number_of_projects', number_of_projects)
        result.append(number_of_projects)
        if number_of_projects > 0:
            try:
                count_pages = math.ceil(number_of_projects / 20)  # 20 results per page
            except Exception:
                count_pages = 0
            # print('count_pages', count_pages)
            project_links = soup.find('div', {'id': 'search-results'}).find_all('a')
            for link in project_links:
                if link['href'] and link['href'] not in result:
                    result.append(link['href'])
                    links_to_save.append(link['href'])
            if count_pages > 1:
                for i in range(2, count_pages + 1):
                    url_n_page = "{}?page={}".format(url, i)
                    # print(url_n_page)
                    # e.g. https://www.dotproperty.co.th/en/condos/all/Chiang-Mai?page=2
                    r = session.get(url_n_page, headers=headers, timeout=20)
                    if r.status_code != 200:
                        print('status_code', r.status_code)
                        return None
                    # TODO: add attempts here and everywhere in project
                    soup = BeautifulSoup(r.content, 'html.parser')
                    project_links = soup.find('div', {'id': 'search-results'}).find_all('a')
                    for link in project_links:
                        if link['href'] and link['href'] not in result:
                            result.append(link['href'])
                            links_to_save.append(link['href'])
        # print(result, len(result))
        # post this URL's links into the next spreadsheet column
        wks.update_col(column_i, result, row_offset=2)
        column_i += 1
    # return result
    # TODO: Save to file cont
    # Thai province URLs (condominiums)
    with open('data/links_th_condo_thai_prov.json', 'w') as file_object:
        json.dump(links_to_save, file_object, indent=4)
def get_data_condo(url):
    if url == '/cdn-cgi/l/email-protection':
        # Cloudflare email-protection stub, not a real listing
        return "", "", "", "", ""
    try:
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=20)
        session.mount('https://', adapter)
        session.mount('http://', adapter)
        session.proxies.update(get_proxy())
        # r = session.get(url, headers=headers_xml, timeout=30)
        r = session.get(url, headers=headers, timeout=20)
        if r.status_code != 200:
            logger.debug('status_code {} {}'.format(r.status_code, url))
            return "", "", "", "", ""  # was a list; unified with the tuple returned above
        soup = BeautifulSoup(r.content, 'html.parser')
        # print(soup)
        if input_tab_name.split(" ")[0] == "Vietnam":
            # print("vi")
            # Vietnamese tabs: drop the "/en" prefix and re-fetch the localized
            # page, whose breadcrumbs carry the Vietnamese place names
            session = requests.Session()
            adapter = requests.adapters.HTTPAdapter(max_retries=20)
            session.mount('https://', adapter)
            session.mount('http://', adapter)
            session.proxies.update(get_proxy())
            vi_url = url.replace("/en", "")
            vi_r = session.get(vi_url, headers=headers, timeout=20)
            # print(vi_url)
            vi_soup = BeautifulSoup(vi_r.content, 'html.parser')
            try:
                breadcrumbs = vi_soup.find('ol', {'class': 'breadcrumb'}).find_all('li')
            except Exception:
                breadcrumbs = ''
        else:
            try:
                breadcrumbs = soup.find('ol', {'class': 'breadcrumb'}).find_all('li')
            except Exception:
                breadcrumbs = ''
        # province/city/area extraction was duplicated verbatim in both
        # branches above; hoisted here unchanged
        try:
            province = breadcrumbs[3].text.strip()
        except Exception:
            province = ''
        if len(breadcrumbs) > 5:
            city = breadcrumbs[4].text.strip()
            # print(city)
        else:
            city = ''
        if len(breadcrumbs) > 6:
            area = breadcrumbs[5].text.strip()
        else:
            area = ''
        try:
            try:
                condo_name = soup.find(
                    'div', {'class': 'row top-navigation-bar add-padding'}).find('a').text.strip()
            except Exception:
                condo_name = ''
            try:
                developer_name = soup.find(
                    'div', {'class': 'col-sm-6 nav-top-btngroups text-right'}).find(
                    'li').find('p').text.strip()
            except Exception:
                developer_name = ''
            # total_units = soup.find('div', {'class': 'col-md-12 col-lg-8 project-content'}).find(
            #     'section').text.strip().split('contains ')[1].split(' total')[0]
            total_units_raw = soup.find(
                'div', {'class': 'col-md-12 col-lg-8 project-content'}).find(
                'section').text.strip().split(' total units')[0].split(' ')[-1]
            if any(char.isdigit() for char in total_units_raw):
                total_units = total_units_raw
            else:
                total_units = ''
        except Exception:
            total_units = ''
        try:
            units_for_rent = soup.find('a', {'id': 'open-tab-rent'}).text.strip().split(' ')[0]
        except Exception:
            units_for_rent = ''
        number_of_studios = scrape_room_types_prices(soup)
        start_from, sqm = scrape_rent_units_listing(soup)
        lowest_ask_price, sqm_ask = scrape_rent_sale_listing(soup)
        other_projects_nearby = scrape_other_projects_nearby(soup)
        popular_condos_in_area = scrape_popular_condos_in_area(soup)
        room_types_prices = scrape_room_types_prices_ext(soup)
        # per-room-type aggregates, filled in from room_types_prices below
        s_n_of_units_for_rent, s_av_rent, s_av_ask_price, \
            bd1_n_of_units_for_rent, bd1_av_rent, bd1_av_ask_price, \
            bd2_n_of_units_for_rent, bd2_av_rent, bd2_av_ask_price, \
            bd3_n_of_units_for_rent, bd3_av_rent, bd3_av_ask_price, \
            bd4_n_of_units_for_rent, bd4_av_rent, bd4_av_ask_price = [''] * 15
        s_n_of_units_for_sale, bd1_n_of_units_for_sale, bd2_n_of_units_for_sale, \
            bd3_n_of_units_for_sale, bd4_n_of_units_for_sale = [''] * 5
        s_size, bd1_size, bd2_size, bd3_size, bd4_size = [''] * 5
        for t in room_types_prices:
            if t['type'] == 'Studio':
                s_size = t['size']
                s_n_of_units_for_rent = t['number of units for rent']
                s_av_rent = t['average rent']
                s_av_ask_price = t['average ask price']
                s_n_of_units_for_sale = t['number of units for sale']
            if t['type'] in ('1 Bedroom', '1 Phòng Ngủ'):
                bd1_size = t['size']
                bd1_n_of_units_for_rent = t['number of units for rent']
                bd1_av_rent = t['average rent']
                bd1_av_ask_price = t['average ask price']
                bd1_n_of_units_for_sale = t['number of units for sale']
            if t['type'] in ('2 Bedrooms', '2 Phòng Ngủ'):
                bd2_size = t['size']
                bd2_n_of_units_for_rent = t['number of units for rent']
                bd2_av_rent = t['average rent']
                bd2_av_ask_price = t['average ask price']
                bd2_n_of_units_for_sale = t['number of units for sale']
            if t['type'] in ('3 Bedrooms', '3 Phòng Ngủ'):
                bd3_size = t['size']
                bd3_n_of_units_for_rent = t['number of units for rent']
                bd3_av_rent = t['average rent']
                bd3_av_ask_price = t['average ask price']
                bd3_n_of_units_for_sale = t['number of units for sale']
            if t['type'] in ('4 Bedrooms', '4 Phòng Ngủ'):
                bd4_size = t['size']
                bd4_n_of_units_for_rent = t['number of units for rent']
                bd4_av_rent = t['average rent']
                bd4_av_ask_price = t['average ask price']
                bd4_n_of_units_for_sale = t['number of units for sale']
        graph_link = resolve_graph_link(url)
        # logger.debug(graph_link)
        try:
            median_rent_price_sqm, median_sale_price_sqm, earliest_median_sale_price_sqm, earliest_month, \
                earliest_median_rent_price_sqm, earliest_month_1, project_median_sale_price_sqm_1_year_ago, \
                project_median_sale_price_sqm_8_month_ago, median_sale_price = get_data_graph(graph_link)
        except TypeError as err:
            # get_data_graph returns None on a non-200 response, which makes
            # the 9-way unpack above raise TypeError
            median_rent_price_sqm, median_sale_price_sqm, earliest_median_sale_price_sqm, earliest_month, \
                earliest_median_rent_price_sqm, earliest_month_1, project_median_sale_price_sqm_1_year_ago, \
                project_median_sale_price_sqm_8_month_ago, median_sale_price = [''] * 9
            logger.debug(err)
            logger.debug("get data graph err")
        # logger.debug('DEBUG s_n_of_units_for_rent: {}'.format(s_n_of_units_for_rent))
        try:
            gps_str = soup.find('a', {'id': 'go-to-map-mobile'}).find(
                'img')['src'].split('map_')[1].split('.jpg')[0]
            gps_lat, gps_long = gps_str.split('_')
        except Exception:
            gps_lat, gps_long = '', ''
        result_bulk_1 = {
            'url': url,
            'condo_name': condo_name,
            'developer_name': developer_name,
            'province': province,
            'city': city,
        }
        # print(result_bulk_1)
        result_bulk_2 = {
            'area': area,
        }
        # trailing letter comments mark the spreadsheet column of each entry
        result_bulk_3 = {
            'median_rent_price_sqm': median_rent_price_sqm,  # L
            's_size': s_size,
            'bd1_size': bd1_size,
            'bd2_size': bd2_size,
            'bd3_size': bd3_size,
            'bd4_size': bd4_size,  # Q
            'median_sale_price_sqm': median_sale_price_sqm,
            's_n_of_units_for_sale': s_n_of_units_for_sale,
            'bd1_n_of_units_for_sale': bd1_n_of_units_for_sale,
            'bd2_n_of_units_for_sale': bd2_n_of_units_for_sale,
            'bd3_n_of_units_for_sale': bd3_n_of_units_for_sale,
            'bd4_n_of_units_for_sale': bd4_n_of_units_for_sale,  # W
            'earliest_median_rent_price_sqm': earliest_median_rent_price_sqm,  # X
            'earliest_month_1': earliest_month_1,
            'earliest_median_sale_price_sqm': earliest_median_sale_price_sqm,  # Z
            'earliest_month': earliest_month,
            'total_units': total_units,  # AB col
            'part2_28_09-1': s_n_of_units_for_rent,
            'part2_28_09-2': check_outlier(s_av_rent, filter_values_int[0], filter_values_int[1]),
            'part2_28_09-3': check_outlier(s_av_ask_price, filter_values_int[2], filter_values_int[3]),
            'part2_28_09-4': bd1_n_of_units_for_rent,
            'part2_28_09-5': check_outlier(bd1_av_rent, filter_values_int[4], filter_values_int[5]),
            'part2_28_09-6': check_outlier(bd1_av_ask_price, filter_values_int[6], filter_values_int[7]),
            'part2_28_09-7': bd2_n_of_units_for_rent,
            'part2_28_09-8': check_outlier(bd2_av_rent, filter_values_int[8], filter_values_int[9]),
            'part2_28_09-9': check_outlier(bd2_av_ask_price, filter_values_int[10], filter_values_int[11]),
            'part2_28_09-10': bd3_n_of_units_for_rent,
            'part2_28_09-11': check_outlier(bd3_av_rent, filter_values_int[12], filter_values_int[13]),
            'part2_28_09-12': check_outlier(bd3_av_ask_price, filter_values_int[14], filter_values_int[15]),
            'part2_28_09-13': bd4_n_of_units_for_rent,
            'part2_28_09-14': check_outlier(bd4_av_rent, filter_values_int[16], filter_values_int[17]),
            'part2_28_09-15': check_outlier(bd4_av_ask_price, filter_values_int[18], filter_values_int[19]),
            'start_from': start_from,  # Lowest rent; AR col
            'sqm': sqm,
            'lowest_ask_price': lowest_ask_price,  # AT col
            'sqm_ask': sqm_ask,
            'number_of_studios': number_of_studios,
            'lat': gps_lat,
            'long': gps_long,
            'other_projects_nearby': other_projects_nearby,
        }
        result_bulk_4 = {
            'project_median_sale_price_sqm_1_year_ago': project_median_sale_price_sqm_1_year_ago,
            'project_median_sale_price_sqm_8_month_ago': project_median_sale_price_sqm_8_month_ago,
        }
        result_bulk_5 = {
            'median_sale_price': median_sale_price,
        }
        return result_bulk_1, result_bulk_2, result_bulk_3, result_bulk_4, result_bulk_5
    except requests.exceptions.RequestException as e:
        logger.debug(e)
        return {}, {}, {}, {}, {}  # keep the 5-value shape so callers can still unpack
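
# `check_outlier` is defined elsewhere in the project. Judging by how it is
# called above (a scraped value plus a lower and an upper bound taken from
# filter_values_int), a plausible sketch is: keep the value only if it parses
# as a number inside [low, high], otherwise blank it. This is an assumption,
# not the original implementation.
def check_outlier(value, low, high):
    try:
        number = float(str(value).replace(',', ''))
    except (TypeError, ValueError):
        return ''
    return value if low <= number <= high else ''
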
def get_data_graph(url):
    """
    13.07: add Project's median sale price/sqm 1 year ago,
    Project's median sale price/sqm 8 months ago, Median sale price
    """
    try:
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=20)
        session.mount('https://', adapter)
        session.mount('http://', adapter)
        session.proxies.update(get_proxy())
        # r = session.get(url, headers=headers_xml, timeout=30)
        r = session.get(url, headers=headers_xml, timeout=30)
        if r.status_code != 200:
            # logger.debug('status_code {}'.format(r.status_code))
            return None
        data = r.json()['msg']
        # print(r.text)
        if r.url == "https://www.dotproperty.co.th/en":
            # redirected to the home page: no graph exists for this project
            return '', '', '', '', '', '', '', '', ''
        # logger.debug(data)
        try:
            median_rent_price_sqm = data.split("'sqmRent': {")[1].split(
                "data:[")[1].split("],")[0].split(',')[-1]
            if median_rent_price_sqm is None:
                median_rent_price_sqm = ''
        except Exception:
            median_rent_price_sqm = ''
        try:
            earliest_median_rent_prices_sqm = data.split("'sqmRent': {")[1].split(
                "data:[")[1].split("],")[0].split(',')
            # print(earliest_median_rent_prices_sqm)
            if earliest_median_rent_prices_sqm is None:
                earliest_median_rent_prices_sqm = ''
        except Exception:
            # was assigning the singular name here, leaving the plural name
            # undefined for the checks below
            earliest_median_rent_prices_sqm = ''
        try:
            median_sale_price_sqm = data.split("'sqmSale': {")[1].split(
                "data:[")[1].split("],")[0].split(',')[-1]
            if median_sale_price_sqm is None:
                median_sale_price_sqm = ''
        except Exception:
            median_sale_price_sqm = ''
        try:
            earliest_median_sale_prices_sqm = data.split("'sqmSale': {")[1].split(
                "data:[")[1].split("],")[0].split(',')
            # print(earliest_median_sale_prices_sqm)
            if earliest_median_sale_prices_sqm is None:
                earliest_median_sale_prices_sqm = ''
        except Exception:
            earliest_median_sale_prices_sqm = ''
        # earliest_median_sale_price_sqm = ''
        try:
            # TODO soup find earliest_median_sale_price_sqm
            # walk the sale series to the first non-empty value; its index
            # maps into the chart's month labels
            earliest_median_sale_price_sqm = ''
            if earliest_median_sale_prices_sqm == '':
                raise Exception
            month_num = 0
            # print(earliest_median_sale_prices_sqm)
            for price in earliest_median_sale_prices_sqm:
                month_num += 1
                if price != '':
                    earliest_median_sale_price_sqm = price
                    break
            months = data.split("labels: [")[1].split("],")[0].split(",")
            earliest_month = months[month_num - 1].replace('"', '')
        except Exception:
            earliest_median_sale_price_sqm = ''
            earliest_month = ''
        earliest_median_rent_price_sqm = ''
        try:
            # TODO soup find
            earliest_median_rent_price_sqm = ''
            if earliest_median_rent_prices_sqm == '':
                raise Exception
            month_num = 0
            # print(earliest_median_rent_prices_sqm)
            for price in earliest_median_rent_prices_sqm:
                month_num += 1
                if price != '' and price is not None:
                    earliest_median_rent_price_sqm = price
                    break
            months = data.split("labels: [")[1].split("],")[0].split(",")
            earliest_month_1 = months[month_num - 1].replace('"', '')
            if earliest_month_1 is None:
                earliest_month_1 = ''
        except Exception:
            earliest_median_rent_price_sqm = ''
            earliest_month_1 = ''
        try:
            # 12th value from the end = 1 year ago
            project_median_sale_price_sqm_1_year_ago = \
                data.split("'sqmSale': {")[1].split("data:[")[1].split("],")[0].split(',')[-12]
            # print("1-{}".format(project_median_sale_price_sqm_1_year_ago))
            if project_median_sale_price_sqm_1_year_ago is None:
                project_median_sale_price_sqm_1_year_ago = ''
        except Exception:
            project_median_sale_price_sqm_1_year_ago = ''
        try:
            # 8th value from the end = 8 months ago
            project_median_sale_price_sqm_8_month_ago = \
                data.split("'sqmSale': {")[1].split("data:[")[1].split("],")[0].split(',')[-8]
            # print("8-{}".format(project_median_sale_price_sqm_8_month_ago))
            if project_median_sale_price_sqm_8_month_ago is None:
                project_median_sale_price_sqm_8_month_ago = ''
        except Exception:
            project_median_sale_price_sqm_8_month_ago = ''
        try:
            median_sale_price = data.split("'sale': {")[1].split(
                "data:[")[1].split("],")[0].split(',')[-1]
            # print("median sale price-{}".format(median_sale_price))
            if median_sale_price is None:
                median_sale_price = ''
        except Exception:
            median_sale_price = ''
        # logger.debug("get_data_graph: ...")
        return (median_rent_price_sqm, median_sale_price_sqm,
                earliest_median_sale_price_sqm, earliest_month,
                earliest_median_rent_price_sqm, earliest_month_1,
                project_median_sale_price_sqm_1_year_ago,
                project_median_sale_price_sqm_8_month_ago, median_sale_price)
    except requests.exceptions.RequestException as e:
        logger.debug(e)
        return '', '', '', '', '', '', '', '', ''
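
# Every series above is pulled out of the JS chart payload with the same
# split chain. A sketch of that pattern as a single helper, for illustration
# only; note the original code matches both "data:[" and "data: [" depending
# on the series, so the helper has to tolerate either spacing.
def extract_series(data, key):
    """Return the list of values for `key` (e.g. "'sqmSale': {") or []."""
    try:
        body = data.split(key)[1]
        raw = body.split("data:[")[1] if "data:[" in body else body.split("data: [")[1]
        return raw.split("],")[0].split(',')
    except (IndexError, AttributeError):
        return []
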
def get_data_projects(url):
    links_to_save = []  # assumed local; the source had this commented out
    # result = []
    result = []
    session = requests.Session()
    adapter = requests.adapters.HTTPAdapter(max_retries=20)
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    session.proxies.update(get_proxy())
    r = session.get(url, headers=headers, timeout=20, allow_redirects=True)
    if r.status_code == 503:
        logger.debug('request was blocked - {}'.format(r.status_code))
    if r.status_code != 200:
        logger.debug('status_code {}'.format(r.status_code))
        return None
    soup = BeautifulSoup(r.content, 'html.parser')
    try:
        number_of_projects = int(
            soup.find('span', {'id': 'properties_total'}).text.replace(',', ''))
        # print(number_of_projects)
    except Exception:
        number_of_projects = 0
    logger.debug('number_of_projects {}'.format(number_of_projects))
    result.append(number_of_projects)
    if number_of_projects > 0:
        try:
            count_pages = math.ceil(number_of_projects / 20)  # 20 results per page
        except Exception:
            count_pages = 0
        logger.debug('count_pages {}'.format(count_pages))
        project_links = soup.find('div', {'id': 'search-results'}).find_all('a')
        for link in project_links:
            if link['href'] and link['href'] not in result:
                result.append(link['href'])
                links_to_save.append(link['href'])
        if count_pages > 1:
            for i in range(2, count_pages + 1):
                logger.debug('page {}'.format(i))
                url_n_page = "{}?page={}".format(url, i)
                logger.debug(url_n_page)
                r = session.get(url_n_page, headers=headers, timeout=20)
                if r.status_code != 200:
                    logger.debug('status_code {}'.format(r.status_code))
                    return None
                # TODO: add attempts here and everywhere in project
                soup = BeautifulSoup(r.content, 'html.parser')
                project_links = soup.find('div', {'id': 'search-results'}).find_all('a')
                for link in project_links:
                    if link['href'] and link['href'] not in result:
                        result.append(link['href'])
                        links_to_save.append(link['href'])
    return result
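
# The "TODO: add attempts" note above recurs throughout the project. A minimal
# sketch of what such a retry wrapper could look like; the name, signature,
# and backoff policy are all illustrative, not part of the original code.
import time

def fetch_with_attempts(session, url, attempts=3, **kwargs):
    for attempt in range(1, attempts + 1):
        try:
            r = session.get(url, **kwargs)
            if r.status_code == 200:
                return r
            logger.debug('attempt {} got status {}'.format(attempt, r.status_code))
        except requests.exceptions.RequestException as e:
            logger.debug('attempt {} failed: {}'.format(attempt, e))
        time.sleep(2 * attempt)  # simple linear backoff between attempts
    return None
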
def get_data_graph(url):
    logger.debug(url)
    if url is None:
        logger.debug("url is NONE")
        return
    session = requests.Session()
    adapter = requests.adapters.HTTPAdapter(max_retries=20)
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    session.proxies.update(get_proxy())
    r = session.get(url, headers=headers_xml, timeout=30)
    if r.status_code != 200:
        logger.debug('status_code {}'.format(r.status_code))
        return None
    data = r.json()['msg']
    try:
        median_rent_price_sqm = data.split("'sqmRent': {")[1].split(
            "data:[")[1].split("],")[0].split(',')[-1]
    except Exception:
        median_rent_price_sqm = ''
    try:
        median_sale_price_sqm = data.split("'sqmSale': {")[1].split(
            "data: [")[1].split("],")[0].split(',')[-1]
    except Exception:
        median_sale_price_sqm = ''
    try:
        earliest_median_sale_prices_sqm = data.split("'sqmSale': {")[1].split(
            "data: [")[1].split("],")[0].split(',')
    except Exception:
        earliest_median_sale_prices_sqm = ''
    try:
        earliest_median_rent_prices_sqm = data.split("'sqmRent': {")[1].split(
            "data:[")[1].split("],")[0].split(',')
    except Exception:
        earliest_median_rent_prices_sqm = ''
    try:
        earliest_median_sale_price_sqm = ''
        if earliest_median_sale_prices_sqm == '':
            raise Exception
        month_num = 0
        for price in earliest_median_sale_prices_sqm:
            month_num += 1
            if price != '':
                earliest_median_sale_price_sqm = price
                break
        months = data.split("labels: [")[1].split("],")[0].split(",")
        earliest_month = months[month_num - 1].replace('"', '')
    except Exception:
        earliest_month = ''
    try:
        earliest_median_rent_price_sqm = ''
        if earliest_median_rent_prices_sqm == '':
            raise Exception
        month_num = 0
        for price in earliest_median_rent_prices_sqm:
            month_num += 1
            if price != '':
                earliest_median_rent_price_sqm = price
                break
        months = data.split("labels: [")[1].split("],")[0].split(",")
        if earliest_median_rent_price_sqm == '':
            earliest_month_1 = ''
        else:
            earliest_month_1 = months[month_num - 1].replace('"', '')
    except Exception:
        earliest_month_1 = ''
    # print(data)
    if input_tab_name == "Vietnam townhouses":
        # Vietnamese chart labels: "Giá bán trung bình" = "Median sale price",
        # "Giá cho thuê trung bình" = "Median rent price"
        try:
            part_2_1 = data.split("htmlDecode('Giá bán trung bình")[1].split(
                "data: [")[1].split("],")[0].split(',')[-1]
        except Exception:
            part_2_1 = ''
        try:
            part_2_2 = data.split("htmlDecode('Giá bán trung bình'\n")[1].split(
                "data:[")[1].split("],")[0].split(',')[-1]
        except Exception:
            part_2_2 = ''
        try:
            part_2_3 = data.split("htmlDecode('Giá cho thuê trung bình")[1].split(
                "type: 'bar'")[1].split("data:[")[1].split("],")[0].split(',')[-1]
        except Exception:
            part_2_3 = ''
        try:
            part_2_4 = data.split("htmlDecode('Giá cho thuê trung bình'\n")[1].split(
                "data:[")[1].split("],")[0].split(',')[-1]
        except Exception:
            part_2_4 = ''
    else:
        try:
            part_2_1 = data.split("htmlDecode('Median sale price")[1].split(
                "data: [")[1].split("],")[0].split(',')[-1]
        except Exception:
            part_2_1 = ''
        try:
            part_2_2 = data.split("htmlDecode('Median sale price'\n")[1].split(
                "data:[")[1].split("],")[0].split(',')[-1]
        except Exception:
            part_2_2 = ''
        try:
            part_2_3 = data.split("htmlDecode('Median rent price")[1].split(
                "type: 'bar'")[1].split("data:[")[1].split("],")[0].split(',')[-1]
        except Exception:
            part_2_3 = ''
        try:
            part_2_4 = data.split("htmlDecode('Median rent price'\n")[1].split(
                "data:[")[1].split("],")[0].split(',')[-1]
        except Exception:
            part_2_4 = ''
    try:
        part_2_5 = data.split("'sale': {")[1].split("data:[")[1].split(
            "],")[0].split(',')[-1]
    except Exception:
        part_2_5 = ''
    # return median_rent_price_sqm, earliest_median_sale_price_sqm, earliest_month, \
    #     median_sale_price_sqm, earliest_median_rent_price_sqm, earliest_month_1, \
    #     '', part_2_1, part_2_2, part_2_3, part_2_4, '', part_2_5
    logger.debug('{} {} {} {} {} {} {} {} {} {} {}'.format(
        part_2_4, median_rent_price_sqm, part_2_3, earliest_median_rent_price_sqm,
        earliest_month_1, part_2_5, part_2_2, median_sale_price_sqm, part_2_1,
        earliest_median_sale_price_sqm, earliest_month))
    return (part_2_4, median_rent_price_sqm, part_2_3, earliest_median_rent_price_sqm,
            earliest_month_1, part_2_5, part_2_2, median_sale_price_sqm, part_2_1,
            earliest_median_sale_price_sqm, earliest_month)
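
# The return order above is shuffled relative to the variable names,
# presumably to match the spreadsheet's column order, so a call site has to
# unpack all eleven values positionally, e.g.:
# (part_2_4, median_rent_price_sqm, part_2_3, earliest_median_rent_price_sqm,
#  earliest_month_1, part_2_5, part_2_2, median_sale_price_sqm, part_2_1,
#  earliest_median_sale_price_sqm, earliest_month) = get_data_graph(graph_url)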