def tranfer_data_from_stage_to_temp():
    """Normalise every staged entry and hand it to the temp insert.

    For each row returned by ``Dao.get_entries_from_stage()`` every value is
    stripped of surrounding whitespace in place, then selected fields are
    converted to ``int``:

    * ``kilometer_done`` / ``price``: commas are removed before conversion.
    * ``model_year``: the text after a three-letter prefix and a hyphen.
    * ``profile_id``: the text after the first ``S`` found.

    The row, mutated in place, is then passed to
    ``Dao.populateAndExecuteIntoTemp``.

    Raises:
        AttributeError: if a value has no ``strip`` method, or if a
            ``model_year`` / ``profile_id`` value does not match its pattern.
        ValueError: if the extracted text is not a valid integer literal.
    """
    dao = Dao()
    staged_entries = dao.get_entries_from_stage()
    for entry in staged_entries:
        for field in entry:
            # Store the stripped text first so non-numeric fields keep it.
            entry[field] = entry[field].strip()

            if field in ('kilometer_done', 'price'):
                without_commas = entry[field].replace(',', '')
                entry[field] = int(without_commas)
                continue

            if field == 'model_year':
                # NOTE(review): presumably values look like "Jan-2012" -- confirm.
                found = re.search(r'([A-Za-z]{3})(-)(.+\d)', entry[field])
                entry[field] = int(found.group(3))
                continue

            if field == 'profile_id':
                # Group 2 is everything after the "S", up to the last digit.
                found = re.search(r'(S)(.+\d)', entry[field])
                entry[field] = int(found.group(2))

        dao.populateAndExecuteIntoTemp(entry)
# Example #2
# 0
__author__ = 'aliHitawala'
from DataModels.webcrawler.bikewale.DataExtractor import Extractor
from DataModels.persistence.bikewale.BikeWaleDaoImpl import Dao

if __name__ == '__main__':
    # Script entry point: read a list of URLs from ``filename``, run each one
    # through the extractor, and persist every non-empty result via the DAO.
    filename = 'html_pages_url'
    __list = []  # not referenced anywhere in the visible script
    urls = []

    def readAllUrls():
        """Load every line of ``filename`` into the module-level ``urls``."""
        global urls
        # The context manager closes the handle even if reading fails.
        with open(filename, 'r') as f:
            # NOTE(review): readlines() keeps each line's trailing newline, so
            # extract() receives it too -- confirm the extractor tolerates it.
            urls = f.readlines()

    readAllUrls()
    instance = Extractor()
    daoInstance = Dao()
    for url in urls:
        # ``record`` avoids shadowing the builtin ``dict``.
        record = instance.extract(url)
        # Skip URLs for which the extractor returned a falsy result.
        if record:
            # A parenthesised single argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(record)
            daoInstance.populateAndExecute(record)