Пример #1
0
def find_longest_length(driver, tag, n):
    """Return the largest "length" found for ``tag`` over the first ``n`` pages.

    Every page is fetched with ``beerproj.fetch_json`` and each record's
    ``tag`` value is measured like this:

    * ``str`` or ``dict`` -- ``len(value)``
    * ``int`` or ``float`` -- the number itself
    * ``None`` -- 0

    A value of any other type is reported and the remaining records of that
    page are skipped.

    Args:
        driver: Object handed to ``beerproj.fetch_json``; its
            ``implicitly_wait`` method is called after each page.
        tag: Key looked up in every record.
        n: Number of pages to scan, starting at page 1.

    Returns:
        The largest measurement found; 0 if no measurement exceeded 0.
    """
    url_start = 'https://api.punkapi.com/v2/beers?page='
    url_end = '&per_page=80'
    max_length = 0
    for i in range(n):
        url = url_start + str(i + 1) + url_end
        temp_db_json = beerproj.fetch_json(driver, url)
        for x in temp_db_json:
            try:
                value = x[tag]
                if value is None:
                    # `type(value) is None` can never be true (the type is
                    # NoneType), so compare against the singleton itself.
                    len_mod = 0
                elif isinstance(value, (str, dict)):
                    len_mod = len(value)
                elif (isinstance(value, (int, float))
                        and not isinstance(value, bool)):
                    # `(int or float)` evaluates to just `int`, which made
                    # floats fall through to the "unexpected type" branch.
                    # bool is excluded so it is still reported as unexpected.
                    len_mod = value
                else:
                    print("there is unexpected type: ", type(value))
                    break
                if len_mod > max_length:
                    max_length = len_mod
            except Exception as e:
                # Best-effort scan: report the failing record and carry on.
                print('Ooops..., the %s error happened!' % (e))
                # Do not let a missing 'id' (or a non-dict record) raise
                # from inside the error handler.
                record_id = x.get('id') if isinstance(x, dict) else x
                print('Place of error: ', record_id)
        # NOTE(review): on a Selenium WebDriver this sets the element-lookup
        # timeout rather than pausing between requests -- confirm the intent.
        driver.implicitly_wait(30)
    return max_length
Пример #2
0
def get_all_tag_value(driver, tag, n):
    """Gather the distinct values stored under ``tag`` across ``n`` pages.

    Pages 1..n are fetched with ``beerproj.fetch_json``. Values are kept in
    first-seen order; a record that raises while being processed is reported
    on stdout and skipped.

    Args:
        driver: Object handed to ``beerproj.fetch_json``; its
            ``implicitly_wait`` method is called after each page.
        tag: Key looked up in every record.
        n: Number of pages to scan, starting at page 1.

    Returns:
        A list of the distinct values found.
    """
    url_prefix = 'https://api.punkapi.com/v2/beers?page='
    url_suffix = '&per_page=80'
    distinct_values = []
    for page in range(1, n + 1):
        records = beerproj.fetch_json(driver, f'{url_prefix}{page}{url_suffix}')
        for record in records:
            try:
                value = record[tag]
                # A list (not a set) is used for the membership test, so
                # unhashable values such as dicts or lists are supported.
                if value in distinct_values:
                    continue
                distinct_values.append(value)
            except Exception as err:
                print('Ooops..., the %s error happened!' % (err))
                print('Place of error: ', record['id'])
        driver.implicitly_wait(30)
    return distinct_values
Пример #3
0
def get_max_tags(driver, n=6):
    """Collect every distinct top-level key used by the records on ``n`` pages.

    Pages 1..n are fetched with ``beerproj.fetch_json`` and the keys of each
    record are accumulated in first-seen order.

    Args:
        driver: Object handed to ``beerproj.fetch_json``; its
            ``implicitly_wait`` method is called after each page.
        n: Number of pages to scan, starting at page 1. Defaults to 6, the
            page count that used to be hard-coded here.

    Returns:
        The list of distinct keys, or ``None`` if any error occurred (the
        error and the record being processed are printed).
    """
    url_start = 'https://api.punkapi.com/v2/beers?page='
    url_end = '&per_page=80'
    tag_list = []
    # Record currently being processed. Bound up front so the error report
    # below cannot fail with an unbound local when the fetch itself raises.
    x = None
    try:
        for i in range(n):
            url = url_start + str(i + 1) + url_end
            # Reset so a failed fetch is not blamed on the previous page's
            # last record.
            x = None
            temp_db_json = beerproj.fetch_json(driver, url)
            for x in temp_db_json:
                # .keys() is kept on purpose: a non-dict record raises here
                # and gets reported instead of being silently iterated.
                for key_elem in x.keys():
                    if key_elem not in tag_list:
                        tag_list.append(key_elem)
            driver.implicitly_wait(30)
        return tag_list
    except Exception as e:
        print('Ooops..., the %s error happened!' % (e))
        print('Place of error: ', x)
        return None
Пример #4
0
def get_json_keys(driver, tag, n):
    """Collect the keys found under ``tag`` and the greatest reported depth.

    Pages 1..n are fetched with ``beerproj.fetch_json``. For every record the
    value stored under ``tag`` is passed to ``get_dict_allkeys``, which is
    unpacked as a ``(depth, keys)`` pair. A record that raises while being
    processed is reported on stdout and skipped.

    Args:
        driver: Object handed to ``beerproj.fetch_json``; its
            ``implicitly_wait`` method is called after each page.
        tag: Key looked up in every record.
        n: Number of pages to scan, starting at page 1.

    Returns:
        A ``(keys, depth)`` tuple: the distinct keys in first-seen order and
        the largest depth value returned by ``get_dict_allkeys`` (0 if none
        exceeded 0).
    """
    url_prefix = 'https://api.punkapi.com/v2/beers?page='
    url_suffix = '&per_page=80'
    collected_keys = []
    deepest = 0
    for page in range(1, n + 1):
        records = beerproj.fetch_json(driver, f'{url_prefix}{page}{url_suffix}')
        for record in records:
            try:
                depth, found_keys = get_dict_allkeys(record[tag])
                for found_key in found_keys:
                    if found_key in collected_keys:
                        continue
                    collected_keys.append(found_key)
                deepest = max(deepest, depth)
            except Exception as err:
                print('Ooops..., the %s error happened!' % (err))
                print('Place of error: ', record['id'])
        driver.implicitly_wait(30)
    return collected_keys, deepest
Пример #5
0
    'ibu', 'target_fg', 'target_og', 'ebc', 'srm', 'ph', 'attenuation_level',
    'volume', 'boil_volume', 'method', 'ingredients', 'food_pairing',
    'brewers_tips', 'contributed_by'
]

# x indicates how many times you want to execute the random fetch
x = 100
ctn = 0
# number of consecutive fetches whose returned record had already been inserted into the database
ctn_non_execute = 0
db_prepare_json = []
saved_json_id = []
# the main part:
# fetch non-repetitive data 100 times
while ctn < x:
    temp_db_json = beerproj.fetch_json(driver_main, url)[0]
    # this check guards against the case where fewer than 100 distinct records are left
    if ctn_non_execute <= 15:
        if temp_db_json['id'] not in saved_json_id:
            # if value not repeated, clean None/missing column values
            dirtyworks.find_loss_columns(temp_db_json)
            dirtyworks.kick_none(temp_db_json)
            dirtyworks.repair_dates(temp_db_json)
            db_prepare_json.append(temp_db_json)
            saved_json_id.append(temp_db_json['id'])
            # insert data into db
            add_tuple = []
            for index, (key, value) in enumerate(temp_db_json.items()):
                add_tuple.append(json.dumps(value))
            add_tuple[3] = add_tuple[3].strip('"')
            insert_query = """INSERT INTO beer_Brewdog VALUES