Пример #1
0
def DeadFish(c):
    """Interactively pick a DeadFish title and quality and insert it via ``c``.

    Looks up the 'DeadFish' entry in ``d.available_subgroups``, prompts for an
    anime name, pages through the RSS feed returned by ``utils.get_rss_feed``
    until a page has no entries, and groups every parsed title with the
    qualities seen for it. The user then chooses a title and a quality, and
    the pair is inserted into the ``DeadFish`` table.

    Args:
        c: Database cursor used for the INSERT. The query uses ``%s``
            placeholders, i.e. it assumes a psycopg2-style cursor
            (NOTE(review): confirm no caller passes a different driver).

    Returns:
        None. Results and errors are reported with ``print``.

    Raises:
        re.error: If a configured title/quality regex is invalid.
    """
    # A default is required here: without it next() raises StopIteration and
    # the "not found" branch below could never run.
    subgroup_data = next(
        (sg for sg in d.available_subgroups if sg['subgroup'] == 'DeadFish'),
        None,
    )
    if not subgroup_data:
        print("None Found")
        return

    user_id = subgroup_data['nyaa_id']
    # Compile once; the patterns are reused for every feed entry.
    title_pattern = re.compile(subgroup_data['regex']['title'])
    quality_pattern = re.compile(subgroup_data['regex']['quality'])

    input_title = input('Anime name: ').replace(' ', '+')

    # title -> qualities, both in first-seen order.
    qualities_by_title = {}
    page = 1
    # Query successive pages of the RSS feed until one comes back empty.
    while True:
        results = feedparser.parse(
            utils.get_rss_feed(input_title, user_id, page))
        page += 1
        if not results['entries']:
            break
        for hit in results['entries']:
            try:
                raw_title = hit['title']
                title_match = title_pattern.search(raw_title)
                quality_match = quality_pattern.search(raw_title)
            except (KeyError, TypeError):
                # Entry without a usable title: skip it, best effort.
                continue
            if not title_match or not quality_match:
                continue
            parsed_title = title_match.group(0)
            parsed_quality = quality_match.group(0)
            if not parsed_title or not parsed_quality:
                continue
            qualities = qualities_by_title.setdefault(parsed_title, [])
            if parsed_quality not in qualities:
                qualities.append(parsed_quality)

    if not qualities_by_title:
        print("No title found under %s" % input_title)
        return

    anime_list = list(qualities_by_title.items())
    print('Found titles: ')
    utils.selection([title for title, _ in anime_list])
    # Ask for the title choice.
    title, qualities = anime_list[
        utils.selector('Select the title: ', len(anime_list))]

    # Ask for the quality choice.
    print('Available qualities: ')
    utils.selection(qualities)
    quality = qualities[utils.selector('Quality: ', len(qualities))]

    try:
        # Parameterised query: the driver does the quoting, so titles that
        # contain quotes need no manual escaping and cannot inject SQL.
        c.execute("INSERT INTO DeadFish VALUES (%s, %s);", (title, quality))
    except Exception as e:
        # Best effort, as before: report the database error and carry on.
        print(e)
Пример #2
0
def get_position_avgcpc_based_on_maxcpc(data, maxcpc_relation):
    """Fit avgcpc against log(maxcpc + 1) per group and return the results.

    For every distinct combination of the ``rs.FILTER`` columns, the rows
    returned by ``utils.selection`` (with null ``avgcpc`` dropped) are fitted
    with a single-coefficient least-squares model
    ``avgcpc ~ k * log(maxcpc + 1)`` (no intercept). The fitted coefficient is
    applied to the ``maxcpc`` values of the second object returned by
    ``utils.selection``. Groups with fewer than two usable rows get NaN.

    Args:
        data: DataFrame containing the ``rs.FILTER`` columns plus
            ``avgposition`` and ``avgcpc``.
        maxcpc_relation: Passed through unchanged to ``utils.selection``.

    Returns:
        A DataFrame with the ``utils.format_output`` frames of all groups
        stacked in group order (empty DataFrame when there are no groups).

    Raises:
        ValueError: If any required column is missing from ``data``.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")

    fields_check = rs.FILTER + ['avgposition', 'avgcpc']
    available = set(data.columns)
    if any(field not in available for field in fields_check):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_cpc: I cannot find all records in data")

    groups = data[rs.FILTER].drop_duplicates().values

    # The position grid does not depend on the group, so build it once.
    avg_position = [
        round(pos, 1)
        for pos in np.linspace(1.0, rs.ACPCPOS_MAX_POS, rs.ACPCPOS_N_POINTS)
    ]

    frames = []
    for group in groups:
        selected, selected_maxcpc = utils.selection(
            data=data,
            group=group,
            metric='maxcpc_avgcpc',
            maxcpc_relation=maxcpc_relation)

        selected = selected[pd.notnull(selected['avgcpc'])]

        if selected.shape[0] >= 2:
            # Single-column design matrix: least squares through the origin.
            x = np.log(np.array(selected['maxcpc'])[:, np.newaxis] + 1)
            y = np.array(selected['avgcpc'])
            coeffs, _, _, _ = np.linalg.lstsq(x, y, rcond=None)
            avg_cpc = list(coeffs[0] * np.log(selected_maxcpc['maxcpc'] + 1))
        else:
            avg_cpc = [np.nan] * rs.ACPCPOS_N_POINTS

        feed_dict, _ = utils.format_output(
            group=group,
            metric='maxcpc_avgcpc',
            metric_data=avg_cpc,
            # Fresh copy per call so the shared grid cannot be mutated.
            position=list(avg_position),
            selected_metric_data=selected['maxcpc'],
            selected_position=selected['avgposition'],
            selected_data=selected)

        frames.append(pd.DataFrame(feed_dict))

    logger.info("msg=finish")

    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration; a single concat gives the same stacking.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
Пример #3
0
def get_position_avgcpc(data):
    """Fit avgcpc against log(avgposition) per group and return the curves.

    For every distinct combination of the ``rs.FILTER`` columns, the rows
    returned by ``utils.selection`` (with null ``avgcpc`` dropped) are fitted
    with a degree-1 polynomial in ``log(avgposition)``. The fitted line is
    evaluated on ``rs.ACPCPOS_N_POINTS`` evenly spaced positions between 1.0
    and ``rs.ACPCPOS_MAX_POS``. Groups with fewer than two usable rows get NaN.

    Args:
        data: DataFrame containing the ``rs.FILTER`` columns plus
            ``avgposition`` and ``avgcpc``.

    Returns:
        A DataFrame with the ``utils.format_output`` frames of all groups
        stacked in group order (empty DataFrame when there are no groups).

    Raises:
        ValueError: If any required column is missing from ``data``.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")

    fields_check = rs.FILTER + ['avgposition', 'avgcpc']
    available = set(data.columns)
    if any(field not in available for field in fields_check):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_cpc: I cannot find all records in data")

    groups = data[rs.FILTER].drop_duplicates().values

    # The evaluation grid is the same for every group; build it once.
    positions = np.linspace(1.0, rs.ACPCPOS_MAX_POS, rs.ACPCPOS_N_POINTS)
    avg_position = [round(pos, 1) for pos in positions]

    frames = []
    for group in groups:
        selected, _ = utils.selection(data=data,
                                      group=group,
                                      metric='maxcpc')
        selected = selected[pd.notnull(selected['avgcpc'])]

        if selected.shape[0] >= 2:
            # Append 20 synthetic (rs.ACPCPOS_MAX_CONVERGENCE, 0) points so
            # the fitted curve is pulled towards 0 at that position.
            x = list(selected['avgposition']) + \
                [rs.ACPCPOS_MAX_CONVERGENCE] * 20
            y = list(selected['avgcpc']) + [0] * 20

            coeffs = np.polyfit(np.log(x), y, deg=1)
            avg_cpc = list(coeffs[0] * np.log(positions) + coeffs[1])
        else:
            avg_cpc = [np.nan] * rs.ACPCPOS_N_POINTS

        feed_dict, _ = utils.format_output(group=group,
                                           metric='avgcpc',
                                           metric_data=avg_cpc,
                                           # Fresh copy per call so the shared
                                           # grid cannot be mutated.
                                           position=list(avg_position))

        frames.append(pd.DataFrame(feed_dict))

    logger.info("msg=finish")

    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration; a single concat gives the same stacking.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
Пример #4
0
def main():
    """Connect to the database, let the user pick a subgroup and run it.

    Reads the database name and user from ``cfg``, opens a psycopg2
    connection, initialises the tables through ``utils.init_tables`` and lists
    the subgroups configured in ``d.available_subgroups``. The handler in the
    ``subgroups`` module whose name matches the chosen subgroup is called with
    a cursor, after which the transaction is committed.

    Returns:
        False when the connection cannot be opened or no subgroup is
        configured; None otherwise.
    """
    # Get connection settings from the config module.
    DB_NAME, DB_USER = cfg.database_name, cfg.database_user
    try:
        # Keyword arguments instead of a hand-formatted DSN string, so values
        # containing spaces or quotes are passed through safely.
        connection = psycopg2.connect(dbname=DB_NAME, user=DB_USER)
    except psycopg2.Error as e:
        # Without a connection nothing below can work; stop here instead of
        # failing later on an unbound `connection`.
        print(e)
        return False

    try:
        utils.init_tables()
        cursor = connection.cursor()
        available_groups = [sg['subgroup'] for sg in d.available_subgroups]
        if not available_groups:
            print("No Subgroups configured")
            return False
        print("Subgroups available: ")
        utils.selection(available_groups)
        chosen = available_groups[
            utils.selector('Select Subgroup: ', len(available_groups))]
        # Attribute lookup instead of eval(): same dispatch by name, but a
        # configured subgroup name can never be executed as arbitrary code.
        getattr(subgroups, chosen)(cursor)
        connection.commit()
    finally:
        # Always release the connection, including on early return or error.
        connection.close()
Пример #5
0
def get_position_ctr(data):
    """Fit click-through rate against 1/avgposition per group.

    For every distinct combination of the ``rs.FILTER`` columns, the rows
    returned by ``utils.selection`` are fitted with a degree-1 polynomial in
    ``1 / avgposition``. The fitted line is evaluated on ``rs.CTR_N_POINTS``
    evenly spaced positions between 1.0 and ``rs.CTR_MAX_POS``. If the
    resulting curve is not non-increasing, the fit is repeated weighted by
    ``impressions``. Groups with fewer than two rows get NaN.

    Args:
        data: DataFrame containing the ``rs.FILTER`` columns plus
            ``avgposition`` and ``clickthroughrate``. The selected rows are
            also read for an ``impressions`` column, which is not validated
            up front.

    Returns:
        A DataFrame with the ``utils.format_output`` frames of all groups
        stacked in group order (empty DataFrame when there are no groups).

    Raises:
        ValueError: If any required column is missing from ``data``.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")

    fields_check = rs.FILTER + ['avgposition', 'clickthroughrate']
    available = set(data.columns)
    if any(field not in available for field in fields_check):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_cpc: I cannot find all records in data")

    groups = data[rs.FILTER].drop_duplicates().values

    # The evaluation grid is the same for every group; build it once.
    positions = np.linspace(1.0, rs.CTR_MAX_POS, rs.CTR_N_POINTS)
    avg_position = [round(pos, 1) for pos in positions]

    frames = []
    for group in groups:
        selected, _ = utils.selection(data=data, group=group, metric='ctr')

        if selected.shape[0] >= 2:
            # 20 synthetic (rs.CTR_MAX_CONVERGENCE, 0) points pull the fitted
            # curve towards 0 at that position.
            selected_avgposition = list(
                selected['avgposition']) + [rs.CTR_MAX_CONVERGENCE] * 20
            selected_clickthroughrate = list(
                selected['clickthroughrate']) + [0] * 20

            x = list(
                1 /
                selected['avgposition']) + [1 / rs.CTR_MAX_CONVERGENCE] * 20
            y = selected_clickthroughrate
            # Synthetic points are weighted with the mean impressions.
            w = list(selected['impressions']
                     ) + [np.mean(selected['impressions'])] * 20

            coeffs = np.polyfit(x, y, deg=1)
            ctr = list(coeffs[0] / positions + coeffs[1])

            if not all(a >= b for a, b in zip(ctr, ctr[1:])):
                # The unweighted fit rises somewhere: retry with weights.
                coeffsw = np.polyfit(x, y, w=w, deg=1)
                ctr = list(coeffsw[0] / positions + coeffsw[1])

                if not all(a >= b for a, b in zip(ctr, ctr[1:])):
                    logger.info(
                        "msg=NON DECREASING SHAPE FOR CLICKTHROUGHRATE")

            if any(value < 0 for value in ctr):
                # This check is about negative values; the previous message
                # was a copy of the "non decreasing" one above.
                msg = "NEGATIVE VALUES FOR CLICKTHROUGHRATE: " + str(
                    group[0]) + ' ' + str(group[1])
                logger.info("msg= " + msg)

        else:
            ctr = [np.nan] * rs.CTR_N_POINTS
            selected_avgposition = list(selected['avgposition'])
            selected_clickthroughrate = list(selected['clickthroughrate'])

        feed_dict, _ = utils.format_output(
            group=group,
            metric='ctr',
            metric_data=ctr,
            # Fresh copy per call so the shared grid cannot be mutated.
            position=list(avg_position),
            selected_data=selected,
            selected_metric_data=selected_clickthroughrate,
            selected_position=selected_avgposition)

        frames.append(pd.DataFrame(feed_dict))

    logger.info("msg=finish")

    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration; a single concat gives the same stacking.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)