def DeadFish(c):
    """Interactively search the nyaa RSS feed for DeadFish releases.

    Prompts for an anime name, walks the RSS feed page by page,
    collects (title, [qualities]) pairs matching the subgroup's regexes,
    asks the user to pick a title and a quality, and inserts the choice
    into the DeadFish table through cursor *c*.
    """
    # next(..., None): a bare next() raises StopIteration when nothing
    # matches, which made the original `if not subgroup_data` dead code.
    subgroup_data = next(
        (sg for sg in d.available_subgroups if sg['subgroup'] == 'DeadFish'),
        None)
    if not subgroup_data:
        print("None Found")
        return
    user_id = subgroup_data['nyaa_id']
    title_regex = subgroup_data['regex']['title']
    quality_regex = subgroup_data['regex']['quality']

    param = {}
    anime_list = []  # list of (title, [qualities]) tuples
    page = 1
    input_title = input('Anime name: ').replace(' ', '+')

    # Query all pages from the RSS feed until an empty page comes back.
    while True:
        results = feedparser.parse(
            utils.get_rss_feed(input_title, user_id, page))
        page += 1
        if not results['entries']:
            break
        for hit in results['entries']:
            try:
                parsed_title = re.search(title_regex, hit['title']).group(0)
                parsed_quality = re.search(
                    quality_regex, hit['title']).group(0)
            except AttributeError:
                # re.search returned None: entry doesn't match this group.
                parsed_title = ''
                parsed_quality = ''
            if not (parsed_title and parsed_quality):
                continue
            for title, qualities in anime_list:
                if title == parsed_title:
                    if parsed_quality not in qualities:
                        qualities.append(parsed_quality)
                    break
            else:
                anime_list.append((parsed_title, [parsed_quality]))

    if not anime_list:
        print("No title found under %s" % input_title)
        return

    print('Found titles: ')
    utils.selection([t[0] for t in anime_list])
    # Ask for the title choice.
    anime_selection = anime_list[
        utils.selector('Select the title: ', len(anime_list))]
    param['title'] = anime_selection[0]
    # Ask for the quality choice.
    print('Available qualities: ')
    utils.selection(anime_selection[1])
    param['quality'] = anime_selection[1][
        utils.selector('Quality: ', len(anime_selection[1]))]
    try:
        # Parameterized query: the original interpolated user-derived
        # strings into the SQL (quality was not escaped at all) — a
        # textbook SQL-injection vector.
        c.execute("INSERT INTO DeadFish VALUES(%s, %s);",
                  (param['title'], param['quality']))
    except Exception as e:
        print(e)
def get_position_avgcpc_based_on_maxcpc(data, maxcpc_relation):
    """Fit avgcpc ~ a * log(maxcpc + 1) per group and evaluate it.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain the rs.FILTER columns plus 'avgposition' and
        'avgcpc' (and 'maxcpc', used by the fit).
    maxcpc_relation
        Passed through to utils.selection.

    Returns
    -------
    pd.DataFrame
        One block of rows per group, shaped by utils.format_output.

    Raises
    ------
    ValueError
        If any required column is missing from *data*.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")
    fields_check = rs.FILTER + ['avgposition', 'avgcpc']
    present = [f for f in fields_check if f in data.columns]
    if len(fields_check) != len(present):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_cpc: I cannot find all records in data")
    groups = data[rs.FILTER].drop_duplicates().values
    parts = []  # per-group frames; concatenated once at the end
    for i in range(groups.shape[0]):
        selected, selected_maxcpc = utils.selection(
            data=data, group=groups[i], metric='maxcpc_avgcpc',
            maxcpc_relation=maxcpc_relation)
        selected = selected[pd.notnull(selected['avgcpc'])]
        if selected.shape[0] >= 2:
            # Least-squares fit through the origin in log space:
            # avgcpc ≈ a * log(maxcpc + 1).
            x = np.log(np.array(selected['maxcpc'])[:, np.newaxis] + 1)
            y = np.array(selected['avgcpc'])
            coeffs, _, _, _ = np.linalg.lstsq(x, y, rcond=None)
            avg_cpc = list(coeffs[0] * np.log(selected_maxcpc['maxcpc'] + 1))
        else:
            # Not enough observations to fit anything meaningful.
            avg_cpc = [np.nan] * rs.ACPCPOS_N_POINTS
        avg_position = [
            round(p, 1) for p in
            np.linspace(1.0, rs.ACPCPOS_MAX_POS, rs.ACPCPOS_N_POINTS)
        ]
        feed_dict, _ = utils.format_output(
            group=groups[i], metric='maxcpc_avgcpc', metric_data=avg_cpc,
            position=avg_position,
            selected_metric_data=selected['maxcpc'],
            selected_position=selected['avgposition'],
            selected_data=selected)
        parts.append(pd.DataFrame(feed_dict))
    # pd.concat replaces DataFrame.append (deprecated in 1.4, removed in
    # pandas 2.0) and avoids quadratic re-copying inside the loop.
    df_ret = pd.concat(parts) if parts else pd.DataFrame()
    logger.info("msg=finish")
    return df_ret
def get_position_avgcpc(data):
    """Fit avgcpc ~ a * log(position) + b per group and evaluate it.

    The data is padded with 20 synthetic points at
    rs.ACPCPOS_MAX_CONVERGENCE with avgcpc 0 so the fitted log curve is
    pulled toward zero at that position.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain the rs.FILTER columns plus 'avgposition' and
        'avgcpc'.

    Returns
    -------
    pd.DataFrame
        One block of rows per group, shaped by utils.format_output.

    Raises
    ------
    ValueError
        If any required column is missing from *data*.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")
    fields_check = rs.FILTER + ['avgposition', 'avgcpc']
    present = [f for f in fields_check if f in data.columns]
    if len(fields_check) != len(present):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_cpc: I cannot find all records in data")
    groups = data[rs.FILTER].drop_duplicates().values
    parts = []  # per-group frames; concatenated once at the end
    for i in range(groups.shape[0]):
        selected, _ = utils.selection(data=data, group=groups[i],
                                      metric='maxcpc')
        selected = selected[pd.notnull(selected['avgcpc'])]
        if selected.shape[0] >= 2:
            # Force the fitted logarithm toward 0 at the convergence
            # position by padding with 20 zero-cpc points.
            x = list(selected['avgposition']) + \
                [rs.ACPCPOS_MAX_CONVERGENCE] * 20
            y = list(selected['avgcpc']) + [0] * 20
            coeffs = np.polyfit(np.log(x), y, deg=1)
            grid = np.linspace(1.0, rs.ACPCPOS_MAX_POS, rs.ACPCPOS_N_POINTS)
            avg_cpc = list(coeffs[0] * np.log(grid) + coeffs[1])
        else:
            avg_cpc = [np.nan] * rs.ACPCPOS_N_POINTS
        avg_position = [
            round(p, 1) for p in
            np.linspace(1.0, rs.ACPCPOS_MAX_POS, rs.ACPCPOS_N_POINTS)
        ]
        feed_dict, _ = utils.format_output(group=groups[i], metric='avgcpc',
                                           metric_data=avg_cpc,
                                           position=avg_position)
        parts.append(pd.DataFrame(feed_dict))
    # pd.concat replaces DataFrame.append (deprecated in 1.4, removed in
    # pandas 2.0) and avoids quadratic re-copying inside the loop.
    df_ret = pd.concat(parts) if parts else pd.DataFrame()
    logger.info("msg=finish")
    return df_ret
def main():
    """Connect to the database, let the user pick a subgroup, run it.

    Reads connection settings from the config module, lists the
    configured subgroups, dispatches to the matching handler in the
    `subgroups` module with a cursor, then commits and closes.
    Returns False when the connection fails or nothing is configured.
    """
    # Get connection settings from the config module.
    DB_NAME, DB_USER = cfg.database_name, cfg.database_user
    try:
        connection = psycopg2.connect(
            "dbname=%s user=%s" % (DB_NAME, DB_USER))
    except Exception as e:
        # The original printed the error and fell through to use an
        # undefined `connection` (NameError); bail out explicitly.
        print(e)
        return False
    utils.init_tables()
    cursor = connection.cursor()
    available_groups = [sg['subgroup'] for sg in d.available_subgroups]
    if not available_groups:
        print("No Subgroups configured")
        connection.close()
        return False
    print("Subgroups available: ")
    utils.selection(available_groups)
    choice = available_groups[
        utils.selector('Select Subgroup: ', len(available_groups))]
    # getattr dispatch instead of eval(): same behavior, without
    # executing an arbitrarily-built code string.
    getattr(subgroups, choice)(cursor)
    connection.commit()
    connection.close()
def _is_non_increasing(seq):
    """Return True when *seq* never increases from one item to the next."""
    return all(a >= b for a, b in zip(seq, seq[1:]))


def get_position_ctr(data):
    """Fit ctr ~ a / position + b per group and evaluate it on a grid.

    The data is padded with 20 synthetic points at
    rs.CTR_MAX_CONVERGENCE with ctr 0 so the fitted curve decays toward
    zero. When the unweighted fit is not monotonically decreasing, an
    impression-weighted fit is tried instead; persistent anomalies are
    only logged.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain the rs.FILTER columns plus 'avgposition' and
        'clickthroughrate' (and 'impressions', used as fit weights).

    Returns
    -------
    pd.DataFrame
        One block of rows per group, shaped by utils.format_output.

    Raises
    ------
    ValueError
        If any required column is missing from *data*.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")
    fields_check = rs.FILTER + ['avgposition', 'clickthroughrate']
    present = [f for f in fields_check if f in data.columns]
    if len(fields_check) != len(present):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_cpc: I cannot find all records in data")
    groups = data[rs.FILTER].drop_duplicates().values
    parts = []  # per-group frames; concatenated once at the end
    for i in range(groups.shape[0]):
        selected, _ = utils.selection(data=data, group=groups[i],
                                      metric='ctr')
        if selected.shape[0] >= 2:
            # Pad with 20 points at the convergence position (ctr = 0)
            # so the fitted curve is pulled toward zero there.
            selected_avgposition = list(
                selected['avgposition']) + [rs.CTR_MAX_CONVERGENCE] * 20
            selected_clickthroughrate = list(
                selected['clickthroughrate']) + [0] * 20
            x = list(1 / selected['avgposition']) + \
                [1 / rs.CTR_MAX_CONVERGENCE] * 20
            y = list(selected['clickthroughrate']) + [0] * 20
            w = list(selected['impressions']) + \
                [np.mean(selected['impressions'])] * 20
            grid = np.linspace(1.0, rs.CTR_MAX_POS, rs.CTR_N_POINTS)
            coeffs = np.polyfit(x, y, deg=1)
            ctr = list(coeffs[0] * 1 / grid + coeffs[1])
            if not _is_non_increasing(ctr):
                # Retry with impression weights when the unweighted fit
                # is not monotonically decreasing.
                coeffsw = np.polyfit(x, y, w=w, deg=1)
                ctr = list(coeffsw[0] * 1 / grid + coeffsw[1])
                if not _is_non_increasing(ctr):
                    logger.info(
                        "msg=NON DECREASING SHAPE FOR CLICKTHROUGHRATE")
            if any(xi < 0 for xi in ctr):
                msg = "NON DECREASING SHAPE FOR CLICKTHROUGHRATE: " + str(
                    groups[i][0]) + ' ' + str(groups[i][1])
                logger.info("msg= " + msg)
        else:
            # Not enough observations; emit NaNs on the position grid.
            ctr = [np.nan] * rs.CTR_N_POINTS
            selected_avgposition = list(selected['avgposition'])
            selected_clickthroughrate = list(selected['clickthroughrate'])
        avg_position = [
            round(p, 1)
            for p in np.linspace(1.0, rs.CTR_MAX_POS, rs.CTR_N_POINTS)
        ]
        feed_dict, _ = utils.format_output(
            group=groups[i], metric='ctr', metric_data=ctr,
            position=avg_position, selected_data=selected,
            selected_metric_data=selected_clickthroughrate,
            selected_position=selected_avgposition)
        parts.append(pd.DataFrame(feed_dict))
    # pd.concat replaces DataFrame.append (deprecated in 1.4, removed in
    # pandas 2.0) and avoids quadratic re-copying inside the loop.
    df_ret = pd.concat(parts) if parts else pd.DataFrame()
    logger.info("msg=finish")
    return df_ret