def get_position_avgcpc_based_on_maxcpc(data, maxcpc_relation):
    """Fit, per group, average CPC as a logarithmic function of max CPC.

    For every distinct combination of the ``rs.FILTER`` columns found in
    ``data`` the rows are picked with ``utils.selection``.  When at least two
    rows with a non-null ``avgcpc`` remain, a least-squares fit without
    intercept, ``avgcpc ~ coeff * log(maxcpc + 1)``, is evaluated on the
    ``maxcpc`` column of the second frame returned by ``utils.selection``.
    Otherwise the curve is ``rs.ACPCPOS_N_POINTS`` NaN values.

    Args:
        data: DataFrame that must contain the ``rs.FILTER`` columns plus
            ``avgposition`` and ``avgcpc``.
        maxcpc_relation: forwarded unchanged to ``utils.selection``.

    Returns:
        The first value returned by ``utils.format_output`` for each group,
        turned into a DataFrame and stacked in group order.  An empty
        DataFrame when ``data`` yields no groups.

    Raises:
        ValueError: if a required column is missing from ``data``.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")

    fields_check = rs.FILTER + ['avgposition', 'avgcpc']
    available = set(data.columns)
    if any(field not in available for field in fields_check):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_cpc: I cannot find all records in data")

    groups = data[rs.FILTER].drop_duplicates().values

    # The position grid does not depend on the group, so build it once.
    avg_position = [
        round(pos, 1)
        for pos in np.linspace(1.0, rs.ACPCPOS_MAX_POS, rs.ACPCPOS_N_POINTS)
    ]

    # Collect the per-group frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration.
    frames = []
    for group in groups:
        selected, selected_maxcpc = utils.selection(
            data=data,
            group=group,
            metric='maxcpc_avgcpc',
            maxcpc_relation=maxcpc_relation)
        selected = selected[pd.notnull(selected['avgcpc'])]

        if selected.shape[0] >= 2:
            # Single-column design matrix -> fit through the origin.
            x = np.log(np.array(selected['maxcpc'])[:, np.newaxis] + 1)
            y = np.array(selected['avgcpc'])
            coeffs, _, _, _ = np.linalg.lstsq(x, y, rcond=None)
            avg_cpc = list(coeffs[0] * np.log(selected_maxcpc['maxcpc'] + 1))
        else:
            avg_cpc = [np.nan] * rs.ACPCPOS_N_POINTS

        # The second return value (input-data dict) is not used here.
        feed_dict, _ = utils.format_output(
            group=group,
            metric='maxcpc_avgcpc',
            metric_data=avg_cpc,
            position=list(avg_position),  # fresh copy per call, as before
            selected_metric_data=selected['maxcpc'],
            selected_position=selected['avgposition'],
            selected_data=selected)
        frames.append(pd.DataFrame(feed_dict))

    logger.info("msg=finish")
    # pd.concat rejects an empty list, so keep the original empty result.
    return pd.concat(frames) if frames else pd.DataFrame()
def sale(sale_id, format):
    """Retrieves deals for a given sale."""
    # Deal fields, in the same order as the printed column headers.
    deal_fields = ('title', 'platform', 'type',
                   'original_price', 'sale_price', 'link')
    column_titles = ['Title', 'Platform', 'Type',
                     'Original Price', 'Sale Price', 'Link']

    rows = [[entry[field] for field in deal_fields]
            for entry in get_deals(sale_id)]

    rendered = format_output(rows, headers=column_titles, format=format)
    click.echo(rendered)
def list(format):
    """Retrieves any currently running sales."""
    # IDs shown to the user are 1-based positions in current_sales().
    rows = [
        [str(number), entry['title'], entry['link']]
        for number, entry in enumerate(current_sales(), start=1)
    ]
    rendered = format_output(rows, headers=['ID', 'Sale', 'Link'],
                             format=format)
    click.echo(rendered)
def get_position_avgcpc(data):
    """Fit, per group, average CPC as a logarithmic function of position.

    For every distinct combination of the ``rs.FILTER`` columns in ``data``
    the rows are picked with ``utils.selection(metric='maxcpc')``.  When at
    least two rows with a non-null ``avgcpc`` remain, a degree-1 polynomial
    in ``log(avgposition)`` is fitted and evaluated on an evenly spaced grid
    of ``rs.ACPCPOS_N_POINTS`` positions between 1.0 and
    ``rs.ACPCPOS_MAX_POS``.  Otherwise the curve is filled with NaN.

    Args:
        data: DataFrame that must contain the ``rs.FILTER`` columns plus
            ``avgposition`` and ``avgcpc``.

    Returns:
        The first value returned by ``utils.format_output`` for each group,
        turned into a DataFrame and stacked in group order.  An empty
        DataFrame when ``data`` yields no groups.

    Raises:
        ValueError: if a required column is missing from ``data``.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")

    fields_check = rs.FILTER + ['avgposition', 'avgcpc']
    available = set(data.columns)
    if any(field not in available for field in fields_check):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_cpc: I cannot find all records in data")

    groups = data[rs.FILTER].drop_duplicates().values

    # Evaluation grid and its rounded labels are the same for every group.
    positions = np.linspace(1.0, rs.ACPCPOS_MAX_POS, rs.ACPCPOS_N_POINTS)
    avg_position = [round(pos, 1) for pos in positions]

    # Collect the per-group frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration.
    frames = []
    for group in groups:
        selected, _ = utils.selection(data=data, group=group,
                                      metric='maxcpc')
        selected = selected[pd.notnull(selected['avgcpc'])]

        if selected.shape[0] >= 2:
            # Append 20 artificial (ACPCPOS_MAX_CONVERGENCE, 0) samples to
            # pull the fitted curve towards 0 at that position (the
            # original note says "position 5").
            x = list(selected['avgposition']) \
                + [rs.ACPCPOS_MAX_CONVERGENCE] * 20
            y = list(selected['avgcpc']) + [0] * 20
            coeffs = np.polyfit(np.log(x), y, deg=1)
            avg_cpc = list(coeffs[0] * np.log(positions) + coeffs[1])
        else:
            avg_cpc = [np.nan] * rs.ACPCPOS_N_POINTS

        feed_dict, _ = utils.format_output(
            group=group,
            metric='avgcpc',
            metric_data=avg_cpc,
            position=list(avg_position))  # fresh copy per call, as before
        frames.append(pd.DataFrame(feed_dict))

    logger.info("msg=finish")
    # pd.concat rejects an empty list, so keep the original empty result.
    return pd.concat(frames) if frames else pd.DataFrame()
def encrypt_atbash(self):
    """Encrypt ``self.crib`` with the Atbash (reversed alphabet) cipher.

    Each character of ``self.crib`` is turned into an index with
    ``ord(char) - 65`` (65 is ``ord('A')``) and replaced by the entry at
    that index in the reversed result of ``self.make_alphabet()``.  The
    result is appended to ``self.ciphertext``, passed through
    ``utils.format_output``, stored back, logged and returned.
    """
    mirrored = self.make_alphabet()[::-1]
    offsets = (ord(char) - 65 for char in self.crib)
    for offset in offsets:
        self.ciphertext += mirrored[offset]

    formatted = utils.format_output(self.ciphertext)
    self.ciphertext = formatted
    self.logger.debug("Ciphertext is %s", formatted)
    return formatted
def author(author_id, output_format='dictionary', pretty_print=None,
           xml_library='dicttoxml'):
    """Fetch one author via ``worker`` and return it formatted.

    ``worker`` receives ``author_id`` and an empty list; the first element
    of that list after the call is handed to ``utils.format_output`` along
    with the three formatting options.
    """
    collected = []
    worker(author_id, collected)
    first_entry = collected[0]
    return utils.format_output(first_entry, output_format, pretty_print,
                               xml_library)
def search_author(term, output_format='dictionary', pretty_print=None,
                  xml_library='dicttoxml'):
    """Search the SINTA database for a matching author name.

    Input: author name (string).
    Output: library containing id, name, NIDN, affiliation, and research
    areas, rendered by ``utils.format_output`` with the given options.
    """
    collected = []
    worker(term, collected)
    # Only the first entry placed in the list by worker is returned.
    first_entry = collected[0]
    return utils.format_output(first_entry, output_format, pretty_print,
                               xml_library)
def encrypt_caesar(self, shift):
    """Apply a single-shift Caesar cipher to ``self.crib``.

    Every character of ``self.crib`` is rotated by ``shift`` places inside
    the 26-letter range starting at ``'A'`` (code point 65).  The rotation
    is computed modulo 26, so negative shifts and shifts larger than 26
    also stay within ``A``-``Z``; a single "subtract 26 if above 'Z'"
    correction would let those fall outside the alphabet.  For shifts
    0..26 on upper-case letters the output is the same either way.

    The encrypted text is appended to ``self.ciphertext``, passed through
    ``utils.format_output``, stored back, logged and returned.

    Args:
        shift: integer number of alphabet places to rotate by.

    Returns:
        The formatted ciphertext.
    """
    for letter in self.crib:
        # assumes letter is in 'A'..'Z'; anything else is folded into
        # that range as well
        rotated = (ord(letter) - 65 + shift) % 26
        self.ciphertext += chr(rotated + 65)
    self.ciphertext = utils.format_output(self.ciphertext)
    self.logger.debug("Ciphertext is %s", self.ciphertext)
    return self.ciphertext
def authors(author_ids, output_format='dictionary', pretty_print=None,
            xml_library='dicttoxml', max_workers=None):
    """Fetch several authors concurrently and return them formatted.

    One ``worker(author_id, shared_list)`` task is submitted to a thread
    pool for each id.  Leaving the ``with`` block waits for all tasks.
    The futures are discarded, so an exception raised inside ``worker`` is
    not re-raised here.  The shared list is then handed to
    ``utils.format_output``.
    """
    collected = []
    pool = ThreadPoolExecutor(max_workers=max_workers)
    with pool:
        for one_id in author_ids:
            pool.submit(worker, one_id, collected)
    return utils.format_output(collected, output_format, pretty_print,
                               xml_library)
def author_comm_services(author_id, output_format='dictionary',
                         pretty_print=None, xml_library='dicttoxml',
                         max_workers=None):
    """Collect the "services" view of an author page, across all pages.

    The first page is downloaded and parsed here.  The page count is read
    from the fourth whitespace-separated token of the first
    ``.uk-width-large-1-2.table-footer`` element.  Pages 2..n are handed to
    ``worker`` on a thread pool, which receives the list returned by
    ``parse`` for the first page.  The futures are discarded, so worker
    exceptions are not re-raised here.
    """
    url = f'http://sinta.ristekbrin.go.id/authors/detail?id={author_id}&view=services'
    response = get(url)
    first_page = BeautifulSoup(response.content, 'html.parser')

    footers = first_page.select('.uk-width-large-1-2.table-footer')
    footer_tokens = footers[0].text.strip().split()
    last_page = utils.cast(footer_tokens[3])

    collected = parse(first_page)
    pool = ThreadPoolExecutor(max_workers=max_workers)
    with pool:
        for page_number in range(2, last_page + 1):
            pool.submit(worker, author_id, page_number, collected)

    return utils.format_output(collected, output_format, pretty_print,
                               xml_library)
def encrypt(self):
    """Encrypt ``self.crib`` by table lookup with the expanded key.

    Builds ``self.table`` and ``self.full_key`` (the user key expanded by
    ``self._key_maker`` for ``len(self.crib)``).  For each position, the
    key character selects the table row and the crib character selects the
    column, both as ``ord(char) - 65`` (65 is ``ord('A')``).  The looked-up
    cells are appended to ``self.ciphertext``, which is then formatted,
    stored, logged and returned.
    """
    self.table = self._make_table()
    self.logger.debug("Constructed Vigenere table as:\n%s", self.table)

    self.full_key = self._key_maker(self._user_key, len(self.crib))
    self.logger.debug("Full key is %s", self.full_key)

    # One [row, col] pair per key character; the crib is indexed by the
    # same position.
    cells = [
        [ord(key_char) - 65, ord(self.crib[position]) - 65]
        for position, key_char in enumerate(self.full_key)
    ]
    self.logger.debug("Table coordinates are %s", cells)

    for row, col in cells:
        self.ciphertext += self.table[row][col]

    formatted = utils.format_output(self.ciphertext)
    self.ciphertext = formatted
    self.logger.debug("Ciphertext is %s", formatted)
    return formatted
def upload_file():
    """Receive a file from the form and check every word it contains.

    The uploaded file is saved under ``app.config['UPLOAD_FOLDER']``, its
    words are loaded with ``load_data`` and each one is run through
    ``check_word``.  Renders ``multiples_words.html`` with one result per
    word, or ``index.html`` when no file was sent.
    """
    uploaded = request.files.get('file')
    # No file submitted: go back to the home page.
    if not uploaded:
        return render_template('index.html')

    # Store the upload (several words to check) under a sanitised name.
    safe_name = secure_filename(uploaded.filename)
    destination = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
    uploaded.save(destination)

    def describe(word):
        # Check whether the automaton accepts the word.
        status, transitions = check_word(word, states, initial_state,
                                         final_state)
        return {
            "word": word,
            "status": status,
            "transition": format_output(transitions)
        }

    # load_data yields the words of the file, one word per line.
    results = [describe(word) for word in load_data(destination)]
    return render_template('multiples_words.html', multiples_words=results)
# NOTE(review): the lines down to the closing bracket are the tail of a list
# of question dicts that opens above this excerpt; presumably it is the
# `questions` list passed to prompt() below -- confirm.
        'message': 'Give me the second date:',
        'validate': DateValidator,
        'filter': date_filter,
    },
    {
        # Where the result should go; read back below as
        # answers['output_type'].
        'type': 'list',
        'name': 'output_type',
        'message': 'Would you like to get information on screen, in file or both?',
        'choices': ['screen', 'file', 'both'],
    },
    {
        # Requested ordering; not read directly in the code below, but the
        # whole answers mapping is passed to format_output.
        'type': 'list',
        'name': 'order',
        'message': 'Do you want this information in ascending or descending order?',
        'choices': ['ascending', 'descending'],
    }
]

if __name__ == '__main__':
    # Ask the questions, count the Sundays between the two answered dates
    # and build the text to report.
    answers = prompt(questions)
    total_sundays = count_sundays(answers['start_date'], answers['end_date'])
    result_output = format_output(answers, total_sundays)
    output_type = answers['output_type']
    # 'both' satisfies both conditions, so the text is printed and written.
    if output_type in ('screen', 'both'):
        print(result_output)
    if output_type in ('file', 'both'):
        # 'w+' truncates any existing file before writing.
        with open(OUTPUT_FILENAME, 'w+') as file:
            file.write(result_output)
def test_formatter(self):
    """``utils.format_output`` splits the text into space-separated groups of five."""
    self.logger.info("Testing the output formatter")
    raw_text = "ABCDEFGHIJKLMNOPQRSTUVWYXZ"
    expected = "ABCDE FGHIJ KLMNO PQRST UVWYX Z"
    actual = utils.format_output(raw_text)
    self.assertEqual(actual, expected)
# Training / prediction script fragment (Python 2 print statements).
# `t` holds the start time of the current stage; it is first set above this
# excerpt and reset after every stage so each print reports that stage's
# elapsed seconds.

# Load training data: HOG features when HOG is truthy, raw images otherwise.
X, y = utils.load_hog(PPC, TRAIN_TOTAL) if HOG else utils.load_images(TRAIN_TOTAL)
print 'Done loading', str(time.time() - t)
t = time.time()

# split data
X_train, X_val, y_train, y_val = cross_validation.train_test_split(X, y, test_size=VALSIZE)
print 'Done splitting train', str(time.time() - t)
t = time.time()

# sweep hyperparameters field and pick best model
params, acc, model = choose_best_model(X_train, X_val, y_train, y_val, MODEL, PARAMS[MODEL])
# if not SVM:
#     reg, acc, model = find_regularization(X_train, X_val, y_train, y_val, REGS_HOG if HOG else REGS)
#     print "Best regularization strength is: " + str(reg)
# else:
#     params, acc, model = choose_best_model(X_train, X_val, y_train, y_val, MODEL, PARAMS[MODEL])
print "Best params are " + str(params)
print "Accuracy was: " + str(acc)
print 'Done training model', str(time.time() - t)
t = time.time()

# Load the test set the same way; the second returned value is discarded.
X_test, _ = utils.load_hog(PPC, TEST_TOTAL, False) if HOG else utils.load_images(TEST_TOTAL)
print 'Done loading test', str(time.time() - t)
t = time.time()

y_test = model.predict(X_test, chunk=2)
print 'Done predicting test', str(time.time() - t)
t = time.time()

# NOTE(review): presumably writes the predictions out, going by the
# 'Done writing test' message below -- confirm in utils.format_output.
utils.format_output(y_test)
print 'Done writing test', str(time.time() - t)
t = time.time()
def get_position_impressions(data):
    """Model, per group, impressions as a smooth function of position.

    For every distinct combination of the ``rs.FILTER`` columns in ``data``
    (optionally restricted to the campaigns listed in
    ``ms.abtest_campaign_file`` when ``ms.ab_test == 1``) the rows returned
    by ``utils.fallback_impressions`` are averaged by rounded position,
    completed with zeros on a fixed position grid and converted into a
    scaled reverse cumulative distribution.  A ``LinearGAM`` fitted to that
    curve is predicted on the same grid, clipped at 0 and cut to the first
    ``rs.IMP_N_POINTS`` values.

    Side effect: a column ``aggregation`` filled with NaN is added to (or
    overwritten in) ``data`` before anything else happens.

    Args:
        data: DataFrame that must contain the ``rs.FILTER`` columns plus
            ``avgposition`` and ``impressions``.

    Returns:
        The first value returned by ``utils.format_output`` for each group,
        turned into a DataFrame and stacked in group order.  An empty
        DataFrame when there are no groups.

    Raises:
        ValueError: if a required column is missing from ``data``.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")

    # create the aggregation field
    data['aggregation'] = np.nan

    fields_check = rs.FILTER + ['avgposition', 'impressions']
    available = set(data.columns)
    if any(field not in available for field in fields_check):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_impressions: I cannot find all records in data"
        )

    groups = data[rs.FILTER].drop_duplicates().values
    if ms.ab_test == 1:
        # Keep only groups whose first field is a listed draft campaign id.
        ab_test_file = pd.read_csv(ms.abtest_campaign_file)
        ab_test_file['draft-campaignid'] = ab_test_file[
            'draft-campaignid'].astype('str')
        groups = groups[np.isin(groups[:, 0],
                                list(ab_test_file['draft-campaignid']))]

    # Rounded output positions are the same for every group.
    avg_position = [
        round(pos, 1)
        for pos in np.linspace(1.0, rs.IMP_MAX_POS, rs.IMP_N_POINTS)
    ]

    # Collect the per-group frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration.
    frames = []
    for i in range(groups.shape[0]):
        ret = utils.fallback_impressions(data, groups[i])
        selected = ret['data']
        model_df = pd.DataFrame({
            'AvgPosition': round(selected['avgposition'], 1),
            'Impressions': selected['impressions']
        })

        # aggregate by position, add the dummy tail to force the spline
        # pass through 0 at MAX_POS_BID and calculate the CDF
        model_df = model_df.groupby('AvgPosition').agg('mean').reset_index()
        full_range = pd.DataFrame({
            'AvgPosition':
            np.linspace(1, rs.IMP_MAX_CONVERGENCE,
                        rs.IMP_N_POINTS_CONVERGENCE)
        })
        # complete the full positions observations
        model_df = pd.merge(full_range,
                            model_df,
                            left_on='AvgPosition',
                            right_on='AvgPosition',
                            how='left')
        model_df = model_df.fillna(0)

        x = list(model_df['AvgPosition'])
        y = list(model_df['Impressions'])

        # get the CDF
        # FIXME: statement #1 was changed for statement #2 possible conflict
        # STATEMENT #1: probabilities = y / sum(y)
        # STATEMENT #2: probabilities = [i / sum(y) for i in y]
        # The total is computed once; the element-wise form above summed
        # the whole list again for every element.
        total = sum(y)
        probabilities = [value / total for value in y]
        cdf = np.cumsum(probabilities[::-1])[::-1]
        # scale the cdf
        cdf = cdf * total / float(len(y))

        # reshape x to a single-column 2-D array to meet function
        # requirements
        new_x = np.reshape(np.asarray(x), (np.asarray(x).shape[0], 1))
        gam = LinearGAM(n_splines=rs.IMP_N_SPLINES).fit(new_x, cdf)
        impressions = np.clip(gam.predict(new_x), a_min=0,
                              a_max=float('Inf'))[:rs.IMP_N_POINTS]

        # TODO: parametrise (in settings) the indices for aggregation
        # [3 or 4]
        # replace groups nan aggregation by the calculated in the fallback
        if 'device' in ms.GRANULARITY:
            groups[i, 4] = ret['aggregation']
        else:
            groups[i, 3] = ret['aggregation']

        # The second return value (input-data dict) is not used here.
        feed_dict, _ = utils.format_output(
            group=groups[i],
            metric='impressions',
            metric_data=impressions,
            position=list(avg_position),  # fresh copy per call, as before
            selected_data=selected,
            selected_metric_data=y,
            selected_position=x)
        frames.append(pd.DataFrame(feed_dict))

    logger.info("msg=finish")
    # pd.concat rejects an empty list, so keep the original empty result.
    return pd.concat(frames) if frames else pd.DataFrame()
def get_position_ctr(data):
    """Fit, per group, click-through rate as a function of 1/position.

    For every distinct combination of the ``rs.FILTER`` columns in ``data``
    the rows are picked with ``utils.selection(metric='ctr')``.  With at
    least two rows, a straight line in ``1 / avgposition`` is fitted and
    evaluated on ``rs.CTR_N_POINTS`` positions between 1.0 and
    ``rs.CTR_MAX_POS``.  If that curve is not non-increasing, the fit is
    repeated weighted by impressions; a curve that still fails, or that
    contains negative values, is only reported in the log.  With fewer than
    two rows the curve is filled with NaN.

    Args:
        data: DataFrame that must contain the ``rs.FILTER`` columns plus
            ``avgposition`` and ``clickthroughrate``.  The ``impressions``
            column of the selected rows is read as well.

    Returns:
        The first value returned by ``utils.format_output`` for each group,
        turned into a DataFrame and stacked in group order.  An empty
        DataFrame when ``data`` yields no groups.

    Raises:
        ValueError: if a required column is missing from ``data``.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")

    fields_check = rs.FILTER + ['avgposition', 'clickthroughrate']
    available = set(data.columns)
    if any(field not in available for field in fields_check):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_cpc: I cannot find all records in data")

    groups = data[rs.FILTER].drop_duplicates().values

    # Evaluation grid and its rounded labels are the same for every group.
    positions = np.linspace(1.0, rs.CTR_MAX_POS, rs.CTR_N_POINTS)
    avg_position = [round(pos, 1) for pos in positions]

    # Collect the per-group frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration.
    frames = []
    for group in groups:
        selected, _ = utils.selection(data=data, group=group, metric='ctr')

        if selected.shape[0] >= 2:
            # 20 artificial (CTR_MAX_CONVERGENCE, 0) samples pull the
            # fitted curve towards 0 at that position.
            selected_avgposition = list(
                selected['avgposition']) + [rs.CTR_MAX_CONVERGENCE] * 20
            selected_clickthroughrate = list(
                selected['clickthroughrate']) + [0] * 20

            x = list(1 / selected['avgposition']
                     ) + [1 / rs.CTR_MAX_CONVERGENCE] * 20
            y = selected_clickthroughrate
            # The artificial samples get the mean impression count as
            # their weight.
            w = list(selected['impressions']
                     ) + [np.mean(selected['impressions'])] * 20

            coeffs = np.polyfit(x, y, deg=1)
            ctr = list((coeffs[0] * 1 / positions) + coeffs[1])

            if not all(a >= b for a, b in zip(ctr, ctr[1:])):
                # Second attempt: same model, weighted by impressions.
                coeffsw = np.polyfit(x, y, w=w, deg=1)
                ctr = list((coeffsw[0] * 1 / positions) + coeffsw[1])
                if not all(a >= b for a, b in zip(ctr, ctr[1:])):
                    logger.info(
                        "msg=NON DECREASING SHAPE FOR CLICKTHROUGHRATE")

            # NOTE(review): this branch fires on negative CTR values
            # although the message text says "NON DECREASING"; the text is
            # left unchanged in case logs are matched on it.
            if any(value < 0 for value in ctr):
                msg = "NON DECREASING SHAPE FOR CLICKTHROUGHRATE: " + str(
                    group[0]) + ' ' + str(group[1])
                logger.info("msg= " + msg)
        else:
            ctr = [np.nan] * rs.CTR_N_POINTS
            selected_avgposition = list(selected['avgposition'])
            selected_clickthroughrate = list(selected['clickthroughrate'])

        # The second return value (input-data dict) is not used here.
        feed_dict, _ = utils.format_output(
            group=group,
            metric='ctr',
            metric_data=ctr,
            position=list(avg_position),  # fresh copy per call, as before
            selected_data=selected,
            selected_metric_data=selected_clickthroughrate,
            selected_position=selected_avgposition)
        frames.append(pd.DataFrame(feed_dict))

    logger.info("msg=finish")
    # pd.concat rejects an empty list, so keep the original empty result.
    return pd.concat(frames) if frames else pd.DataFrame()