Exemplo n.º 1
0
def get_position_avgcpc_based_on_maxcpc(data, maxcpc_relation):
    """Fit an average-CPC curve from max CPC for every group in ``data``.

    Groups are the distinct value combinations of the ``rs.FILTER`` columns.
    For each group the rows returned by ``utils.selection`` (with null
    ``avgcpc`` dropped) are fitted by least squares, without intercept, to
    ``avgcpc = k * log(maxcpc + 1)``.  The fitted curve is evaluated on the
    ``maxcpc`` column of the second object returned by ``utils.selection``.
    Groups with fewer than two usable rows get a NaN curve of length
    ``rs.ACPCPOS_N_POINTS``.

    Args:
        data: pandas DataFrame containing at least the ``rs.FILTER`` columns
            plus ``avgposition`` and ``avgcpc``.
        maxcpc_relation: forwarded unchanged to ``utils.selection``.

    Returns:
        The per-group DataFrames built from the first dict returned by
        ``utils.format_output``, stacked in group order with their original
        indexes.  An empty DataFrame when ``data`` has no groups.

    Raises:
        ValueError: if a required column is missing from ``data``.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")

    required = rs.FILTER + ['avgposition', 'avgcpc']
    present = set(data.columns)
    if any(field not in present for field in required):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_cpc: I cannot find all records in data")

    groups = data[rs.FILTER].drop_duplicates().values

    # The position grid is identical for every group, so build it once.
    avg_position = [
        round(pos, 1)
        for pos in np.linspace(1.0, rs.ACPCPOS_MAX_POS, rs.ACPCPOS_N_POINTS)
    ]

    # Frames are collected and concatenated once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and re-copied the
    # accumulated frame on every iteration.
    frames = []
    for group in groups:
        selected, selected_maxcpc = utils.selection(
            data=data,
            group=group,
            metric='maxcpc_avgcpc',
            maxcpc_relation=maxcpc_relation)

        selected = selected[pd.notnull(selected['avgcpc'])]

        if selected.shape[0] >= 2:
            # Single-column design matrix => regression through the origin.
            log_maxcpc = np.log(
                np.array(selected['maxcpc'])[:, np.newaxis] + 1)
            observed = np.array(selected['avgcpc'])
            coeffs, _, _, _ = np.linalg.lstsq(log_maxcpc, observed,
                                              rcond=None)
            avg_cpc = list(coeffs[0] * np.log(selected_maxcpc['maxcpc'] + 1))
        else:
            avg_cpc = [np.nan] * rs.ACPCPOS_N_POINTS

        # The second returned dict (input data) is not used by this function.
        feed_dict, _ = utils.format_output(
            group=group,
            metric='maxcpc_avgcpc',
            metric_data=avg_cpc,
            position=list(avg_position),  # fresh copy per call
            selected_metric_data=selected['maxcpc'],
            selected_position=selected['avgposition'],
            selected_data=selected)

        frames.append(pd.DataFrame(feed_dict))

    logger.info("msg=finish")

    if not frames:
        # pd.concat refuses an empty list; keep the historical empty result.
        return pd.DataFrame()
    return pd.concat(frames)
Exemplo n.º 2
0
def sale(sale_id, format):
    """Retrieves deals for a given sale."""
    # Column titles and the deal keys they are read from, in display order.
    headers = ['Title', 'Platform', 'Type', 'Original Price', 'Sale Price', 'Link']
    fields = ('title', 'platform', 'type', 'original_price', 'sale_price', 'link')

    # One row per deal returned by get_deals(), one cell per field.
    rows = [[deal[field] for field in fields] for deal in get_deals(sale_id)]

    click.echo(format_output(rows, headers=headers, format=format))
Exemplo n.º 3
0
def list(format):
    """Retrieves any currently running sales."""
    # IDs are 1-based positions in the sequence returned by current_sales(),
    # rendered as strings.
    rows = [
        [str(number), entry['title'], entry['link']]
        for number, entry in enumerate(current_sales(), start=1)
    ]

    click.echo(format_output(rows, headers=['ID', 'Sale', 'Link'], format=format))
Exemplo n.º 4
0
def get_position_avgcpc(data):
    """Fit an average-CPC-versus-position curve for every group in ``data``.

    Groups are the distinct value combinations of the ``rs.FILTER`` columns.
    For each group the rows returned by ``utils.selection`` (with null
    ``avgcpc`` dropped) are fitted with ``avgcpc = a * log(position) + b``
    and the curve is evaluated on ``rs.ACPCPOS_N_POINTS`` positions evenly
    spaced between 1.0 and ``rs.ACPCPOS_MAX_POS``.  Groups with fewer than
    two usable rows get a NaN curve.

    Args:
        data: pandas DataFrame containing at least the ``rs.FILTER`` columns
            plus ``avgposition`` and ``avgcpc``.

    Returns:
        The per-group DataFrames built from the first dict returned by
        ``utils.format_output``, stacked in group order with their original
        indexes.  An empty DataFrame when ``data`` has no groups.

    Raises:
        ValueError: if a required column is missing from ``data``.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")

    required = rs.FILTER + ['avgposition', 'avgcpc']
    present = set(data.columns)
    if any(field not in present for field in required):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_cpc: I cannot find all records in data")

    groups = data[rs.FILTER].drop_duplicates().values

    # The evaluation grid does not depend on the group: compute it once.
    grid = np.linspace(1.0, rs.ACPCPOS_MAX_POS, rs.ACPCPOS_N_POINTS)
    log_grid = np.log(grid)
    avg_position = [round(pos, 1) for pos in grid]

    # Frames are collected and concatenated once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and re-copied the
    # accumulated frame on every iteration.
    frames = []
    for group in groups:
        selected, _ = utils.selection(data=data,
                                      group=group,
                                      metric='maxcpc')
        selected = selected[pd.notnull(selected['avgcpc'])]

        if selected.shape[0] >= 2:
            # 20 synthetic zero-CPC observations at the convergence position
            # pull the fitted logarithm towards 0 there.
            x = list(selected['avgposition']) \
                + [rs.ACPCPOS_MAX_CONVERGENCE] * 20
            y = list(selected['avgcpc']) + [0] * 20

            coeffs = np.polyfit(np.log(x), y, deg=1)
            avg_cpc = list(coeffs[0] * log_grid + coeffs[1])
        else:
            avg_cpc = [np.nan] * rs.ACPCPOS_N_POINTS

        feed_dict, _ = utils.format_output(group=group,
                                           metric='avgcpc',
                                           metric_data=avg_cpc,
                                           position=list(avg_position))

        frames.append(pd.DataFrame(feed_dict))

    logger.info("msg=finish")

    if not frames:
        # pd.concat refuses an empty list; keep the historical empty result.
        return pd.DataFrame()
    return pd.concat(frames)
Exemplo n.º 5
0
    def encrypt_atbash(self):
        """Encrypt ``self.crib`` with the Atbash (reversed alphabet) cipher.

        Each character is mapped to the entry of the reversed alphabet at
        index ``ord(char) - 65`` (so 'A' selects index 0).  The result is
        appended to ``self.ciphertext``, which is then passed through
        ``utils.format_output``, logged at debug level and returned.
        """
        mirrored = self.make_alphabet()[::-1]
        # NOTE(review): assumes the crib only holds upper-case A-Z - confirm.
        self.ciphertext += "".join(
            mirrored[ord(char) - 65] for char in self.crib)

        self.ciphertext = utils.format_output(self.ciphertext)
        self.logger.debug("Ciphertext is %s", self.ciphertext)
        return self.ciphertext
Exemplo n.º 6
0
def author(author_id,
           output_format='dictionary',
           pretty_print=None,
           xml_library='dicttoxml'):
    """Run ``worker`` for one author and return its first result, formatted.

    ``worker`` receives ``author_id`` and a list it is expected to fill;
    the first element of that list is handed to ``utils.format_output``
    together with the three formatting options.

    Raises:
        IndexError: if ``worker`` leaves the list empty.
    """
    collected = []
    worker(author_id, collected)

    first_entry = collected[0]
    return utils.format_output(first_entry, output_format, pretty_print,
                               xml_library)
def search_author(term, output_format='dictionary', pretty_print=None, xml_library='dicttoxml'):
    '''
    Search the SINTA database for authors matching a name.

    Input: author name (string)
    Output: the first result collected by ``worker``, passed through
    ``utils.format_output``; per the original documentation it holds the
    id, name, NIDN, affiliation and research areas.
    Raises IndexError if ``worker`` collects nothing.
    '''
    matches = []
    worker(term, matches)

    first_match = matches[0]
    return utils.format_output(first_match, output_format, pretty_print, xml_library)
Exemplo n.º 8
0
    def encrypt_caesar(self, shift):
        """Encrypt ``self.crib`` with a single-shift Caesar cipher.

        Every letter is rotated by ``shift`` places inside the upper-case
        alphabet.  The rotation is done modulo 26, so negative shifts and
        shifts larger than 26 also wrap back into ``A``-``Z`` (the previous
        single ``n > 90`` check only handled one overflow past ``Z``).  For
        upper-case input and ``0 <= shift <= 26`` the output is unchanged.

        Args:
            shift: integer number of alphabet positions to rotate by.

        Returns:
            The updated ``self.ciphertext`` after ``utils.format_output``.
        """
        base = ord('A')
        # NOTE(review): assumes the crib only holds upper-case A-Z - confirm.
        self.ciphertext += "".join(
            chr((ord(char) - base + shift) % 26 + base)
            for char in self.crib)

        self.ciphertext = utils.format_output(self.ciphertext)
        self.logger.debug("Ciphertext is %s", self.ciphertext)
        return self.ciphertext
Exemplo n.º 9
0
def authors(author_ids,
            output_format='dictionary',
            pretty_print=None,
            xml_library='dicttoxml',
            max_workers=None):
    """Run ``worker`` for several authors on a thread pool and format the results.

    One ``worker(author_id, results)`` task is submitted per id; all tasks
    share the same ``results`` list.  Leaving the ``with`` block waits for
    every task to finish before the list is passed to
    ``utils.format_output``.

    NOTE(review): the submitted futures are never inspected, so an exception
    raised inside ``worker`` is not surfaced here - confirm this best-effort
    behaviour is intended.
    """
    results = []

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        for current_id in author_ids:
            pool.submit(worker, current_id, results)

    return utils.format_output(results, output_format, pretty_print,
                               xml_library)
Exemplo n.º 10
0
def author_comm_services(author_id, output_format='dictionary', pretty_print=None, xml_library='dicttoxml',
                         max_workers=None):
    """Collect the "services" view of a SINTA author page across all result pages.

    The first page is downloaded and parsed in the calling thread; its table
    footer supplies the page count (fourth whitespace-separated token,
    converted with ``utils.cast``).  Pages 2..n are then handed to ``worker``
    on a thread pool, sharing the list returned by ``parse`` for page one.
    The combined list is returned through ``utils.format_output``.

    NOTE(review): the futures are not inspected, so failures inside
    ``worker`` are not surfaced here - confirm this is intended.
    """
    first_page_url = f'http://sinta.ristekbrin.go.id/authors/detail?id={author_id}&view=services'
    response = get(first_page_url)
    document = BeautifulSoup(response.content, 'html.parser')

    footer = document.select('.uk-width-large-1-2.table-footer')[0]
    footer_tokens = footer.text.strip().split()
    n_page = utils.cast(footer_tokens[3])

    collected = parse(document)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        for page_number in range(2, n_page + 1):
            pool.submit(worker, author_id, page_number, collected)

    return utils.format_output(collected, output_format, pretty_print, xml_library)
Exemplo n.º 11
0
    def encrypt(self):
        """Encrypt ``self.crib`` using the Vigenere table.

        Builds the table and the full-length key, looks up one table cell
        per key character (row from the key letter, column from the crib
        letter at the same position, both as ``ord(letter) - 65``), appends
        the looked-up characters to ``self.ciphertext`` and returns it after
        ``utils.format_output``.
        """
        self.table = self._make_table()
        self.logger.debug("Constructed Vigenere table as:\n%s", self.table)

        self.full_key = self._key_maker(self._user_key, len(self.crib))
        self.logger.debug("Full key is %s", self.full_key)

        # [row, col] pairs; kept as lists so the debug output is unchanged.
        cells = [
            [ord(key_char) - 65, ord(self.crib[position]) - 65]
            for position, key_char in enumerate(self.full_key)
        ]
        self.logger.debug("Table coordinates are %s", cells)

        self.ciphertext += "".join(self.table[row][col] for row, col in cells)
        self.ciphertext = utils.format_output(self.ciphertext)

        self.logger.debug("Ciphertext is %s", self.ciphertext)
        return self.ciphertext
Exemplo n.º 12
0
def upload_file():
    """
    Receive a file from the form and check every word it contains.

    The uploaded file is stored in ``app.config['UPLOAD_FOLDER']`` under a
    sanitised name, loaded with ``load_data`` (one word per line) and each
    word is run through ``check_word`` against the automaton described by
    the module-level ``states``, ``initial_state`` and ``final_state``.

    Returns the ``index.html`` page when no usable file was submitted,
    otherwise ``multiples_words.html`` rendered with one entry per word
    (the word, its acceptance status and its formatted transitions).
    """
    # File submitted by the form.
    file = request.files.get('file')

    # No file sent: go back to the home page.
    if not file:
        return render_template('index.html')

    filename = secure_filename(file.filename)
    if not filename:
        # secure_filename() can strip every character (e.g. a name made only
        # of dots or separators); saving would then target the upload folder
        # itself, so treat it like a missing file.
        return render_template('index.html')

    # Save the uploaded file, which holds the words to check.
    path_to_save = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(path_to_save)

    # Load the words, one word per line.
    words = load_data(path_to_save)

    responses = []
    for word in words:
        # Check whether the automaton accepts the word.
        response, transitions = check_word(word, states, initial_state,
                                           final_state)
        responses.append({
            "word": word,
            "status": response,
            "transition": format_output(transitions)
        })

    return render_template('multiples_words.html', multiples_words=responses)
Exemplo n.º 13
0
        'message': 'Give me the second date:',
        'validate': DateValidator,
        'filter': date_filter,
    },
    {
        'type': 'list',
        'name': 'output_type',
        'message': 'Would you like to get information on screen, in file or both?',
        'choices': ['screen', 'file', 'both'],
    },
    {
        'type': 'list',
        'name': 'order',
        'message': 'Do you want this information in ascending or descending order?',
        'choices': ['ascending', 'descending'],
    }
]


if __name__ == '__main__':
    # Ask the questions, count the Sundays between the two dates and render
    # the report once; it is then sent to the screen, a file, or both.
    answers = prompt(questions)
    total_sundays = count_sundays(answers['start_date'], answers['end_date'])
    result_output = format_output(answers, total_sundays)

    output_type = answers['output_type']
    to_screen = output_type == 'screen' or output_type == 'both'
    to_file = output_type == 'file' or output_type == 'both'

    if to_screen:
        print(result_output)
    if to_file:
        with open(OUTPUT_FILENAME, 'w+') as file:
            file.write(result_output)
Exemplo n.º 14
0
 def test_formatter(self):
     """Check utils.format_output on a 26-letter string.

     The expected value is the same letters split into space-separated
     groups of five, with the single leftover letter as the last group.
     Note the input has Y before X, and so does the expected output.
     """
     self.logger.info("Testing the output formatter")
     raw_text = "ABCDEFGHIJKLMNOPQRSTUVWYXZ"
     expected = "ABCDE FGHIJ KLMNO PQRST UVWYX Z"
     self.assertEqual(utils.format_output(raw_text), expected)
Exemplo n.º 15
0
    X, y = utils.load_hog(PPC, TRAIN_TOTAL) if HOG else utils.load_images(TRAIN_TOTAL)
    print 'Done loading', str(time.time() - t)
    t = time.time()

    # split data
    X_train, X_val, y_train, y_val = cross_validation.train_test_split(X, y, test_size=VALSIZE)
    print 'Done splitting train', str(time.time() - t)
    t = time.time()

    # sweep hyperparameters field and pick best model
    params, acc, model = choose_best_model(X_train, X_val, y_train, y_val, MODEL, PARAMS[MODEL])
    # if not SVM:
    #     reg, acc, model = find_regularization(X_train, X_val, y_train, y_val, REGS_HOG if HOG else REGS)
    #     print "Best regularization strength is: " + str(reg)
    # else:
    #     params, acc, model = choose_best_model(X_train, X_val, y_train, y_val, MODEL, PARAMS[MODEL])
    print "Best params are " + str(params)
    print "Accuracy was: " + str(acc)
    print 'Done training model', str(time.time() - t)
    t = time.time()

    X_test, _ = utils.load_hog(PPC, TEST_TOTAL, False) if HOG else utils.load_images(TEST_TOTAL)
    print 'Done loading test', str(time.time() - t)
    t = time.time()
    y_test = model.predict(X_test, chunk=2)
    print 'Done predicting test', str(time.time() - t)
    t = time.time()
    utils.format_output(y_test)
    print 'Done writing test', str(time.time() - t)
    t = time.time()
Exemplo n.º 16
0
def get_position_impressions(data):
    """Fit an impressions-versus-position curve for every group in ``data``.

    Groups are the distinct value combinations of the ``rs.FILTER`` columns
    (optionally restricted to the A/B-test campaigns).  For each group the
    data returned by ``utils.fallback_impressions`` is averaged per rounded
    position, completed with zeros over the full convergence range, turned
    into a scaled reversed cumulative sum and smoothed with a ``LinearGAM``.
    Predictions are clipped at 0 and truncated to ``rs.IMP_N_POINTS``.

    Side effects:
        Adds (or overwrites) an ``aggregation`` column filled with NaN on
        the caller's ``data`` before validation.  NOTE(review): this looks
        required because the group arrays are later indexed at the
        aggregation slot, i.e. ``rs.FILTER`` presumably contains
        ``aggregation`` - confirm.

    Args:
        data: pandas DataFrame containing at least the ``rs.FILTER`` columns
            plus ``avgposition`` and ``impressions``.

    Returns:
        The per-group DataFrames built from the first dict returned by
        ``utils.format_output``, stacked in group order with their original
        indexes.  An empty DataFrame when there are no groups.

    Raises:
        ValueError: if a required column is missing from ``data``.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")

    # create the aggregation field
    data['aggregation'] = np.nan

    required = rs.FILTER + ['avgposition', 'impressions']
    present = set(data.columns)
    if any(field not in present for field in required):
        logger.error('error=cannot find all records in data')
        raise ValueError(
            "Error get_position_impressions: I cannot find all records in data"
        )

    groups = data[rs.FILTER].drop_duplicates().values

    if ms.ab_test == 1:
        # Keep only the groups whose first field is a draft campaign id
        # listed in the A/B-test file.
        ab_test_file = pd.read_csv(ms.abtest_campaign_file)
        ab_test_file['draft-campaignid'] = ab_test_file[
            'draft-campaignid'].astype('str')
        groups = groups[np.isin(groups[:, 0],
                                list(ab_test_file['draft-campaignid']))]

    # Loop invariants: the full position range used to complete the
    # observations, the output position grid and the aggregation slot.
    full_range = pd.DataFrame({
        'AvgPosition':
        np.linspace(1, rs.IMP_MAX_CONVERGENCE, rs.IMP_N_POINTS_CONVERGENCE)
    })
    avg_position = [
        round(pos, 1)
        for pos in np.linspace(1.0, rs.IMP_MAX_POS, rs.IMP_N_POINTS)
    ]
    # TODO: parametrise (in settings) the indices for aggregation [3 or 4]
    aggregation_slot = 4 if 'device' in ms.GRANULARITY else 3

    # Frames are collected and concatenated once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and re-copied the
    # accumulated frame on every iteration.
    frames = []
    for i, group in enumerate(groups):
        ret = utils.fallback_impressions(data, group)
        selected = ret['data']

        model_df = pd.DataFrame({
            'AvgPosition': round(selected['avgposition'], 1),
            'Impressions': selected['impressions']
        })

        # aggregate by position, then complete the full range of positions;
        # positions without observations get 0 impressions
        model_df = model_df.groupby('AvgPosition').agg('mean').reset_index()
        model_df = pd.merge(full_range,
                            model_df,
                            left_on='AvgPosition',
                            right_on='AvgPosition',
                            how='left')
        model_df = model_df.fillna(0)

        x = list(model_df['AvgPosition'])
        y = list(model_df['Impressions'])

        # Reversed cumulative distribution, rescaled by the mean of y.
        # The total is computed once instead of once per element.
        # NOTE(review): a group whose impressions are all zero makes the
        # total 0 and the division below fail - confirm it cannot happen.
        total = sum(y)
        probabilities = [value / total for value in y]
        cdf = np.cumsum(probabilities[::-1])[::-1]
        cdf = cdf * total / float(len(y))

        # reshape x into a single-column 2-D array for the fit
        x_arr = np.asarray(x)
        new_x = np.reshape(x_arr, (x_arr.shape[0], 1))
        gam = LinearGAM(n_splines=rs.IMP_N_SPLINES).fit(new_x, cdf)

        impressions = np.clip(gam.predict(new_x), a_min=0,
                              a_max=float('Inf'))[:rs.IMP_N_POINTS]

        # replace the NaN aggregation of the group by the one calculated in
        # the fallback (writes through to ``group``, a view of this row)
        groups[i, aggregation_slot] = ret['aggregation']

        # The second returned dict (input data) is not used by this function.
        feed_dict, _ = utils.format_output(
            group=groups[i],
            metric='impressions',
            metric_data=impressions,
            position=list(avg_position),  # fresh copy per call
            selected_data=selected,
            selected_metric_data=y,
            selected_position=x)

        frames.append(pd.DataFrame(feed_dict))

    logger.info("msg=finish")

    if not frames:
        # pd.concat refuses an empty list; keep the historical empty result.
        return pd.DataFrame()
    return pd.concat(frames)
Exemplo n.º 17
0
def get_position_ctr(data):
    """Fit a click-through-rate-versus-position curve for every group.

    Groups are the distinct value combinations of the ``rs.FILTER`` columns.
    For each group the rows returned by ``utils.selection`` are fitted with
    ``ctr = a / position + b`` and evaluated on ``rs.CTR_N_POINTS`` positions
    evenly spaced between 1.0 and ``rs.CTR_MAX_POS``.  When the unweighted
    fit is not non-increasing, the fit is repeated with the impressions as
    weights.  Curves that are still not non-increasing, or that contain
    negative values, are only reported in the log.  Groups with fewer than
    two rows get a NaN curve.

    Args:
        data: pandas DataFrame containing at least the ``rs.FILTER`` columns
            plus ``avgposition`` and ``clickthroughrate``.  The rows returned
            by ``utils.selection`` must also carry ``impressions``.

    Returns:
        The per-group DataFrames built from the first dict returned by
        ``utils.format_output``, stacked in group order with their original
        indexes.  An empty DataFrame when ``data`` has no groups.

    Raises:
        ValueError: if a required column is missing from ``data``.
    """
    logger = logging.getLogger(ms.LOGNAME)
    logger.info("msg=get relations")

    required = rs.FILTER + ['avgposition', 'clickthroughrate']
    present = set(data.columns)
    if any(field not in present for field in required):
        logger.error('error=cannot find all records in data')
        # The message used to name get_position_cpc (copy-paste slip).
        raise ValueError(
            "Error get_position_ctr: I cannot find all records in data")

    groups = data[rs.FILTER].drop_duplicates().values

    # The evaluation grid does not depend on the group: compute it once.
    grid = np.linspace(1.0, rs.CTR_MAX_POS, rs.CTR_N_POINTS)
    avg_position = [round(pos, 1) for pos in grid]

    # Frames are collected and concatenated once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and re-copied the
    # accumulated frame on every iteration.
    frames = []
    for group in groups:
        selected, _ = utils.selection(data=data, group=group, metric='ctr')

        selected_avgposition = list(selected['avgposition'])
        selected_clickthroughrate = list(selected['clickthroughrate'])

        if selected.shape[0] >= 2:
            # 20 synthetic zero-CTR observations at the convergence position
            # pull the fitted curve towards 0 there.
            selected_avgposition += [rs.CTR_MAX_CONVERGENCE] * 20
            selected_clickthroughrate += [0] * 20

            x = list(
                1 /
                selected['avgposition']) + [1 / rs.CTR_MAX_CONVERGENCE] * 20
            y = selected_clickthroughrate
            w = list(selected['impressions']
                     ) + [np.mean(selected['impressions'])] * 20

            coeffs = np.polyfit(x, y, deg=1)
            ctr = list(coeffs[0] / grid + coeffs[1])

            if not all(a >= b for a, b in zip(ctr, ctr[1:])):
                # Retry with the impressions as weights.
                coeffsw = np.polyfit(x, y, w=w, deg=1)
                ctr = list(coeffsw[0] / grid + coeffsw[1])

                if not all(a >= b for a, b in zip(ctr, ctr[1:])):
                    logger.info(
                        "msg=NON DECREASING SHAPE FOR CLICKTHROUGHRATE")

            if any(value < 0 for value in ctr):
                # This check used to log the "NON DECREASING SHAPE" text,
                # which describes the other check.
                logger.info(
                    "msg= NEGATIVE VALUES FOR CLICKTHROUGHRATE: %s %s",
                    group[0], group[1])
        else:
            ctr = [np.nan] * rs.CTR_N_POINTS

        # The second returned dict (input data) is not used by this function.
        feed_dict, _ = utils.format_output(
            group=group,
            metric='ctr',
            metric_data=ctr,
            position=list(avg_position),  # fresh copy per call
            selected_data=selected,
            selected_metric_data=selected_clickthroughrate,
            selected_position=selected_avgposition)

        frames.append(pd.DataFrame(feed_dict))

    logger.info("msg=finish")

    if not frames:
        # pd.concat refuses an empty list; keep the historical empty result.
        return pd.DataFrame()
    return pd.concat(frames)