Example #1
def main():
    args = argparser.parse_args()
    LOGGER.info('Process Start', {
        'affiliate': args.affiliate,
        'month': args.month,
        'type': args.type
    })

    # 0. calculate time frame
    (year, month) = map(int, args.month.split("-"))
    start_date = datetime.date(year, month, 1)
    end_date = datetime.date(year, month, calendar.monthrange(year, month)[1])

    # 1. download
    if args.affiliate.upper() == "ALL":
        affiliates = os.listdir(os.path.abspath("affiliates"))
    else:
        affiliates = [args.affiliate + ".py"]

    data = downloader.get_data(affiliates, start_date, end_date, args.type)

    if len(data) > 0:
        # fixing log id for coupons with tracking id (5 chars) instead of log id (9 chars)
        data['ipg:source'] = data.apply(
            lambda x: parse.fix_broken_log_id(x['ipg:source']), axis=1)

        # filter out only old system transactions
        data['old_system'] = data.apply(
            lambda x: ('ipg:sourceAffiliate' in data.columns and x[
                'ipg:sourceAffiliate'] == 'ipricegroup'
                       ) or not parse.is_new_system_source(x['ipg:source']),
            axis=1)
        data = data[data['old_system'] == True]

        data = downloader.process_data(data)
        if args.type == 'transactions':
            LOGGER.info("Calculating basket size data")
            data = calculation.calc_basket_size(data)
        downloader.export_csv(data)

    LOGGER.info('Process Ended', {
        'affiliate': args.affiliate,
        'month': args.month,
        'type': args.type
    })
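
The time-frame block above derives the first and last day of the requested month from the "YYYY-MM" argument. A small self-contained illustration of that computation (the sample month value is made up):

import calendar
import datetime

# The month string is split into year and month, and calendar.monthrange
# gives the number of days in that month for the end date.
year, month = map(int, "2019-02".split("-"))
start_date = datetime.date(year, month, 1)
end_date = datetime.date(year, month, calendar.monthrange(year, month)[1])
print(start_date, end_date)  # 2019-02-01 2019-02-28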
Example #2
def main(dataset_folder, temporal_folder, experiment):
    """Run the script.

    Args:
        dataset_folder : specifies the output folder.
        experiment : tuple (<experiment name>, <sequence number>) that
                     specifies which experiment to download.
                     We use sequences 1, 2, 3, 4 of the indoor_flying
                     experiment.
        temporal_folder : specifies the folder where the original dataset
                          will be placed.
    """
    if not _is_experiment_correctly_defined(experiment):
        raise ValueError('"experiments" are not correctly defined.')

    dataset_folder = os.path.abspath(dataset_folder)
    temporal_folder = os.path.abspath(temporal_folder)

    _make_if_does_not_exist(temporal_folder)
    _make_if_does_not_exist(dataset_folder)

    downloader.TMP_FOLDER = temporal_folder
    experiment_name, experiment_number = experiment
    paths = dataset_constants.experiment_paths(experiment_name,
                                               experiment_number,
                                               dataset_folder)
    dataset_constants.create_folders(paths)
    calibration_data = calibration.Calibration(experiment_name)

    data_path = downloader.get_data(experiment_name, experiment_number)[0]
    data_bag = bag_indexer.get_bag_indexer(data_path)

    gt_bag_path = downloader.get_ground_truth(experiment_name,
                                              experiment_number)[0]
    gt_bag = bag_indexer.get_bag_indexer(gt_bag_path)

    depth_topic_reader = gt_bag.get_topic_reader(TOPICS['depth'])
    focal_length_x_baseline = calibration_data.intrinsic_extrinsic['cam1'][
        'projection_matrix'][0][3]
    synchronization_timestamps = []
    for index, depth_message in enumerate(depth_topic_reader):
        depth_image, timestamp = _load_image_message(depth_message)
        disparity_image = _depth2disparity(depth_image,
                                           focal_length_x_baseline)
        disparity_path = paths['disparity_file'] % index
        cv2.imwrite(disparity_path, disparity_image)
        synchronization_timestamps.append(timestamp)

    np.savetxt(paths['timestamps_file'],
               np.array(synchronization_timestamps),
               fmt='%f',
               header="timestamp")

    distorted_to_rectified = {
        'cam0': calibration_data.left_map,
        'cam1': calibration_data.right_map
    }

    for camera in ['cam0', 'cam1']:
        rectified_to_distorted_x, rectified_to_distorted_y = _rectification_map(
            calibration_data.intrinsic_extrinsic[camera])
        image_size = calibration_data.intrinsic_extrinsic[camera]['resolution']

        events_topic_reader = data_bag.get_topic_reader(
            TOPICS[camera]['events'])
        images_topic_reader = data_bag.get_topic_reader(
            TOPICS[camera]['image'])
        event_bags_timestamps = _get_bags_timestamps(events_topic_reader)
        image_bags_timestamps = _get_bags_timestamps(images_topic_reader)

        for synchronization_index, synchronization_timestamp in enumerate(
                synchronization_timestamps):

            synchronized_events = _get_synchronized_events(
                synchronization_timestamp, event_bags_timestamps,
                events_topic_reader)
            rectified_synchronized_events = _rectify_events(
                synchronized_events, distorted_to_rectified[camera],
                image_size)
            events_path = paths[camera]['event_file'] % synchronization_index
            np.save(events_path, np.array(rectified_synchronized_events))
            synchronized_image = _get_synchronized_image(
                synchronization_timestamp, image_bags_timestamps,
                images_topic_reader)
            rectified_synchronized_image = cv2.remap(synchronized_image,
                                                     rectified_to_distorted_x,
                                                     rectified_to_distorted_y,
                                                     cv2.INTER_LINEAR)
            image_path = paths[camera]['image_file'] % synchronization_index
            cv2.imwrite(image_path, rectified_synchronized_image)
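
The depth-to-disparity step relies on the rectified-stereo relation disparity = focal_length * baseline / depth, with the focal_length * baseline product read from entry [0][3] of cam1's projection matrix. The helper itself is not shown in this example, so the following is only a minimal sketch of what _depth2disparity could look like under that assumption:

import numpy as np

def _depth2disparity(depth_image, focal_length_x_baseline):
    # Hypothetical sketch: apply disparity = (focal_length * baseline) / depth.
    # The projection-matrix entry P[0][3] is typically -fx * Tx, hence abs().
    # Pixels without a valid depth are left at zero disparity.
    depth = np.asarray(depth_image, dtype=np.float64)
    disparity = np.zeros_like(depth)
    valid = np.isfinite(depth) & (depth > 0)
    disparity[valid] = abs(focal_length_x_baseline) / depth[valid]
    return disparity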
Example #3
# Polynomial trial: a degree-3 polynomial was also tested, but it
# produced results with only first-power terms.
from downloader import get_data
df_dict = get_data()
listings = df_dict['listings']
calendar = df_dict['calendar']
reviews = df_dict['reviews']
from clean_data import clean_preprocess_listings
data = clean_preprocess_listings(listings)
# Data downloaded; building the model
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.ensemble import AdaBoostRegressor
from functions import subtract_elem
from model_functions import Pipeline, sklearn_model, ml_model_setup
pipe1 = Pipeline([
    ('poly', PolynomialFeatures(degree=2)),
    ('scaler', StandardScaler()),
    ('regressor', AdaBoostRegressor(n_estimators=200, loss='exponential')),
])
regressor = sklearn_model(model=pipe1)
model = ml_model_setup(data=None, model=regressor)

tmp = pd.get_dummies(
    data,
    columns=['neighbourhood_cleansed', 'bathrooms_type', 'room_type'],
    prefix=['neighbourhoods', 'bathroom', 'room_type'],
    prefix_sep='-')
outcomes = ['annual_earnings']
variables = subtract_elem(tmp.columns, outcomes)
model.set_inputs(data=tmp, outcomes=outcomes, variables=variables)
model.train()
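
subtract_elem is only used here to drop the outcome column from the feature list. Its implementation in functions is not shown, so this is just a minimal sketch under the assumption that it filters one sequence by another:

def subtract_elem(columns, to_remove):
    # Hypothetical sketch: keep the elements of `columns` that are not in
    # `to_remove`, preserving order, so outcomes can be split off from the
    # feature list.
    return [column for column in columns if column not in to_remove]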
Example #4
#! /usr/bin/env ipython
'''main file to execute the project'''

print('Downloading the data')
#downloading data into memory
from datetime import datetime
from downloader import get_data
city = "Boston"
results_path = "results/"
df_dict = get_data(city="Boston",
                   latest_by=datetime.today().strftime("%d-%B-%Y"))
listings = df_dict['listings']
calendar = df_dict['calendar']
reviews = df_dict['reviews']

print('cleaning and preprocessing the data')
#cleaning & preprocessing the data
from clean_data import clean_preprocess_listings
data = clean_preprocess_listings(listings)

#let's plot the results from the exploratory analysis

print('Plotting the results')
#results1/plot1 : Broad understanding of earning & avg.price
from plot_functions import plot_graph
from functions import range_without_outliers
inputs = {
    'title': f'Annual earnings across Airbnb listings in {city}',
    'kind': 'hist',
    'bins': 600,
    'x_label': 'Earnings by a single listing',
Example #5
def main():
    parser = argparse.ArgumentParser(
        description='Ingest Blue Planet data using the REST API. '
        'Lab Username, Password and Id can also be set as environment variables.')

    parser.add_argument('--lab-host',
                        '-lh',
                        help='Ciena Emulation Cloud, Lab Host')
    parser.add_argument('--lab-username',
                        '-lu',
                        help='Ciena Emulation Cloud, Lab Username')
    parser.add_argument('--lab-password',
                        '-lp',
                        help='Ciena Emulation Cloud, Lab Password')
    parser.add_argument('--lab-id',
                        '-lid',
                        help='Ciena Emulation Cloud, Lab Id')

    parser.add_argument(
        '--data-type',
        '-dt',
        help=
        'Type of data (alarms, user, device, connections) to pull from MCP',
        required=True,
        choices=PATH_MAP.keys())

    parser.add_argument('--recreate-table',
                        '-r',
                        help='To recreate table in database.',
                        required=False,
                        default=True)

    parser.add_argument('--dest-db-type',
                        help='Destination database type (MySQL or Postgres)',
                        default='Postgres')

    parser.add_argument('--mysql-host', '-mh', help='MySQL host to store data')
    parser.add_argument('--mysql-username', '-mu', help='MySQL username')
    parser.add_argument('--mysql-password', '-mp', help='MySQL Password')
    parser.add_argument('--mysql-db', '-db', help='MySQL DB')
    parser.add_argument('--mysql-port', '-p', help='MySQL Port', default=3306)

    parser.add_argument('--postgres-host',
                        '-ph',
                        help='Postgres host to store data')
    parser.add_argument('--postgres-username', '-pu', help='Postgres username')
    parser.add_argument('--postgres-password', help='Postgres Password')
    parser.add_argument('--postgres-db', '-pdb', help='Postgres DB')
    parser.add_argument('--postgres-port', help='Postgres Port', default=5432)

    parser.add_argument('--params',
                        default='{}',
                        type=valid_dict,
                        help='URL parameters for MCP APIs. '
                        'Accepts a valid dictionary as a string, '
                        'e.g. \'{"params":"1", "type":"mcp"}\'')

    args = parser.parse_args()

    host = get_environment_data("HOST", args.lab_host, DEFAULT_HOST)
    username = get_environment_data("USERNAME",
                                    args.lab_username,
                                    default="dev")
    password = get_environment_data("PASSWORD", args.lab_password)
    lab_id = get_environment_data("LABID", args.lab_id, default=254)

    host = "%s/%s" % (host, lab_id)

    # End-to-End data pipelines
    # 1. Authentication:
    token = authorize_with_MCP(host, username, password)

    # 2. Get Data:
    data = get_data(host,
                    get_path(args.data_type, params=args.params),
                    token,
                    key=get_data_key(args.data_type))

    if args.dest_db_type.lower() == 'mysql':
        import mysqlhelper
        # 3. convert json to flat csv
        data_file = mysqlhelper.write_data_to_file(data, args.data_type)

        mysql_host = get_environment_data("MYSQL_HOST",
                                          args.mysql_host,
                                          default='mcp_db')
        mysql_username = get_environment_data("MYSQL_USERNAME",
                                              args.mysql_username,
                                              default="limpid")
        mysql_password = get_environment_data("MYSQL_PASSWORD",
                                              args.mysql_password)
        mysql_db = get_environment_data("MYSQL_DB",
                                        args.mysql_db,
                                        default=MYSQL_DB)
        mysql_port = get_environment_data("MYSQL_PORT",
                                          args.mysql_port,
                                          default=3306)

        # 4. Recreate table in database
        if args.recreate_table:
            mysqlhelper.recreate_table(args.data_type, mysql_host,
                                       mysql_username, mysql_password,
                                       mysql_db, mysql_port)

        mysqlhelper.load_data_in_db(args.data_type, data_file, mysql_host,
                                    mysql_username, mysql_password, mysql_db,
                                    mysql_port)

    elif args.dest_db_type.lower() == 'postgres':
        import postgreshelper
        # 3. convert json to jsonl
        data_file = postgreshelper.write_data_to_file(data, args.data_type)

        postgres_host = get_environment_data("POSTGRES_HOST",
                                             args.postgres_host,
                                             default='mcp_db_pg')
        postgres_username = get_environment_data("POSTGRES_USERNAME",
                                                 args.postgres_username,
                                                 default="limpid")
        postgres_password = get_environment_data("POSTGRES_PASSWORD",
                                                 args.postgres_password)
        postgres_db = get_environment_data("POSTGRES_DB",
                                           args.postgres_db,
                                           default=POSTGRES_DB)
        postgres_port = get_environment_data("POSTGRES_PORT",
                                             args.postgres_port,
                                             default=5432)

        # 4. Recreate table in database
        if args.recreate_table:
            postgreshelper.recreate_table(args.data_type, postgres_host,
                                          postgres_username, postgres_password,
                                          postgres_db, postgres_port)

        postgreshelper.load_data_in_db(args.data_type, data_file,
                                       postgres_host, postgres_username,
                                       postgres_password, postgres_db,
                                       postgres_port)
    else:
        raise ValueError(
            "%s is not supported currently. Supported databases are MySQL and Postgres."
            % args.dest_db_type)
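
get_environment_data is used throughout this example to resolve each setting from the command line first, then the environment, then a default, as the parser description suggests. Its implementation is not shown, so the following is only a hedged sketch under that assumption:

import os

def get_environment_data(name, cli_value, default=None):
    # Hypothetical sketch: prefer the explicit command-line value, fall back
    # to the environment variable of the given name, then to the default.
    if cli_value is not None:
        return cli_value
    return os.environ.get(name, default)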
Example #6
def main(argv):
    args = argparser.parse_args()
    LOGGER.info('Sync Process Starts', {
        'month': args.month,
        'affiliate': args.affiliate
    })

    # 0. calculate time frame
    (year, month) = map(int, args.month.split("-"))
    start_date = datetime.date(year, month, 1)
    end_date = datetime.date(year, month, calendar.monthrange(year, month)[1])

    if args.affiliate.upper() == "ALL":
        affiliates = os.listdir(os.path.abspath("affiliates"))
        affiliates.remove('ipricegroup.py')
    else:
        affiliates = [args.affiliate + ".py"]

    dry_run = args.dry_run == 'True'
    testing = args.testing != 'False'

    data = downloader.get_data(affiliates, start_date, end_date,
                               'transactions')

    if len(data) > 0:
        # fixing log id for coupons with tracking id (5 chars) instead of log id (9 chars)
        data['ipg:source'] = data.apply(
            lambda x: parse.fix_broken_log_id(x['ipg:source']), axis=1)

        # filter out only new system transactions
        data['new_system'] = data.apply(
            lambda x: parse.is_new_system_source(x['ipg:source']), axis=1)
        data = data[data['new_system'] == True]

    if len(data) > 0:
        # enrich information from log database
        data['ipg:logId'] = data.apply(
            lambda x: parse.detect_log_id(x['ipg:source']), axis=1)
        LOGGER.info("Downloading Sync Log data")
        data = log.get_sync_log_info(data)

        ipricegroup = IpriceGroup()
        existing_conversions = ipricegroup.get_existing_conversions(
            start_date, end_date)

        count = {'created': 0, 'updated': 0, 'processed': 0}
        for index, row in data.iterrows():
            count['processed'] += 1
            conversion_unique_id = ('id:' + row['ipg:affiliateNetwork'] +
                                    '-' + str(row['ipg:originalConversionId']))
            if testing:
                conversion_unique_id += '-testoffer'
            conversion_datetime = parse.get_datetime_from_timestamp(
                row['ipg:timestamp'])

            existing_conversion = existing_conversions.get(
                conversion_unique_id)
            # TODO: handle the case where calling the create/update API
            # repeatedly sometimes doesn't work; HO is not real-time.

            affiliate_id = parse.detect_aff_id(row['ipg:source'])
            try:
                if existing_conversion is None:
                    fields = {
                        'ad_id': parse.detect_transaction_id(row['ipg:source']),
                        'advertiser_info': row['ipg:advertiserInfo'],
                        'affiliate_id': affiliate_id,
                        'affiliate_info1': 'testoffer' if testing else row['ipg:cc'],
                        'affiliate_info2': '',
                        'affiliate_info3': row['ipg:affCustom'],
                        'affiliate_info4': conversion_unique_id,
                        'affiliate_info5': 'createdByIprice',
                        'country_code': row['ipg:cc'],
                        'currency': row['ipg:currency'],
                        'datetime': conversion_datetime,
                        'ip': row['ipg:ip'],
                        'is_adjustment': '0',
                        'offer_id': parse.detect_offer_id(row['ipg:source']),
                        'payout': row['ipg:commission'],
                        'refer': row['ipg:exitUrl'],
                        'revenue': row['ipg:commission'],
                        'sale_amount': row['ipg:orderValue'],
                        'session_datetime': conversion_datetime,
                        'session_ip': row['ipg:ip'],
                        'source': row['ipg:source'],
                        'status': row['ipg:status'],
                        'user_agent': row['ipg:userAgent']
                    }

                    LOGGER.info(
                        "Create Conversion", {
                            'affiliate_id': affiliate_id,
                            'conversion_id': conversion_unique_id
                        })
                    if dry_run:
                        LOGGER.info("Create dry run fields", fields)
                    else:
                        ipricegroup.upsert_conversion(fields)
                        count['created'] += 1
                    data.loc[index, 'ipg:sync'] = 'created'
                else:
                    fields = {
                        'ip': row['ipg:ip'],
                        'payout': row['ipg:commission'],
                        'refer': row['ipg:exitUrl'],
                        'revenue': row['ipg:commission'],
                        'sale_amount': row['ipg:orderValue'],
                        'session_datetime': conversion_datetime,
                        'session_ip': row['ipg:ip'],
                        'status': row['ipg:status'],
                        'user_agent': row['ipg:userAgent']
                    }

                    if ipricegroup.is_updated_conversion(
                            existing_conversion, fields):
                        LOGGER.info(
                            "Update Conversion", {
                                'affiliate_id': affiliate_id,
                                'existing_conversion_id':
                                existing_conversion['id'],
                                'conversion_id': conversion_unique_id
                            })
                        if dry_run:
                            LOGGER.info("Update dry run fields", fields)
                        else:
                            # If any of these fields changed we cannot update
                            # the conversion, but we can log it to sync_failed
                            # twice:
                            # 1. the old conversion with negative payout, revenue & sale_amount
                            # 2. the new conversion with the new values
                            non_updatable_fields = {
                                'currency': 'ipg:currency',
                                'source': 'ipg:source',
                            }
                            for key, field in non_updatable_fields.items():
                                if str(existing_conversion[key]) != str(
                                        row[field]):
                                    raise Exception(
                                        'change of ' + key + ' to ' +
                                        str(row[field]) +
                                        ' is not allowed for an existing conversion: '
                                        + json.dumps(existing_conversion))

                            ipricegroup.upsert_conversion(
                                fields, existing_conversion['id'])
                            count['updated'] += 1
                        data.loc[index, 'ipg:sync'] = 'updated'
            except Exception as e:
                data.loc[index, 'ipg:sync'] = 'failed'
                LOGGER.error(str(e), {'affiliate_id': affiliate_id})

        LOGGER.info("Processed Conversion", {'count': count['processed']})
        LOGGER.info("Created Conversion", {'count': count['created']})
        LOGGER.info("Updated Conversion", {'count': count['updated']})

        data = downloader.process_data(data)
        downloader.export_csv(data)

    LOGGER.info('Sync Process Ended', {
        'month': args.month,
        'affiliate': args.affiliate
    })
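
The update branch only calls the API when ipricegroup.is_updated_conversion reports a difference between the stored conversion and the candidate fields. The method is not shown in this example; a minimal sketch, assuming it simply compares the overlapping fields as strings, might look like this:

def is_updated_conversion(existing_conversion, fields):
    # Hypothetical sketch: report whether any candidate field differs from
    # what is already stored on the conversion, comparing as strings so
    # numeric values from the API and the dataframe line up.
    for key, value in fields.items():
        if str(existing_conversion.get(key)) != str(value):
            return True
    return False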
Example #7
def main():

    mode = input('Mode selection (nodata, update, db_init): ')
    flag_keyboard_interrupt = False
    engine, session = db.connect()
    # db_tickers = db.load_available_tickers(session)  # TODO
    db_tickers = []  # testing
    new_tickers_df = fio.us_equities('resources/generic_backup101518.json')
    # TODO remove from list according to error csv
    # TODO remove from list according to db

    if mode == 'nodata':
        start_date = pd.to_datetime(params.START_DATE)
        for ticker_row in new_tickers_df.iterrows():  # looping over all tickers
            ticker_row = ticker_row[1]  # get rid of the indexing
            metadata = ticker_row.to_dict()

            print('Debugging - printing metadata')  # debugging
            print(metadata)  # debugging

            if flag_keyboard_interrupt:  # interruption logic
                print('Keyboard interrupted - exiting...')
                break

            if ticker_row['ticker'] not in db_tickers:  # skip over existing tickers
                try:
                    data = dl.get_data(ticker_row['ticker'],
                                       start_date=start_date)

                    print(f"Inserting {ticker_row['ticker']} into db")  # debugging

                    db.insert_data(session, data, metadata=metadata)
                    session.commit()

                except KeyboardInterrupt as error:
                    flag_keyboard_interrupt = True
                # except Exception:  # can't debug if errors don't show
                #     fio.append_error_csv(ticker_row)  # TODO

    elif mode == 'update':
        for ticker in db_tickers:
            if flag_keyboard_interrupt:  # ctrl-c break
                break
            last_date = db.last_data_date(ticker)  # TODO
            if last_date < pd.Timestamp.today() - pd.Timedelta(1, 'D'):

                try:
                    data = dl.get_data(ticker, last_date=last_date)
                    db.insert_table(session, data)  # TODO

                except KeyboardInterrupt as error:
                    flag_keyboard_interrupt = True

    elif mode == 'db_init':
        verify = input('You are about to delete everything '
                       'in the finance_data database (y/n): ')
        if verify == 'y':
            base.metadata.create_all(engine)  # create all tables
        fio.setup_error_csv(verify=True)

    else:
        print('Not a valid mode')
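
In the 'update' mode a ticker is re-downloaded only when its last stored date is more than one day old. A small self-contained illustration of that staleness check with pandas (the db helpers themselves are not shown, and the sample date is made up):

import pandas as pd

# A ticker is refreshed only if its last stored date is more than one day
# in the past relative to today.
last_date = pd.to_datetime('2018-10-10')
needs_update = last_date < pd.Timestamp.today() - pd.Timedelta(1, 'D')
print(f'needs update: {needs_update}')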