# Exemplo n.º 1 (0)
def upload_to_segment():
    """Upload queued audit events from the local ``auditing`` directory to Segment.

    Each ``.txt`` file contains one JSON object per line with at least the keys
    ``user``, ``event`` and ``timestamp``; any remaining keys are sent as event
    properties. Files are renamed to ``.toupload`` while being processed and to a
    ``.YYYYmmddHHMM`` suffix once uploaded, so a file is never processed twice.
    """
    dir_path = dirname(realpath(__file__))
    audit_file_path = join(dir_path, "auditing")
    client = Client(config.SEGMENT_API_KEY)
    txt_files = [
        join(audit_file_path, filename)
        for filename in listdir(audit_file_path)
        if filename.endswith(".txt")
    ]
    for txt_file in txt_files:
        # Rename first so that a concurrent writer appending to the .txt file
        # does not race with the upload.
        renamed_to = txt_file.replace(".txt", ".toupload")
        rename(txt_file, renamed_to)
        with open(renamed_to, "rb") as output_file:
            # Stream line by line instead of readlines(): avoids loading the
            # whole file into memory at once.
            for line in output_file:
                properties = json.loads(line)
                user = properties.pop("user")
                event = properties.pop("event")
                timestamp = parse(properties.pop("timestamp"))
                client.track(
                    user_id=user,
                    event=event,
                    timestamp=timestamp,
                    properties=properties,
                )
        # Flush once per file rather than once per event: flushing inside the
        # loop forced a network round-trip per line and defeated the Segment
        # client's internal batching.
        client.flush()
        afterwards_to = renamed_to.replace(
            ".toupload",
            datetime.now().strftime(".%Y%m%d%H%M"))
        rename(renamed_to, afterwards_to)
def alias():
    """Link a previous (anonymous) id to a known user id via Segment's alias call."""
    key = request.args.get('writeKey')
    payload = request.get_json(True)

    segment = Client(key)
    segment.alias(payload['previousId'], payload['userId'])
    segment.flush()

    return json.dumps({'status': 'success'})
def track():
    """Forward a tracking event from the request body to Segment."""
    payload = request.get_json(True)
    segment = Client(request.args.get('writeKey'))

    segment.track(payload['userId'], payload['event'], payload['properties'])
    segment.flush()

    return json.dumps({'status': 'success'})
# Exemplo n.º 4 (0)
def alias():
    """Merge two Segment identities: previousId becomes an alias of userId."""
    body = request.get_json(True)
    analytics_client = Client(request.args.get('writeKey'))
    analytics_client.alias(body['previousId'], body['userId'])
    analytics_client.flush()
    return json.dumps({'status': 'success'})
# Exemplo n.º 5 (0)
def track():
    """Send one track event (userId, event name, properties) to Segment."""
    write_key = request.args.get('writeKey')
    body = request.get_json(True)

    # A fresh client per request: the write key comes from the caller.
    analytics_client = Client(write_key)
    analytics_client.track(
        body['userId'],
        body['event'],
        body['properties'],
    )
    analytics_client.flush()

    return json.dumps({'status': 'success'})
def identify():
    """Attach user traits from the request body to a Segment identity."""
    key = request.args.get('writeKey')
    payload = request.get_json(True)

    # Shouldn't have any concurrency issue since we are not multi-threading here
    segment = Client(key)
    segment.identify(payload['userId'], payload['traits'])
    segment.flush()

    return json.dumps({'status': 'success'})
# Exemplo n.º 7 (0)
def identify():
    """Record a Segment identify call for the user described in the request body."""
    body = request.get_json(True)
    analytics_client = Client(request.args.get('writeKey'))

    # Shouldn't have any concurrency issue since we are not multi-threading here
    analytics_client.identify(body['userId'], body['traits'])
    analytics_client.flush()

    return json.dumps({'status': 'success'})
# Exemplo n.º 8 (0)
# Load dataframe in memory
logger.info("Reading data from GCS")
df = pd.read_csv(f"{gcs_directory}/tables_1.csv")
logger.info(f"Loaded {df.shape[0]} rows")

# Remove rows with empty account IDs (Segment requires a non-null user id)
logger.info(f"Removing {df['account_id'].isna().sum()} rows with no account_id")
clean_df = df[~df["account_id"].isna()]

# Generate the proper schema and send it to Segment.
# itertuples() instead of iterrows(): iterrows builds a pandas Series for
# every row, which is the well-known slow path; itertuples yields lightweight
# namedtuples with the same data.
logger.info("Sending data to Segment")
for row in clean_df.itertuples(index=False):

    properties = {
        "product": row.product,
        "productUserId": row.user_id,
        "subscriptionId": row.subscription_id,
        "trialId": row.trial_id,
        "planId": row.plan_id,
        "modelName": MODEL_NAME,
        "modelVersion": MODEL_VERSION,
        "predictedAt": predicted_at,
        "score": row.converted_true_score,
    }

    segment_client.track(row.account_id, "Trial Conversion Predicted", properties)

# Flush before finishing the script so queued events are actually delivered
logger.info("Flushing queue")
segment_client.flush()