Python DataFrame: примеры использования

Язык программирования: Python

Пространство имен/Пакет: pandas

Класс/Тип: DataFrame

Примеров на hotexamples.com: 9

Python DataFrame — найдено 9 примеров. Это лучшие примеры кода на Python для pandas.DataFrame, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

drop(3)

apply(2)

DataFrame(1)

append(1)

explode(1)

groupby(1)

isna(1)

iterrows(1)

Пример #1

Показать файл

Файл: script_utils.py Проект: trabant70/wattnet-fx-trading

def augment_with_pluses(dataframe: pd.DataFrame, usd_is_1: pd.Series,
                        usd_is_2: pd.Series) -> None:
    """Add boolean columns flagging whether the amount strings contain '+'.

    Two columns are written to ``dataframe`` in place:

    * ``PLUS_USD`` -- True where ``usd_is_1`` holds and amount 1 contains
      '+', or ``usd_is_2`` holds and amount 2 contains '+'.
    * ``PLUS_CCY`` -- True for the opposite pairing (``usd_is_2`` with
      amount 1, or ``usd_is_1`` with amount 2).

    Args:
        dataframe: Frame with ``ROUNDED_NOTIONAL_AMOUNT_1`` and
            ``ROUNDED_NOTIONAL_AMOUNT_2`` columns. Modified in place.
        usd_is_1: Boolean mask aligned with ``dataframe``; presumably True
            where the first leg is the USD leg (verify against caller).
        usd_is_2: Boolean mask aligned with ``dataframe``; presumably True
            where the second leg is the USD leg (verify against caller).
    """
    # Cast to str first so numeric or missing amounts are searched as text
    # rather than yielding NaN; regex=False because '+' is a regex
    # metacharacter and a literal match is wanted.
    plus_1 = dataframe['ROUNDED_NOTIONAL_AMOUNT_1'].astype(str).str.contains(
        '+', regex=False)
    plus_2 = dataframe['ROUNDED_NOTIONAL_AMOUNT_2'].astype(str).str.contains(
        '+', regex=False)
    dataframe.loc[:, 'PLUS_USD'] = (usd_is_1 & plus_1) | (usd_is_2 & plus_2)
    dataframe.loc[:, 'PLUS_CCY'] = (usd_is_2 & plus_1) | (usd_is_1 & plus_2)

Пример #2

Показать файл

Файл: script_utils.py Проект: trabant70/wattnet-fx-trading

def remove_dissemination_id_changes(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Drop the rows that a later CORRECT or CANCEL row supersedes.

    Every row whose ``ACTION`` is ``'CORRECT'`` or ``'CANCEL'`` names, in
    ``ORIGINAL_DISSEMINATION_ID``, the index label of the row it amends.
    Those referenced rows are removed when they are present in the index;
    the amending rows themselves are kept.

    A summary of how many cancels and corrections were seen is printed.

    Args:
        dataframe: Frame with ``ACTION`` and ``ORIGINAL_DISSEMINATION_ID``
            columns, whose index labels are compared against the integer
            original IDs.

    Returns:
        A new frame without the superseded rows, or ``dataframe`` itself
        when there is nothing to drop.

    Raises:
        ValueError: If an amending row's original ID cannot be converted
            to ``int`` (for example, it is missing).
    """
    actions = dataframe['ACTION']
    n_corrections = int((actions == 'CORRECT').sum())
    n_cancels = int((actions == 'CANCEL').sum())
    print(f'There have been {n_cancels} cancels and '
          f'{n_corrections} corrections in dissemination IDs')

    # Convert only the amending rows: other rows may legitimately carry a
    # missing original ID, which cannot be cast to int.
    amending = actions.isin(['CORRECT', 'CANCEL'])
    original_ids = dataframe.loc[amending,
                                 'ORIGINAL_DISSEMINATION_ID'].astype(int)

    # Keep only IDs that are actually present, de-duplicated so each label
    # is dropped once.
    to_drop = original_ids[original_ids.isin(dataframe.index)].unique()
    if len(to_drop) > 0:
        dataframe = dataframe.drop(to_drop, axis=0)
    return dataframe

Пример #3

Показать файл

 def get_data(data: pd.DataFrame) -> pd.DataFrame:
     """Expand each archive member listed in ``data`` into word/count rows.

     For every row, the zip member named by ``row.path`` is read from the
     archive at ``countpath``. Each line of the member is stripped, decoded
     as UTF-8 and split on tabs; it must yield exactly two fields, which
     become the ``word`` and ``count`` columns. The result has one row per
     line, with the helper ``text`` and ``path`` columns removed.

     ``countpath`` and ``filter`` are not defined in this block; they are
     resolved from the enclosing scope.
     NOTE(review): if the enclosing scope defines no ``filter``, the name
     falls back to the builtin and the final masking step fails -- confirm.

     Args:
         data: Frame with a ``path`` column of zip member names. A ``text``
             column is added to the passed-in frame as a side effect.

     Returns:
         Frame with ``word`` (str) and ``count`` (int) columns in place of
         ``path``, restricted to the rows for which ``filter(word)`` is
         truthy when ``filter`` is not None.
     """
     with zipfile.ZipFile(countpath) as z:

         def _read_rows(row):
             # Close every member as soon as it is read instead of leaving
             # one open handle per row until garbage collection.
             with z.open(row.path, "r") as member:
                 return [
                     s.strip().decode("utf-8").split("\t")
                     for s in member.readlines()
                 ]

         data["text"] = data.apply(_read_rows, axis=1)
     # One output row per line of each member.
     data = data.explode("text")
     data[["word", "count"]] = data["text"].tolist()
     data.drop(columns=["text", "path"], inplace=True)
     data["count"] = data["count"].astype(int)
     if filter is not None:
         data = data[data["word"].map(filter)]
     return data

Пример #4

Показать файл

Файл: script_utils.py Проект: trabant70/wattnet-fx-trading

def amounts_to_ndf_rate(dataframe: pd.DataFrame, usd_is_1: pd.Series,
                        usd_is_2: pd.Series) -> None:
    """Derive an NDF rate column from the two notional amounts, in place.

    Four columns are written to ``dataframe``:

    * ``CURRENCY`` -- the notional currency of the leg opposite to the one
      selected by the mask (leg 2 where ``usd_is_1``, leg 1 where
      ``usd_is_2``).
    * ``USD_AMOUNT`` -- the rounded notional of the leg selected by the mask.
    * ``CCY_AMOUNT`` -- the rounded notional of the opposite leg.
    * ``NDF_RATE`` -- ``CCY_AMOUNT / USD_AMOUNT``.

    Rows selected by neither mask are left as NaN in the new columns.

    Args:
        dataframe: Frame with ``NOTIONAL_CURRENCY_1/2`` and
            ``ROUNDED_NOTIONAL_AMOUNT_1/2`` columns. Modified in place; the
            amount columns must already be numeric for the division.
        usd_is_1: Boolean mask aligned with ``dataframe`` selecting rows
            whose first leg supplies ``USD_AMOUNT``.
        usd_is_2: Boolean mask aligned with ``dataframe`` selecting rows
            whose second leg supplies ``USD_AMOUNT``.
    """
    # A single .loc on the right-hand side replaces the chained
    # dataframe[mask][col] lookup, which built an intermediate frame.
    # The statement order is significant when a row is selected by both
    # masks (the later write wins), so it is kept as is.
    dataframe.loc[usd_is_1, 'CURRENCY'] = dataframe.loc[
        usd_is_1, 'NOTIONAL_CURRENCY_2']
    dataframe.loc[usd_is_2, 'CURRENCY'] = dataframe.loc[
        usd_is_2, 'NOTIONAL_CURRENCY_1']

    # The right-hand sides are full columns; .loc aligns them on the index
    # and writes only the masked rows.
    dataframe.loc[usd_is_1,
                  'USD_AMOUNT'] = dataframe['ROUNDED_NOTIONAL_AMOUNT_1']
    dataframe.loc[usd_is_2,
                  'USD_AMOUNT'] = dataframe['ROUNDED_NOTIONAL_AMOUNT_2']
    dataframe.loc[usd_is_2,
                  'CCY_AMOUNT'] = dataframe['ROUNDED_NOTIONAL_AMOUNT_1']
    dataframe.loc[usd_is_1,
                  'CCY_AMOUNT'] = dataframe['ROUNDED_NOTIONAL_AMOUNT_2']

    dataframe.loc[:, 'NDF_RATE'] = (dataframe['CCY_AMOUNT'] /
                                    dataframe['USD_AMOUNT'])

Пример #5

Показать файл

def _adjust_tstamp_drift_of_triplet(df: pd.Dataframe) -> List[pd.DataFrame]:
    """Return list of pandas DataFrames where timestamp offsets has been adjusted.

    Sorts dataframe based on timestamp, finds triplets where timestamp is equal +-2, and
    adjusts any timestamps +-2 from 2nd timestamp to be equal to 2nd timestamp. Returns
    a list of all valid triplets.

    Args:
        df: pd.DataFrame where columns "timestamp" and "millisecond" are used to adjust.

    Returns:
        Returns list of pd.DataFrame where timestamps offset +-2 from middle timestamp
        is adjusted. For example:

        | timestamp  | millisecond | frequency | tagID | tagData |
        | 1556555369 |     995     |     69    |   12  |   3.5   |
        | 1556555370 |     005     |     69    |   12  |   3.5   |
        | 1556555371 |     010     |     69    |   12  |   3.5   |

        becomes -->

        | timestamp  | millisecond | frequency | tagID | tagData |
        | 1556555370 |     995     |     69    |   12  |   3.5   |
        | 1556555370 |     005     |     69    |   12  |   3.5   |
        | 1556555370 |     010     |     69    |   12  |   3.5   |
    """
    ts_drift_threshold = 2
    ms_1km = 0.667

    # Sort dataframe by timestamps in case some timestamps are in the wrong order
    df = df.sort_values("timestamp")
    df = df.reset_index(drop=True)

    # Extract timestamps and find all triplets within dataframe
    ts = df["timestamp"]
    last_indices = ts.index[ts.diff(periods=2) <= ts_drift_threshold]
    all_indices = last_indices.append([last_indices - 1,
                                       last_indices - 2]).sort_values()

    # Mask out all detections that aren't triplets
    mask_values = [i for i in range(len(last_indices)) for _ in range(3)]
    df.loc[all_indices, "mask"] = mask_values
    df = df[df["mask"].notnull()]
    if df.empty:
        return []

    # Adjust timestamps that have drifted
    # | if 2nd timestamp in triplet is much larger than the 1st, add 2nd index to list
    # | if 3rd timestamp in triplet is much larger than the 2nd, add 2nd index to list
    df["drift"] = df.apply(lambda x: x["timestamp"] + x["millisecond"] / 1000,
                           axis=1)
    drift = df["drift"].diff()
    drift_3rd = drift[last_indices].where(abs(drift[last_indices]) >= ms_1km)
    drift_1st = drift[last_indices -
                      1].where(abs(drift[last_indices - 1]) >= ms_1km)
    drift_indices = drift_3rd.dropna(
    ).index - 1  # -1 to get index of 2nd timestamp
    drift_indices = drift_indices.append(drift_1st.dropna().index)

    # Set timestamp 1 and 3 of each triplet with drift has equal to 2nd timestamp
    df.loc[drift_indices - 1, "timestamp"] = ts[drift_indices].values
    df.loc[drift_indices + 1, "timestamp"] = ts[drift_indices].values

    # get and return triplets as list of dataframes
    triplets = [
        v.drop(["mask", "drift"], axis=1) for _, v in df.groupby("mask")
    ]
    # triplets = [v.drop(["mask"], axis=1) for _, v in df.groupby("mask")]
    return triplets

Пример #6

Показать файл

Файл: bai70.py Проект: duongdinh24/learn-python

import pandas as pd
import numpy as np
from pandas import Series, DataFrame
from numpy.random import randn

# --- Build a DataFrame from a dict of equal-length columns ---
sinhvien = ['Teo', 'Ty', 'Tun', 'Tuan', 'Tien']
# NOTE(review): the excerpt uses `diem` without ever defining it, which is a
# NameError. Random placeholder values, one per student, keep the script
# runnable -- replace them with the real data.
diem = randn(len(sinhvien))
data = {'diem': diem, 'sinhvien': sinhvien}
df2 = DataFrame(data)  # convert the dict into a DataFrame

# --- Build a 5x5 DataFrame of random integers in [1, 100) ---
indx = 'A B C D E'.split()
cols = 'Col1 Col2 Col3 Col4 Col5'.split()
# One vectorised draw instead of appending 25 scalars and reshaping.
x = np.random.randint(1, 100, size=(5, 5))

df3 = DataFrame(x, index=indx, columns=cols)
# Reindex rows: labels missing from df3 ('F', 'G') are filled with 0.
newind = 'A B C D E F G'.split()
df4 = df3.reindex(newind, fill_value=0)
# Reindex columns: columns missing from df3 ('col6', 'col7') are filled
# with 0. This rebinds df4, replacing the row-reindexed frame above.
cols = 'Col1 Col2 Col3 Col4 Col5 col6 col7'.split()
df4 = df3.reindex(columns=cols, fill_value=0)
# Bare expression: displays the frame in a notebook/REPL, no effect in a script.
df4

Пример #7

Показать файл

Файл: filter_dataset.py Проект: fridex/samuelmacko_master_thesis

def substitude_row(dataset: DataFrame, repo_name: str,
                   new_row: List[str]) -> None:
    """Replace, in place, the row(s) for ``repo_name`` with ``new_row``.

    All rows whose ``repo_name`` column equals ``repo_name`` are dropped and
    ``new_row`` is added at the end of ``dataset``. The new row reuses the
    index label of the first dropped row; when nothing matched, it gets the
    first non-negative integer label not already in the index.

    The previous implementation bound the result of ``append`` to a local
    name, so the caller never saw the new row (and ``DataFrame.append`` no
    longer exists in pandas 2.x). The row is now written through ``.loc`` so
    the caller's frame is actually updated.

    Args:
        dataset: Frame with a ``repo_name`` column. Modified in place.
        repo_name: Value identifying the row(s) to replace.
        new_row: Values for the new row, one per column of ``dataset``, in
            column order.
    """
    replaced = dataset.index[dataset['repo_name'] == repo_name]
    dataset.drop(labels=replaced, inplace=True)

    if len(replaced) > 0:
        label = replaced[0]
    else:
        # Pick a label that cannot collide with an existing row, because
        # assigning to an existing label would overwrite that row.
        label = len(dataset)
        while label in dataset.index:
            label += 1
    # Assigning to a label that is absent from the index appends the row to
    # this very object.
    dataset.loc[label] = new_row

Пример #8

Показать файл

Файл: filter_dataset.py Проект: fridex/samuelmacko_master_thesis

def find_incomplete_rows(dataset: DataFrame) -> Series:
    """Return the ``repo_name`` of every row that has a missing value.

    Args:
        dataset: Frame with a ``repo_name`` column.

    Returns:
        The ``repo_name`` values, with their original index labels, of the
        rows in which at least one column is NA.
    """
    has_missing = dataset.isna().any(axis=1)
    # A single .loc selects rows and column together, without building an
    # intermediate frame.
    return dataset.loc[has_missing, 'repo_name']

Пример #9

Показать файл

Файл: filter_dataset.py Проект: fridex/samuelmacko_master_thesis

def save_to_csv(dataset: DataFrame, file_name: str) -> None:
    """Write ``dataset`` to ``file_name`` as CSV.

    The column names are written as a header row; the index is not written.

    Args:
        dataset: Frame to serialise.
        file_name: Path of the CSV file to create or overwrite.
    """
    dataset.to_csv(file_name, index=False, header=True)