Python get_df примеры, labbookdb.db.query.get_df Python примеры использования

Пример #1

0

Показать файл

Файл: selection.py Проект: cequn/LabbookDB

def stimulation_protocol(db_path, code):
    """Select all events, with their measurement units, of one stimulation protocol.

    Parameters
    ----------

    db_path : string
        Path to the database file to query.

    code : string
        Code (valid `StimulationProtocol.code` value) which identifies the stimulation protocol to select.

    Returns
    -------
    object
        The result of `query.get_df()` for this selection (presumably a `pandas.DataFrame`).
    """

    # Restrict the query to the single protocol identified by `code`.
    protocol_filter = ["StimulationProtocol", "code", code]

    return query.get_df(
        db_path,
        col_entries=[
            ("StimulationProtocol", ),
            ("StimulationEvent", ),
            ("MeasurementUnit", ),
        ],
        join_entries=[
            ("StimulationProtocol.events", ),
            ("StimulationEvent.unit", ),
        ],
        filters=[protocol_filter],
    )

Пример #2

0

Показать файл

Файл: selection.py Проект: cequn/LabbookDB

def timetable(
    db_path,
    filters,
    default_join="outer",
    join_types=None,
):
    """Select a dataframe with animals as rows and all timetable-relevant events as columns.

    Parameters
    ----------

    db_path : str
        Path to a LabbookDB formatted database.

    filters : list of list
        A list of lists giving filters for the query. It is passed to `..query.get_df()`.

    default_join : str, optional
        Passed as `default_join` to `..query.get_df()`.

    join_types : list, optional
        Passed as `join_types` to `..query.get_df()`. A fresh empty list is passed if omitted.

    Returns
    -------
    object
        The result of `..query.get_df()` for this selection.
    """

    # Use a per-call list so that the callee can never mutate a shared default.
    if join_types is None:
        join_types = []

    col_entries = [
        ("Animal", "id"),
        ("Treatment", ),
        ("FMRIMeasurement", "date"),
        ("OpenFieldTestMeasurement", "date"),
        ("ForcedSwimTestMeasurement", "date"),
        ("TreatmentProtocol", "code"),
        ("Cage", "id"),
        ("Cage", "Treatment", ""),
        ("Cage", "TreatmentProtocol", "code"),
        ("SucrosePreferenceMeasurement", "date"),
    ]
    join_entries = [
        ("Animal.treatments", ),
        ("FMRIMeasurement", ),
        ("OpenFieldTestMeasurement", "Animal.measurements"),
        ("ForcedSwimTestMeasurement", "Animal.measurements"),
        ("Treatment.protocol", ),
        ("Animal.cage_stays", ),
        ("CageStay.cage", ),
        ("Cage_Treatment", "Cage.treatments"),
        ("Cage_TreatmentProtocol", "Cage_Treatment.protocol"),
        ("SucrosePreferenceMeasurement", "Cage.measurements"),
    ]

    # NOTE: per the original author, an outer join indirectly includes controls.
    df = query.get_df(db_path,
                      col_entries=col_entries,
                      join_entries=join_entries,
                      filters=filters,
                      default_join=default_join,
                      join_types=join_types)

    return df

Пример #3

0

Показать файл

Файл: selection.py Проект: cequn/LabbookDB

def animals_by_cage_treatment(
    db_path,
    codes=None,
    end_dates=None,
    start_dates=None,
):
    """Select a dataframe of animals and all related tables through to cage treatments based on cage_treatment filters.

    Parameters
    ----------

    db_path : string
        Path to a LabbookDB formatted database.

    codes : list, optional
        Related TreatmentProtocol.code values based on which to filter dataframe.

    end_dates : list, optional
        Related Treatment.end_date values based on which to filter dataframe.

    start_dates : list, optional
        Related Treatment.start_date values based on which to filter dataframe.

    Returns
    -------
    object
        The result of `query.get_df()` for this selection.
    """

    col_entries = [
        ("Animal", "id"),
        ("Cage", "id"),
        ("Treatment", ),
        ("TreatmentProtocol", "code"),
    ]
    join_entries = [
        ("Animal.cage_stays", ),
        ("CageStay.cage", ),
        ("Cage.treatments", ),
        ("Treatment.protocol", ),
    ]

    # One filter entry of the form [table, column, value, ...] per non-empty argument.
    requested = (
        ("TreatmentProtocol", "code", codes),
        ("Treatment", "end_date", end_dates),
        ("Treatment", "start_date", start_dates),
    )
    my_filters = [[table, column] + list(values)
                  for table, column, values in requested if values]
    if not my_filters:
        # Keep the historical "no filter" placeholder passed to the query.
        my_filters = [None]

    df = query.get_df(db_path,
                      col_entries=col_entries,
                      join_entries=join_entries,
                      filters=my_filters)

    return df

Пример #4

0

Показать файл

Файл: selection.py Проект: cequn/LabbookDB

def animal_id(db_path, database, identifier, reverse=False):
    """Translate between an external database identifier and the LabbookDB animal identifier.

    Parameters
    ----------

    db_path : string
        Path to the database file to query.
    database : string
        Valid `AnimalExternalIdentifier.database` value.
    identifier : string
        Valid `AnimalExternalIdentifier.identifier` value (or an `Animal.id` value if `reverse` is true).
    reverse : bool, optional
        Whether to reverse the query.
        A reverse query means that a LabbookDB `Animal.id` filter is applied and an `AnimalExternalIdentifier.identifier` value is returned.

    Returns
    -------
    object
        The single value of the `Animal_id` column of the query result, or of the `AnimalExternalIdentifier_identifier` column for a reverse query.
    """

    # Forward lookups filter on the external identifier and read back the animal id;
    # reverse lookups filter on the animal id and read back the external identifier.
    if reverse:
        lookup_filter = ["Animal", "id", identifier]
        result_column = 'AnimalExternalIdentifier_identifier'
    else:
        lookup_filter = ["AnimalExternalIdentifier", "identifier", identifier]
        result_column = 'Animal_id'
    database_filter = ["AnimalExternalIdentifier", "database", database]

    selection = query.get_df(
        db_path,
        col_entries=[
            ("Animal", "id"),
            ("AnimalExternalIdentifier", ),
        ],
        join_entries=[
            ("Animal.external_ids", ),
        ],
        filters=[lookup_filter, database_filter],
    )

    return selection[result_column].item()

Пример #5

0

Показать файл

Файл: selection.py Проект: cequn/LabbookDB

def cage_drinking_measurements(db_path, treatments=None):
    """Select cages together with their treatments, treatment protocols and drinking measurements.

    Parameters
    ----------

    db_path : string
        Path to the database file to query.

    treatments : list, optional
        `TreatmentProtocol.code` values by which to filter the query.
        No protocol filter is applied if this is empty or omitted.

    Returns
    -------
    object
        The result of `query.get_df()` for this selection, using inner joins by default.
    """
    join_type = 'inner'
    col_entries = [("Cage", "id"), ("Treatment", ),
                   ("TreatmentProtocol", 'code'), ('DrinkingMeasurement', )]
    join_entries = [("Cage.treatments", ), ("Treatment.protocol", ),
                    ('DrinkingMeasurement', )]

    # An empty filter entry is still passed along when no treatments are given,
    # which is what the query has always received in that case.
    my_filter = []
    if treatments:
        my_filter = ["TreatmentProtocol", "code"] + list(treatments)

    df = query.get_df(db_path,
                      col_entries=col_entries,
                      join_entries=join_entries,
                      filters=[my_filter],
                      default_join=join_type)

    return df

Пример #6

0

Показать файл

Файл: selection.py Проект: cequn/LabbookDB

def parameterized(
    db_path,
    data_type,
    animal_filter=None,
    cage_filter=None,
    treatment_start_dates=None,
):
    """Select dataframe from a LabbookDB style database.

    Parameters
    ----------

    db_path : string
        Path to a LabbookDB formatted database.
    data_type : {"animals id", "animals info", "animals measurements", "animals measurements irregularities", "animals weights", "cage list", "forced swim"}
        What type of data should be selected.
    animal_filter : list, optional
        A list of animal identifiers (`Animal.id` attributes) for which to limit the query.
        Only applied for `data_type="cage list"`.
    cage_filter : list, optional
        A list of cage identifiers (`Cage.id` attributes) for which to limit the query.
        Only applied for `data_type="cage list"`.
    treatment_start_dates : list, optional
        A list containing the `Treatment.start_date` value or values by which to filter the query.
        Items should be strings in datetime format, e.g. "2016,4,25,19,30".

    Returns
    -------
    object
        The result of `query.get_df()` for this selection, using inner joins by default.

    Raises
    ------
    ValueError
        If `data_type` is not one of the supported values.
    """

    default_join = "inner"
    # Every requested filter is collected here, so that no filter silently replaces another.
    filters = []

    allowed_data_types = [
        "animals id",
        "animals info",
        "animals measurements",
        "animals measurements irregularities",
        "animals weights",
        "cage list",
        "forced swim",
    ]

    if data_type == "animals id":
        col_entries = [
            ("Animal", "id"),
            ("AnimalExternalIdentifier", ),
        ]
        join_entries = [
            ("Animal.external_ids", ),
        ]
    elif data_type == "animals info":
        col_entries = [
            ("Animal", "death_date"),
            ("AnimalExternalIdentifier", ),
            ("Genotype", ),
        ]
        join_entries = [
            ("Animal.external_ids", ),
            ("Animal.genotypes", ),
        ]
    elif data_type == "animals measurements":
        col_entries = [
            ("Animal", "id"),
            ("Measurement", "id"),
            ("StimulationProtocol", "code"),
        ]
        join_entries = [
            ("Animal.measurements", ),
            ("FMRIMeasurement.stimulations", ),
        ]
    elif data_type == "animals measurements irregularities":
        col_entries = [
            ("Animal", "id"),
            ("Measurement", "id"),
            ("Irregularity", "description"),
        ]
        join_entries = [
            ("Animal.measurements", ),
            ("FMRIMeasurement.irregularities", ),
        ]
    elif data_type == "animals weights":
        col_entries = [
            ("Animal", "id"),
            ("AnimalExternalIdentifier", ),
            ("WeightMeasurement", ),
        ]
        join_entries = [
            ("Animal.external_ids", ),
            ("WeightMeasurement", ),
        ]
        filters.append(["Measurement", "type", "weight"])
    elif data_type == "cage list":
        col_entries = [
            ("Animal", ),
            ("CageStay", ),
            ("Cage", "id"),
        ]
        join_entries = [
            ("Animal.cage_stays", ),
            ("CageStay.cage", ),
        ]
        # The identifier values are passed on as strings (required by the query, per the original author).
        if animal_filter:
            filters.append(['Animal', 'id'] + [str(i) for i in animal_filter])
        if cage_filter:
            filters.append(['Cage', 'id'] + [str(i) for i in cage_filter])
    elif data_type == "forced swim":
        col_entries = [
            ("Animal", "id"),
            ("Cage", "id"),
            ("Treatment", ),
            ("TreatmentProtocol", "code"),
            ("ForcedSwimTestMeasurement", ),
            ("Evaluation", ),
        ]
        join_entries = [
            ("Animal.cage_stays", ),
            ("ForcedSwimTestMeasurement", ),
            ("Evaluation", ),
            ("CageStay.cage", ),
            ("Cage.treatments", ),
            ("Treatment.protocol", ),
        ]
    else:
        raise ValueError(
            "The `data_type` value needs to be one of: {}. You specified \"{}\""
            .format(", ".join(allowed_data_types), data_type))

    if treatment_start_dates:
        filters.append(["Treatment", "start_date"] + list(treatment_start_dates))
    if not filters:
        # Preserve the historical call shape (a single empty filter entry) when nothing is filtered.
        filters = [[]]

    df = query.get_df(db_path,
                      col_entries=col_entries,
                      join_entries=join_entries,
                      filters=filters,
                      default_join=default_join)
    return df

Пример #7

0

Показать файл

Файл: selection.py Проект: cequn/LabbookDB

def by_animals(db_path, select, animals):
    """Select a dataframe of animals and all related tables through to the "select" table based on animal filters.

    Parameters
    ----------

    db_path : str
        Path to a LabbookDB formatted database.

    select : {"sucrose preference", "timetable"}
        For which kind of evaluation to select dataframe.

    animals : list of str
        Animal.id values based on which to filter dataframe.
        The values are converted to strings before being passed to the query.

    Returns
    -------
    object
        The result of `query.get_df()` for this selection.

    Raises
    ------
    ValueError
        If `select` is not one of the accepted values.
    """

    accepted_select_values = ["sucrose preference", "timetable"]

    if select == "sucrose preference":
        join_types = ["inner", "inner", "inner"]
        col_entries = [
            ("Animal", "id"),
            ("Cage", "id"),
            ("SucrosePreferenceMeasurement", ),
        ]
        join_entries = [
            ("Animal.cage_stays", ),
            ("CageStay.cage", ),
            ("SucrosePreferenceMeasurement", ),
        ]
    elif select == "timetable":
        # No per-join overrides for the timetable selection. This must still be defined,
        # otherwise the query call below fails with an unbound local variable.
        join_types = []
        col_entries = [
            ("Animal", "id"),
            ("Treatment", ),
            ("FMRIMeasurement", "date"),
            ("OpenFieldTestMeasurement", "date"),
            ("ForcedSwimTestMeasurement", "date"),
            ("TreatmentProtocol", "code"),
            ("Cage", "id"),
            ("Cage", "Treatment", ""),
            ("Cage", "TreatmentProtocol", "code"),
            ("SucrosePreferenceMeasurement", "date"),
        ]
        join_entries = [
            ("Animal.treatments", ),
            ("FMRIMeasurement", ),
            ("OpenFieldTestMeasurement", "Animal.measurements"),
            ("ForcedSwimTestMeasurement", "Animal.measurements"),
            ("Treatment.protocol", ),
            ("Animal.cage_stays", ),
            ("CageStay.cage", ),
            ("Cage_Treatment", "Cage.treatments"),
            ("Cage_TreatmentProtocol", "Cage_Treatment.protocol"),
            ("SucrosePreferenceMeasurement", "Cage.measurements"),
        ]
    else:
        raise ValueError("The value for select needs to be one of {}".format(
            accepted_select_values))

    # The Animal.id values are passed on as strings (required by the query, per the original author).
    my_filter = ["Animal", "id"] + [str(i) for i in animals]

    df = query.get_df(db_path,
                      col_entries=col_entries,
                      join_entries=join_entries,
                      filters=[my_filter],
                      join_types=join_types)

    return df

Пример #8

0

Показать файл

Файл: selection.py Проект: cequn/LabbookDB

def animal_treatments(
    db_path,
    animal_ids=None,
    animal_treatments=None,
    cage_treatments=None,
    conjunctive=True,
):
    """Select a dataframe of animals and all treatments including animal-level or cage-level treatments.

    Parameters
    ----------

    db_path : str
        Path to a LabbookDB formatted database.

    animal_ids : list, optional
        A List of LabbookDB `Animal.id` values by which to filter the query.
        It is faster to filter using this mechanism than to return a dataframe for all animals and then filter that.

    animal_treatments : list, optional
        A List of LabbookDB `Treatment.code` values which should be used to filter the query, while being joined to `Animal` objects.

    cage_treatments : list, optional
        A List of LabbookDB `Treatment.code` values which should be used to filter the query, while being joined to `Cage` objects - and further to `Animal` objects via `CageStay` objects.
        An onset check is also applied by the function, to ascertain that there is an overlap between the animal's presence in the cage and the cage treatment application.

    conjunctive : bool, optional
        Whether both `cage_treatments` and `animal_treatments` need to be satisfied (statements within each list are always disjunctive).
        Currently unused, see Notes.

    Returns
    -------
    object
        The queried dataframe, with cage treatment columns blanked for stays that fail the onset check and with duplicate rows dropped.

    Notes
    -----

    Currently `conjunctive=False` does not work; cage treatment and animal treatment filters are always conjunctive.
    """

    join_type = 'outer'
    col_entries = [
        ("Animal", "id"),
        ("Animal", "death_date"),
        ("Treatment", ),
        ("TreatmentProtocol", "code"),
        ("CageStay", "start_date"),
        ("Cage", "id"),
        ("Cage", "Treatment", ""),
        ("Cage", "TreatmentProtocol", "code"),
    ]
    join_entries = [
        ("Animal.treatments", ),
        ("Treatment.protocol", ),
        ("Animal.cage_stays", ),
        ("CageStay.cage", ),
        ("Cage_Treatment", "Cage.treatments"),
        ("Cage_TreatmentProtocol", "Cage_Treatment.protocol"),
    ]

    # Every requested filter gets its own entry, so that all of them reach the query
    # (previously each one overwrote the one before it and only the last was applied).
    filters = []
    if animal_ids:
        filters.append(["Animal", "id"] + list(animal_ids))
    if animal_treatments:
        filters.append(["TreatmentProtocol", "code"] + list(animal_treatments))
    if cage_treatments:
        filters.append(["Cage_TreatmentProtocol", "code"] + list(cage_treatments))
    if not filters:
        # Preserve the historical call shape (a single empty filter entry) when nothing is filtered.
        filters.append([])

    df = query.get_df(db_path,
                      col_entries=col_entries,
                      join_entries=join_entries,
                      filters=filters,
                      default_join=join_type)

    # Generally dataframe operations should be performed in `labbookdb.report.tracking`, however, if animals are selected by cage treatment, we need to determine which animals actually received the treatment.
    # The following is therefore a selection issue.
    cage_treatment_columns = [
        'Cage_Treatment_id', 'Cage_Treatment_end_date',
        'Cage_Treatment_start_date', 'Cage_TreatmentProtocol_code',
        'Cage_Treatment_protocol_id'
    ]
    animals = list(df["Animal_id"].unique())
    cage_stays = cage_periods(db_path, animal_filter=animals)
    for subject in list(df['Animal_id'].unique()):
        for stay_start in df[df['Animal_id'] ==
                             subject]['CageStay_start_date'].tolist():
            # Rows of `df` describing this animal's stay; the columns used here are never modified below.
            stay_rows = (df['Animal_id'] == subject) & (
                df['CageStay_start_date'] == stay_start)
            stay_end = cage_stays[(cage_stays['Animal_id'] == subject) & (
                cage_stays['CageStay_start_date'] == stay_start
            )]['CageStay_end_date'].tolist()[0]
            treatment_start = df[stay_rows][
                'Cage_Treatment_start_date'].tolist()[0]
            death_date = df[df['Animal_id'] ==
                            subject]['Animal_death_date'].tolist()[0]
            # We do not check for treatment end dates, because often you may want to include recipients of incomplete treatments (e.g. due to death) when filtering based on cagestays.
            # Filtering based on death should be done elsewhere.
            if pd.isnull(treatment_start):
                continue
            # NOTE(review): as written, the first condition only discards treatments which started
            # before the stay began; treatments starting after the stay ended are kept.
            # Confirm whether this is the intended overlap check.
            started_outside_stay = (not stay_start <= treatment_start
                                    and not treatment_start >= stay_end)
            if started_outside_stay or treatment_start >= death_date:
                df.loc[stay_rows, cage_treatment_columns] = None
    df = df.drop_duplicates(subset=[
        'Animal_id', 'Cage_id', 'Cage_Treatment_start_date',
        'Cage_TreatmentProtocol_code', 'TreatmentProtocol_code'
    ])

    return df

Пример #9

0

Показать файл

Файл: selection.py Проект: cequn/LabbookDB

def animal_operations(
    db_path,
    animal_ids=None,
    implant_targets=None,
    virus_targets=None,
):
    """Select a dataframe of animals having been subjected to operative interventions with the given anatomical targets.

    Parameters
    ----------

    db_path : str
        Path to a LabbookDB formatted database.

    animal_ids : list, optional
        A List of LabbookDB `Animal.id` values by which to filter the query.
        It is faster to filter using this mechanism than to return a dataframe for all animals and then filter that.

    implant_targets : list, optional
        A List of LabbookDB `OrthogonalStereotacticTarget.code` values which should be used to filter the query, while being joined to `Operation` objects via the `OpticFiberImplantProtocol` class.
        It is faster to filter using this mechanism than to return a dataframe for all animals and then filter that.

    virus_targets : list, optional
        A List of LabbookDB `OrthogonalStereotacticTarget.code` values which should be used to filter the query, while being joined to `Operation` objects via the `VirusInjectionProtocol` class.
        It is faster to filter using this mechanism than to return a dataframe for all animals and then filter that.

    Returns
    -------
    object
        The result of `query.get_df()` for this selection, using outer joins by default.

    Notes
    -----

    CAREFUL: Providing both `implant_targets` and `virus_targets` will return entries including only animals which have undergone an operation which has included protocols targeting both areas.
    If the areas were targeted by protocols included in different operations, the correspondence will not be detected.
    To obtain such a list please call the function twice and create a new dataframe from the intersection of the inputs on the `Animal_id` column.
    """

    join_type = 'outer'
    col_entries = [
        ("Animal", "id"),
        ("Operation", ),
        ("OpticFiberImplantProtocol", ),
        ("VirusInjectionProtocol", ),
        ("OrthogonalStereotacticTarget", ),
        ("Virus", "OrthogonalStereotacticTarget", ""),
    ]
    join_entries = [
        ("Animal.operations", ),
        ("OpticFiberImplantProtocol", "Operation.protocols"),
        ("VirusInjectionProtocol", "Operation.protocols"),
        ("OrthogonalStereotacticTarget",
         "OpticFiberImplantProtocol.stereotactic_target"),
        ("Virus_OrthogonalStereotacticTarget",
         "VirusInjectionProtocol.stereotactic_target"),
    ]

    # Every requested filter gets its own entry, so that all of them reach the query
    # (previously each one overwrote the one before it and only the last was applied).
    filters = []
    if animal_ids:
        filters.append(["Animal", "id"] + list(animal_ids))
    if implant_targets:
        filters.append(["OrthogonalStereotacticTarget", "code"] +
                       list(implant_targets))
    if virus_targets:
        filters.append(["Virus_OrthogonalStereotacticTarget", "code"] +
                       list(virus_targets))
    if not filters:
        # Preserve the historical call shape (a single empty filter entry) when nothing is filtered.
        filters.append([])

    df = query.get_df(db_path,
                      col_entries=col_entries,
                      join_entries=join_entries,
                      filters=filters,
                      default_join=join_type)

    return df

Пример #10

0

Показать файл

def behaviour(db_path,
              test_type,
              animal_ids=None,
              animals_id_column="id_eth",
              dates=None,
              evaluations_dir="~/data/behaviour",
              author="",
              volume=0,
              random_order=True):
    """Wrapper for evaluate() passing data from a LabbookDB model database.

    Parameters
    ----------

    db_path : str
        Path to a LabbookDB formatted database; a leading `~` is expanded.
    test_type : {"forced_swim_test"}
        Which behavioural test to evaluate.
    animal_ids : list, optional
        Values of the `animals_id_column` attribute of `Animal` by which to filter the measurements.
    animals_id_column : str, optional
        Name of the `Animal` attribute against which `animal_ids` are matched.
    dates : list, optional
        Values of the `date` attribute of the measurement table by which to filter the measurements.
    evaluations_dir : str, optional
        Base directory for the evaluation output; a leading `~` is expanded.
        A subdirectory named after the test type is used, and created if missing.
    author : str, optional
        Author tag appended to every output file name. A notice is printed if it is empty.
    volume : int, optional
        Passed as `volume` to `tracking.manual_events()`.
    random_order : bool, optional
        Whether to present the measurements in random order.

    Raises
    ------
    ValueError
        If `test_type` is not supported.
    """

    if not author:
        print(
            "It is advisable to add your name's three-letter abbreviation via the \"author\" argument. This helps identify your work and protects it from being overwritten."
        )

    db_path = os.path.expanduser(db_path)
    evaluations_dir = os.path.expanduser(evaluations_dir)

    if test_type == "forced_swim_test":
        measurements_table = "ForcedSwimTestMeasurement"
        # Only append the test-specific subdirectory if the caller did not already point to it.
        if os.path.basename(
                os.path.normpath(evaluations_dir)) != "forced_swim_test":
            evaluations_dir = os.path.join(evaluations_dir, "forced_swim_test")
        if not os.path.exists(evaluations_dir):
            os.makedirs(evaluations_dir)
        trial_duration = 360  #in seconds
        events = {"s": "swimming", "i": "immobility"}
    else:
        raise ValueError("The function does not support test_type=\'" +
                         test_type + "\'.")

    col_entries = [
        ("Animal", "id"),
        (measurements_table, ),
    ]
    join_entries = [
        ("Animal.measurements", ),
        (measurements_table, ),
    ]

    filters = []
    if animal_ids:
        filters.append(["Animal", animals_id_column] + list(animal_ids))
    if dates:
        # Filter on the selected measurement table itself, not on a table literally named "measurements_table".
        filters.append([measurements_table, "date"] + list(dates))

    reference_df = get_df(db_path,
                          col_entries=col_entries,
                          join_entries=join_entries,
                          filters=filters)
    if random_order:
        reference_df = reference_df.iloc[np.random.permutation(
            len(reference_df))]

    path_column = measurements_table + "_data_path"
    bracket_column = measurements_table + "_recording_bracket"
    has_bracket = bracket_column in reference_df.columns

    for _, measurement_df in reference_df.iterrows():
        recording_path = measurement_df.loc[path_column]
        bracket = measurement_df.loc[bracket_column] if has_bracket else ""
        # Output file name: <recording base name>_<bracket>_<author>
        outfile_name = os.path.splitext(
            os.path.basename(recording_path))[0] + "_" + bracket + "_" + author
        output_path = os.path.join(evaluations_dir, outfile_name)
        tracking.manual_events(recording_path,
                               trial_duration,
                               events=events,
                               bracket=bracket,
                               volume=volume,
                               output_path=output_path)

Python get_df примеры использования