Пример #1
0
    def _transform_unidify(
        self, results_dir: Path, twitter_api_settings: TwitterApiSettings,
    ) -> Counter[_ExecuteResult]:
        """Look up the tweets of every entry and write them below ``results_dir``.

        ``self._iter_entries_tweet_ids`` supplies ``(entry, tweet_id)`` pairs. The
        Tweet-IDs are passed through ``statuses_lookup`` and the results are
        regrouped per entry. For each entry the non-``None`` results are written
        to ``entry.data_file_name`` (with ``use_lzma=True``) and the entry itself
        is written to ``entry.meta_file_name``, overwriting an existing file.

        Returns the result counter. It is also handed to
        ``_iter_entries_tweet_ids`` (presumably so that method can record its own
        outcomes -- confirm against its implementation) and gains one ``SUCCESS``
        per entry written here.
        """
        counts = Counter[_ExecuteResult]()

        # Peek at the stream first: unzip() cannot cope with an empty iterable.
        peeked, id_pairs = spy(self._iter_entries_tweet_ids(results_dir, counts))
        if not peeked:
            return counts

        entries, tweet_ids = cast(
            Tuple[Iterator[BatchEntry], Iterator[TweetId]], unzip(id_pairs)
        )

        # Pair each entry with the lookup result of its Tweet-ID, then collapse
        # runs of the same entry into one group of results.
        looked_up = zip(entries, statuses_lookup(tweet_ids, twitter_api_settings))
        per_entry = groupby_transform(
            looked_up, keyfunc=itemgetter(0), valuefunc=itemgetter(1)
        )

        for entry, tweets in per_entry:
            found = (status for status in tweets if status is not None)
            write_jsonl_lines(
                results_dir / entry.data_file_name, found, use_lzma=True
            )
            write_json(
                results_dir / entry.meta_file_name, entry, overwrite_existing=True
            )
            counts[_ExecuteResult.SUCCESS] += 1

        return counts
Пример #2
0
def _grouped_by_user(potential_targets):
    """Yield ``(user_id, pairs)`` for each run of targets sharing a user.

    ``pairs`` is a tuple of ``(child_id, study_id)`` tuples, one per target in
    the run.

    Note: only adjacent targets with the same ``user_id`` are grouped, so the
    input must already be sorted by user (hence the ORDER BY in SQL). See:
    https://more-itertools.readthedocs.io/en/stable/api.html#more_itertools.groupby_transform
    """
    runs_by_user = groupby_transform(
        potential_targets,
        keyfunc=attrgetter("user_id"),
        valuefunc=attrgetter("child_id", "study_id"),
    )
    for user_id, child_study_pairs in runs_by_user:
        # groupby_transform hands back the values as a lazy iterator;
        # materialise it so callers get a reusable tuple.
        yield user_id, tuple(child_study_pairs)
Пример #3
0
def combine_times(movie_names, movie_times):
    """Combine times for duplicate movienames

    Every occurrence of a name is merged into a single entry, whether or not
    the duplicates are adjacent in the input. Names are returned in the order
    in which they first appear, and the times of each name keep their input
    order.

    :movie_names: [str]
    :movie_times: [[str], [str]]
    :returns: (list of movie names, list of lists of movie times)
    :raises AssertionError: if the two inputs differ in length
    """
    assert len(movie_names) == len(movie_times), '{} != {}'.format(
        len(movie_names), len(movie_times))

    # dicts preserve insertion order, so names come out in first-seen order
    # and non-adjacent duplicates are merged as well.
    times_by_name = {}
    for name, times in zip(movie_names, movie_times):
        times_by_name.setdefault(name, []).extend(times)

    return list(times_by_name), list(times_by_name.values())
Пример #4
0
def combine_times(movie_names, movie_times):
    """Merge the showtimes of movies that appear more than once.

    The ``(name, times)`` pairs are sorted before grouping, so the returned
    names are in sorted order and each name occurs exactly once. The time
    lists belonging to one name are concatenated in the order the sorted
    pairs put them in.

    :movie_names: [str]
    :movie_times: [[str], [str]]
    :returns: (list of movie names, list of lists of movie times)
    """
    assert len(movie_names) == len(
        movie_times), f'{len(movie_names)} != {len(movie_times)}'

    if not movie_names:
        return [], []

    ordered_pairs = sorted(zip(movie_names, movie_times))
    times_per_name = groupby_transform(
        ordered_pairs,
        itemgetter(0),  # key: the movie name
        itemgetter(1))  # value: that occurrence's list of times

    unique_names = []
    merged_times = []
    for name, time_lists in times_per_name:
        unique_names.append(name)
        # Flatten the per-occurrence time lists into one list for this name.
        merged_times.append(list(chain.from_iterable(time_lists)))

    return unique_names, merged_times
def _update_dart_plate(cursor, plate_barcode, samples_in_plate: List[SampleDoc], labclass_by_centre_name) -> None:
    """Adds one plate to DART if needed and sets the well properties of its positive samples

    Wells are only written when the plate is in the pending state. The caller is
    responsible for committing or rolling back.

    Arguments:
        cursor -- cursor on the DART SQL Server connection
        plate_barcode -- barcode shared by the samples in samples_in_plate
        samples_in_plate {List[SampleDoc]} -- non-empty list of the samples on this plate
        labclass_by_centre_name -- mapping from a sample's source to its labware class

    Raises:
        ValueError -- if the well index of a positive sample cannot be determined
    """
    # The labware class is looked up from the first sample's source.
    labware_class = labclass_by_centre_name[samples_in_plate[0][FIELD_SOURCE]]
    plate_state = add_dart_plate_if_doesnt_exist(
        cursor,
        plate_barcode,
        labware_class  # type:ignore
    )
    if plate_state != DART_STATE_PENDING:
        return

    for sample in samples_in_plate:
        if sample[FIELD_RESULT] != POSITIVE_RESULT_VALUE:
            continue

        well_index = get_dart_well_index(sample.get(FIELD_COORDINATE, None))
        if well_index is None:
            raise ValueError(
                "Unable to determine DART well index for sample "
                f"{sample[FIELD_ROOT_SAMPLE_ID]} in plate {plate_barcode}"
            )

        well_props = map_mongo_doc_to_dart_well_props(sample)
        set_dart_well_properties(
            cursor,
            plate_barcode,
            well_props,
            well_index  # type:ignore
        )


def update_dart_fields(config: Config, samples: List[SampleDoc]) -> bool:
    """Updates DART plates and wells following updates to the filtered positive fields

    Samples are grouped by plate barcode. Each plate is committed on success; on
    failure the error is logged, the plate is rolled back and the remaining plates
    are still attempted. The connection is always closed before returning or raising.

    Arguments:
        config {Config} -- application config specifying database details
        samples {List[Dict[str, str]]} -- the list of samples to update in DART

    Returns:
        bool -- whether the updates completed successfully (False if any plate failed)

    Raises:
        ValueError -- if the DART SQL Server connection could not be established
    """
    sql_server_connection = create_dart_sql_server_conn(config)
    if sql_server_connection is None:
        raise ValueError("Unable to establish DART SQL Server connection")

    dart_updated_successfully = True
    try:
        # Done inside the try/finally so the connection is closed even if this
        # lookup raises; the exception itself still propagates to the caller.
        labclass_by_centre_name = biomek_labclass_by_centre_name(config.CENTRES)
        try:
            logger.info("Writing to DART")

            cursor = sql_server_connection.cursor()

            # NOTE(review): groupby_transform only groups adjacent items, so this
            # presumably relies on samples being ordered by plate barcode -- confirm.
            for plate_barcode, samples_in_plate in groupby_transform(
                    samples,
                    lambda x: x[FIELD_PLATE_BARCODE],
                    reducefunc=list):
                try:
                    _update_dart_plate(
                        cursor,
                        plate_barcode,
                        samples_in_plate,
                        labclass_by_centre_name,
                    )
                    cursor.commit()
                except Exception as e:
                    logger.error(
                        f"Failed updating DART for samples in plate {plate_barcode}"
                    )
                    logger.exception(e)
                    cursor.rollback()
                    dart_updated_successfully = False

            logger.info("Updating DART completed")
        except Exception as e:
            logger.error("Failed updating DART")
            logger.exception(e)
            dart_updated_successfully = False
    finally:
        sql_server_connection.close()

    return dart_updated_successfully