Пример #1
0
def get_study_names(configs_dir_path: str = None,
                    default_config=True) -> List[str]:
    """
    Return the names of all studies available in the configured NoSQL store.

    Args:
        configs_dir_path (str): Directory path of cerebralcortex configurations.
        default_config (bool): If True, load the bundled "default" configuration
            set from configs_dir_path; otherwise load only the user configuration.
    Returns:
        List[str]: list of study names available
    Raises:
        ValueError: If the configured nosql_storage backend is not supported.
        Exception: If the filesystem storage path is not writable.
    Examples:
        >>> get_study_names("/directory/path/of/configs/")
    """
    if default_config:
        config = Configuration(configs_dir_path, "default").config
    else:
        config = Configuration(configs_dir_path).config

    nosql_store = config['nosql_storage']

    if nosql_store == "hdfs":
        fs = pa.hdfs.connect(config['hdfs']['host'], config['hdfs']['port'])
        study_path = config['hdfs']['raw_files_dir']
        # Study directories are named "study=<name>"; strip the base path and
        # the "study=" prefix to recover the bare study name.
        return [
            strm.replace(study_path, "").replace("study=", "")
            for strm in fs.ls(study_path)
        ]
    elif nosql_store == "filesystem":
        filesystem_path = config["filesystem"]["filesystem_path"]
        # Write access is required because other code paths create studies here.
        if not os.access(filesystem_path, os.W_OK):
            raise Exception(
                filesystem_path +
                " path is not writable. Please check your cerebralcortex.yml configurations."
            )
        return [
            d.replace("study=", "") for d in os.listdir(filesystem_path)
            if os.path.isdir(os.path.join(filesystem_path, d))
            and d.startswith("study=")
        ]
    else:
        raise ValueError(nosql_store + " is not supported.")
Пример #2
0
                CC, config)


if __name__ == '__main__':
    # create and load CerebralCortex object and configs
    parser = argparse.ArgumentParser(
        description='CerebralCortex Kafka Message Handler.')
    # Path to the main CerebralCortex YAML configuration (mandatory).
    parser.add_argument("-cc",
                        "--cc_config_filepath",
                        help="Configuration file path",
                        required=True)
    # Path to the mDebugger-specific configuration (mandatory).
    parser.add_argument("-mdc",
                        "--mdebugger_config_filepath",
                        help="mDebugger configuration file path",
                        required=True)
    args = vars(parser.parse_args())

    CC = CerebralCortex(args["cc_config_filepath"])

    # load data diagnostic configs
    md_config = Configuration(args["mdebugger_config_filepath"]).config

    # get/create spark context
    spark_context = get_or_create_sc(type="sparkContext")

    # run for one participant
    # DiagnoseData().one_user_data(["cd7c2cd6-d0a3-4680-9ba2-0c59d0d0c684"], md_config, CC, spark_context)

    # run for all the participants in a study
    # NOTE(review): study name "mperf" is hard-coded — confirm this is intended
    # rather than a CLI argument.
    all_users_data("mperf", md_config, CC, spark_context)
Пример #3
0
        files.append(csv_files_path + username + motionsense_right_led)
        files.append(csv_files_path + username + motionsense_left_accel)
        files.append(csv_files_path + username + motionsense_right_accel)
        files.append(csv_files_path + username + autosense_ble)
        files.append(csv_files_path + username + autosense_respiration)

        dfs = []
        for f in files:
            if os.path.exists(f):
                dfs.append(pd.read_csv(f))

        merged = pd.concat([df for df in dfs], axis=1)
        merged.to_csv(merged_file_path + username + ".csv", sep=",")


if __name__ == '__main__':
    # create and load CerebralCortex object and configs
    parser = argparse.ArgumentParser(
        description='CerebralCortex Kafka Message Handler.')
    # Path to the reporting configuration file (mandatory).
    # NOTE(review): help text says "mDebugger configuration" — looks like a
    # copy-paste leftover; the flag is for the reporting config.
    parser.add_argument("-cr",
                        "--cr_reporting_config_filepath",
                        help="mDebugger configuration file path",
                        required=True)
    args = vars(parser.parse_args())

    # load data reporting configs
    cr_config_file = args["cr_reporting_config_filepath"]
    cr_config = Configuration(cr_config_file).config

    # Run post-processing with the loaded reporting configuration.
    post_process(cr_config)
Пример #4
0
    def __init__(self,
                 configs_dir_path: str = "",
                 cc_configs: dict = None,
                 study_name: str = "default",
                 new_study: bool = False,
                 enable_spark: bool = True,
                 enable_spark_ui=False):
        """
        CerebralCortex constructor

        Args:
            configs_dir_path (str): Directory path of cerebralcortex configurations.
            cc_configs (dict or str): if sets to cc_configs="default" all defaults configs would be loaded. Or you can provide a dict of all available cc_configs as a param
            study_name (str): name of the study. If there is no study, you can pass study name as study_name="default"
            new_study (bool): create a new study with study_name if it does not exist
            enable_spark (bool): enable spark
            enable_spark_ui (bool): enable spark ui
        Raises:
            ValueError: If configuration_filepath is None or empty.
        Examples:
            >>> CC = Kernel(cc_configs="default", study_name="default")
            >>> # if you want to change any of the configs, pass cc_configs as dict with new configurations
            >>> updated_cc_configs = {"nosql_storage": "filesystem", "filesystem_path": "/path/to/store/data/"}
            >>> CC = Kernel(cc_configs=updated_cc_configs, study_name="default")
            >>> # for complete configs, have a look at default configs at: https://github.com/MD2Korg/CerebralCortex-Kernel/blob/3.3/cerebralcortex/core/config_manager/default.yml
        """
        # Point pyspark workers/driver at the same python3 as this process,
        # unless the user has already configured them.
        try:
            if not os.getenv("PYSPARK_PYTHON"):
                os.environ["PYSPARK_PYTHON"] = os.popen(
                    'which python3').read().replace("\n", "")
            if not os.getenv("PYSPARK_DRIVER_PYTHON"):
                os.environ["PYSPARK_DRIVER_PYTHON"] = os.popen(
                    'which python3').read().replace("\n", "")
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
        # not swallowed; chain the cause so the real failure is visible.
        except Exception as err:
            raise Exception(
                "Please set PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON environment variable. For example, export PYSPARK_DRIVER_PYTHON=/path/to/python/dir"
            ) from err

        # Derive SPARK_HOME from the installed pyspark package when unset.
        try:
            if not os.getenv("SPARK_HOME"):
                import pyspark
                spark_installation_path = os.path.dirname(pyspark.__file__)
                import findspark
                findspark.init(spark_installation_path)
        except Exception as err:
            raise Exception("Set SPARK_HOME environment variable.") from err

        # Exactly one of configs_dir_path / cc_configs must be provided.
        if not configs_dir_path and not cc_configs:
            raise ValueError("Please provide configs_dir_path or cc_configs.")
        elif configs_dir_path and cc_configs:
            raise ValueError("Provide only configs_dir_path OR cc_configs.")

        # Validate study_name before it is exported to the environment and
        # used below (previously this check ran only near the end of __init__).
        if not study_name:
            raise Exception("Study name cannot be None.")

        self.version = __version__
        self.config_filepath = configs_dir_path
        self.study_name = study_name
        os.environ["STUDY_NAME"] = study_name
        self.config = Configuration(configs_dir_path, cc_configs).config

        if enable_spark:
            self.sparkContext = get_or_create_sc(
                enable_spark_ui=enable_spark_ui)
            self.sqlContext = get_or_create_sc(type="sqlContext",
                                               enable_spark_ui=enable_spark_ui)
            self.sparkSession = get_or_create_sc(
                type="sparkSession", enable_spark_ui=enable_spark_ui)
        else:
            self.sparkContext = None
            self.sqlContext = None
            self.sparkSession = None

        # Provenance tracking: pennprov is configured via environment variables
        # that downstream code reads; "none" disables it.
        if self.config["mprov"] == "pennprov":
            os.environ["MPROV_HOST"] = self.config["pennprov"]["host"]
            os.environ["MPROV_USER"] = self.config["pennprov"]["user"]
            os.environ["MPROV_PASSWORD"] = self.config["pennprov"]["password"]
            os.environ["ENABLE_MPROV"] = "True"
        elif self.config["mprov"] == "none":
            os.environ["ENABLE_MPROV"] = "False"
        else:
            raise ValueError(
                "Please check cerebralcortex.yml file. mprov is not properly configured."
            )

        self.new_study = new_study

        self.debug = self.config["cc"]["debug"]
        self.logging = CCLogging(self)
        self.logtypes = LogTypes()
        self.SqlData = SqlData(self)
        self.RawData = RawData(self)
        self.TimeSeriesData = None

        # Surface DeprecationWarnings on every occurrence, not just the first.
        warnings.simplefilter('always', DeprecationWarning)

        if not new_study and not self.RawData.is_study():
            raise Exception(
                "Study name does not exist. If this is a new study set new_study param to True"
            )

        if self.config["visualization_storage"] != "none":
            self.TimeSeriesData = TimeSeriesData(self)
Пример #5
0
                        help="mDebugger configuration file path",
                        required=True)
    # Name of the study to process (mandatory).
    # NOTE(review): help text says "mDebugger configuration file path" for
    # every flag here — looks copy-pasted; verify and correct the help strings.
    parser.add_argument("-sn",
                        "--study_name",
                        help="mDebugger configuration file path",
                        required=True)
    # Optional Spark master URL; a default context is created when absent.
    parser.add_argument("-spm",
                        "--spark_master",
                        help="mDebugger configuration file path",
                        required=False)
    args = vars(parser.parse_args())

    CC = CerebralCortex(args["cc_config_filepath"])

    # load data reporting configs
    # NOTE(review): "cc_reporting_config_filepath" must be registered by an
    # add_argument call earlier in this script (outside this view) — verify it
    # exists, otherwise this KeyErrors at runtime.
    cr_config = Configuration(args["cc_reporting_config_filepath"]).config
    cc_config_file = args["cc_config_filepath"]
    # get/create spark context
    if args["spark_master"]:
        spark_context = get_or_create_sc(type="sparkContext",
                                         master=args["spark_master"])
    else:
        spark_context = get_or_create_sc(type="sparkContext")

    # run for all the participants in a study
    #all_users_data("mperf", md_config, CC, spark_context)

    #TESTING
    all_users_data(args["study_name"], cc_config_file, cr_config, CC,
                   spark_context)