Exemplo n.º 1
0
 def add_column(self):
     """Insert the selected column, qualified by its table name, into the expression box.

     The inserted text is ``<csv name with ".csv" removed>.<selected column>``
     and is placed at the ``tk.INSERT`` position of ``expressions_text``.
     When no column is selected a warning is logged and an empty string is
     inserted instead.
     """
     if self.columns_cbox.current() < 0:
         # Nothing selected in the combobox.
         LOGGER.warning("Table or header not selected.")
         qualified_column = ''
     else:
         table_name = self.parent.csv_name.replace(".csv", "")
         qualified_column = '.'.join((table_name, self.columns_cbox.get()))
     self.expressions_text.insert(tk.INSERT, qualified_column)
Exemplo n.º 2
0
    def __check_db_container(self, mode='running'):
        """Check whether the db container is running or at least exists.

        Lists the containers with ``docker ps`` (``docker ps -a`` when
        ``mode`` is 'exist') and looks for the row whose container name is
        exactly the configured db container name. When that row also uses
        the configured db image, the flag matching ``mode`` is set and, if
        the port reported for the container differs from the configured one,
        the configured port is replaced by the reported one.

        Arguments:
        :param mode: 'running' for container is up and running
                      or 'exist' when container exists but is down.
        :raises DockerExecError: if ``mode`` is invalid, or if the container
            name is already used by a container created from another image.
        """
        if mode == 'running':
            cmd_docker = ['docker', 'ps']
        elif mode == 'exist':
            cmd_docker = ['docker', 'ps', '-a']
        else:
            raise DockerExecError(
                'Invalid container check mode: {}.'.format(mode))

        # The listing is filtered here instead of being piped through grep:
        # a single child process that is always waited for, and an exact
        # match on the name column instead of a substring match anywhere in
        # the row.
        proc_docker = subprocess.run(cmd_docker, stdout=subprocess.PIPE)
        if proc_docker.returncode != 0:
            LOGGER.warning('"{}" exited with code {}.'.format(
                ' '.join(cmd_docker), proc_docker.returncode))
        listing = proc_docker.stdout.decode('utf-8', errors='replace')

        container_name = None
        container_image = None
        container_port = None
        for row in listing.splitlines():
            fields = row.split()
            # In the default ``docker ps`` table the image is the second
            # field and the container name is the last one.
            if len(fields) < 2 or fields[-1] != self.__db_cont_name:
                continue
            LOGGER.debug(fields)
            container_image = fields[1]
            container_name = fields[-1]
            # NOTE(review): the field before the name is the port mapping
            # only when the container publishes ports; for a stopped
            # container it is presumably part of the status column.
            # Confirm find_xtport copes with that.
            container_port = find_xtport(fields[-2])
            break

        LOGGER.debug(
            'Found that there is an existing container with the name: {}'.
            format(container_name))

        if container_name != self.__db_cont_name:
            return

        if container_image != self.__db_image:
            msg = ('The name \"{}\" is used by another container. '
                   'Could not create postgres database container. '
                   'Please use other db container name.').format(
                       self.__db_cont_name)
            raise DockerExecError(msg)

        if mode == 'running':
            self.__is_db_running = True
        elif mode == 'exist':
            self.__is_db_exist = True

        if container_port != self.__dbport:
            LOGGER.warning('Using as external container port: {}'.format(
                container_port))
            self.__dbport = container_port
Exemplo n.º 3
0
    def suggest_corr(self, cdedict, threshold):
        """Suggest CDE correspondences for the source columns and add them to the mapping.

        Each column report of the table report is passed to
        ``cdedict.suggest_cde``. The first column suggested for a given CDE
        code is kept; later columns suggested for the same code are ignored.
        For every kept suggestion ``cdedict.suggest_replecements`` is asked
        for value replacements, and a correspondence is added to the mapping
        whose expression is either the plain ``<table>.<column>`` reference
        or, when replacements were suggested, the result of ``ifstr``.
        A correspondence rejected with ``MappingError`` is logged and
        skipped.

        Arguments:
        :param cdedict: CdeDict object
        :param threshold: 0-1 similarity threshold, below that not a cde is suggested
        """
        source_table = self.__srctbl.filename
        target_table = self.__target_filename
        cde_sugg_dict = {}  # {cdecode: source column}
        sugg_replacemnts = {}  # {cdecode: [replacements]}

        # Pass 1: collect one source column (and optional replacements) per cde.
        for columnreport in self.__tblreport.columnreports.values():
            cde = cdedict.suggest_cde(columnreport, threshold=threshold)
            # Skip columns without a suggestion and cdes already taken.
            if not cde or cde.code in cde_sugg_dict:
                continue
            cde_sugg_dict[
                cde.code] = self.__mapping.sourcedb.raw_2_mipmap_header(
                    self.__src_filename, columnreport.name)
            sugg_reps = cdedict.suggest_replecements(cde.code,
                                                     columnreport,
                                                     threshold=threshold)
            if sugg_reps:
                sugg_replacemnts[cde.code] = sugg_reps

        # Pass 2: turn every suggestion into a mapping correspondence.
        for cdecode, source_var in cde_sugg_dict.items():
            source_paths = [(source_table, source_var, None)]
            target_path = (target_table, cdecode, None)
            filename_column = '.'.join(
                [os.path.splitext(source_table)[0], source_var])
            # Only truthy replacement lists are stored, so membership means
            # there is something to build the conditional expression from.
            if cdecode in sugg_replacemnts:
                expression = ifstr(filename_column, sugg_replacemnts[cdecode])
            else:
                expression = filename_column

            try:
                self.__mapping.add_corr(
                    source_paths=source_paths,
                    target_path=target_path,
                    expression=expression,
                    replacements=sugg_replacemnts.get(cdecode))
            except MappingError:
                # The mapping rejected this correspondence; keep going with
                # the remaining suggestions.
                LOGGER.warning(
                    'found cde macth for source column "{}" but cde "{}" '
                    'is not included in the selected cde pathology.'.format(
                        source_var, cdecode))

        self.__update_cde_mapped()
Exemplo n.º 4
0
    def __create_db_container(self):
        """Make sure the db container is up, creating it when necessary.

        The container state is refreshed with ``__check_db_container`` and
        then one of three things happens:

        * the container is running: nothing is started;
        * the container exists but is down: it is restarted with
          ``docker restart`` followed by a 10 second wait;
        * otherwise: a new container is started with ``docker run`` from the
          configured image, publishing the configured port onto the
          container's 5432 and passing the configured user and password,
          followed by a 50 second wait.

        In every case ``__remove_create_db`` is called at the end.

        :raises DockerExecError: if the ``docker restart`` or ``docker run``
            command exits with a non-zero status.
        """
        self.__check_db_container(mode='running')
        self.__check_db_container(mode='exist')

        if self.__is_db_running:
            LOGGER.info('db container ({}) is already up and'
                        ' running. Skipping creation step...'.format(
                            self.__db_cont_name))
            self.__remove_create_db()
            return

        if self.__is_db_exist:
            LOGGER.info('db container({}) already exists. '
                        'Restarting db container'.format(self.__db_cont_name))
            command = ['docker', 'restart', self.__db_cont_name]
            error_msg = ('There was an error while attempting restarting '
                         'the db container.')
            startup_delay = 10
        else:
            LOGGER.debug('Creating db container with name {}'.format(
                self.__db_cont_name))
            command = [
                'docker', 'run',
                '-p', '{}:5432'.format(self.__dbport),
                '--name', self.__db_cont_name,
                '-e', 'POSTGRES_PASSWORD={}'.format(self.__dbpassword),
                '-e', 'POSTGRES_USER={}'.format(self.__dbuser),
                '-d', self.__db_image,
            ]
            error_msg = ('There was an error while attempting creating '
                         'the db container.')
            startup_delay = 50

        # check=True is what makes a failing docker command raise
        # CalledProcessError; without it the failure would go unnoticed and
        # the code would wait and then work against a missing database.
        try:
            subprocess.run(command, check=True)
        except subprocess.CalledProcessError as err:
            LOGGER.warning(error_msg)
            raise DockerExecError(error_msg) from err

        # Fixed grace period before touching the database; presumably this
        # gives the server inside the container time to start.
        time.sleep(startup_delay)
        self.__remove_create_db()
Exemplo n.º 5
0
    def createreport(self):
        """Validate the form, build the dataset report and export it.

        Steps:

        1. Check that a dataset, a metadata source (with its file or Data
           Catalogue json) and a report file path have been chosen; show a
           warning dialog and stop at the first missing item.
        2. Read the outlier threshold, falling back to 3 when it is not a
           valid number.
        3. Load the metadata schema, either from the selected json file or
           from the json already fetched from the Data Catalogue.
        4. Build the ``TableReport`` and write it as Excel (report type 1)
           or PDF (any other report type), then enable the suggestion and
           cleaning buttons.

        Failures reading the metadata file and ``QCToolException`` errors
        are shown in an error dialog. The execute button is disabled while
        the method runs and is always re-enabled on exit.
        """
        self.button_exec.config(state='disabled')
        # try/finally: the button must come back even when an unexpected
        # exception escapes, otherwise the user cannot retry.
        try:
            LOGGER.info('Checking if the necessary fields are filled in...')
            warningtitle = 'Cannot create report'
            errortitle = 'Something went wrong!'
            md_frame = self.md_frame

            if not self.dname:
                problem = 'Please, select dataset file'
            elif md_frame.from_disk.get() and not md_frame.metafilepath:
                problem = 'Please, select metadata file'
            elif md_frame.from_dc.get() and not md_frame.dc_json:
                problem = 'Could not get metadata from Data Cataloge'
            elif not (md_frame.from_disk.get() or md_frame.from_dc.get()):
                # Without a source there is no schema to build the report
                # from.
                problem = 'Please, select metadata source'
            elif not self.__reportfilepath:
                problem = 'Please, select report file first'
            else:
                problem = None

            if problem is not None:
                tkmessagebox.showwarning(warningtitle, problem)
                return

            try:
                raw_threshold = self.outlier_threshold.get()
                threshold = float(raw_threshold)
                LOGGER.info('Outlier threshold: %s', raw_threshold)
            except ValueError:
                LOGGER.warning('Could not retrieve outlier threshold. '
                               'Setting it to default value: 3')
                threshold = 3
            LOGGER.info('Everything looks ok...')

            schema_type = 'qc'
            if md_frame.from_disk.get():
                LOGGER.info('Retrieving Metadata from localdisk...')
                LOGGER.info('Using metadata file: %s', md_frame.metafilepath)
                try:
                    # JSON text is UTF-8; do not depend on the platform's
                    # default encoding.
                    with open(md_frame.metafilepath,
                              encoding='utf-8') as json_file:
                        dict_schema = json.load(json_file)
                except (OSError, ValueError) as err:
                    # ValueError covers both malformed json and undecodable
                    # bytes.
                    tkmessagebox.showerror(errortitle, str(err))
                    return
                if md_frame.json_type.get() == 2:
                    schema_type = 'dc'
            else:
                # The validation above guarantees the Data Catalogue source
                # is the selected one here.
                LOGGER.info('Retrieving Metadata from Data Catalogue...')
                LOGGER.info('Selected pathology is {}, CDE version: {}'.format(
                    md_frame.selected_pathology.get(),
                    md_frame.selected_version.get()))
                dict_schema = md_frame.dc_json
                schema_type = 'dc'

            try:
                self.reportcsv = TableReport.from_disc(
                    self.datasetpath,
                    dict_schema=dict_schema,
                    schema_type=schema_type,
                    threshold=threshold)
                if self.reportcsv.isvalid:
                    LOGGER.info('The dataset is valid.')
                else:
                    LOGGER.info('CAUTION! The dataset is invalid!')

                # Report type 1 is exported as Excel, anything else as PDF.
                if self.report_type.get() == 1:
                    self.reportcsv.printexcel(self.__reportfilepath)
                else:
                    self.reportcsv.printpdf(self.__reportfilepath)
            except QCToolException as e:
                tkmessagebox.showerror(errortitle, str(e))
                return

            tkmessagebox.showinfo(
                title='Status info',
                message='Reports have been created successully'
            )
            self.show_sugg_button.config(state='normal')
            self.clean_button.config(state='normal')
        finally:
            self.button_exec.config(state='normal')