def get_dir_contents(url):
        """
        Return the contents of the remote directory at the URL passed.

        The response handle is closed before returning, including when
        reading or decoding the response fails.

        :param url: The URL of the directory for which the contents are returned.
        :return: dir_contents: The UTF-8 decoded body of the response for the URL.
        :raises urllib2.HTTPError: If there is an issue reading the URL; the
            error is logged as a warning and re-raised.
        """

        try:
            url_io = urllib2.urlopen(url)

            # Close explicitly so the connection is released even if read()
            # or decode() raises.
            try:
                dir_contents = url_io.read().decode('utf-8')
            finally:
                url_io.close()

        except urllib2.HTTPError as ex:
            log.warning('URL does not exist: %s: %s', url, ex)
            raise

        return dir_contents
    def get_dir_contents(path, reverse_order=False):
        """
        List the entries of the directory at the given path, sorted by name.

        :param path: The path of the directory for which the contents are returned.
        :param reverse_order: If true the contents are sorted in reverse order.
        :return: dir_contents: The sorted list of entry names in the directory.
        :raises OSError: If the directory cannot be listed; the error is
            logged as a warning and re-raised.
        """

        try:
            dir_contents = sorted(os.listdir(path), reverse=reverse_order)
        except OSError as ex:
            log.warning('Path does not exist: %s', ex)
            raise

        return dir_contents
    def get_dir_contents(path, reverse_order=False):
        """
        Return the sorted entry names of the directory found at the path passed.

        :param path: The path of the directory for which the contents are returned.
        :param reverse_order: If true the contents are sorted in reverse order.
        :return: dir_contents: The sorted list of entry names in the directory.
        :raises OSError: If there is an issue reading the path passed in; the
            error is logged as a warning and re-raised.
        """

        # Only the directory listing can raise OSError; sorting happens afterwards.
        try:
            entries = os.listdir(path)
        except OSError as ex:
            log.warning('Path does not exist: %s', ex)
            raise

        return sorted(entries, reverse=reverse_order)
    def generate_zplsc_echograms(self):
        """
        Generate ZPLSC echograms for a single 1-hour raw data file or for
        every configured subsite, deployment and date.

        When self.zplsc_datafile is set, that file is handed to the ZPLSC C
        Series parser and the echogram is written beneath
        self.base_echogram_directory, re-using the part of the data file's
        directory path that starts at the first matching entry of
        self.zplsc_subsites.

        Otherwise, for each subsite in self.subsites, each of its deployments
        and each echogram date, the raw data files are aggregated into one
        24-hour data file and the echogram is generated from it.  Subsites and
        deployments whose directories cannot be read (OSError) are skipped.
        In process mode the method schedules itself to run again after
        SECONDS_IN_DAY seconds.

        :return:
        :raises OSError: If the echogram directory for the 1-hour data file
            cannot be created.
        """

        # If we are creating a 1-hour echogram, generate the echogram.
        if self.zplsc_datafile is not None:
            # Send the 1-hour raw data file to the zplsc C Series parser to generate the echogram.
            with open(self.zplsc_datafile) as file_handle:
                base_directory = os.path.expanduser(self.base_echogram_directory)
                path_structure = os.path.dirname(self.zplsc_datafile)
                zplsc_echogram_file_path = None
                for subsite in self.zplsc_subsites:
                    subsite_index = path_structure.find(subsite)
                    if subsite_index < 0:
                        continue

                    zplsc_echogram_file_path = os.path.join(base_directory, path_structure[subsite_index:])
                    # Create the ZPLSC Echogram directory structure if it doesn't exist.
                    try:
                        os.makedirs(zplsc_echogram_file_path)
                    except OSError as ex:
                        # An already existing directory is fine; anything else is fatal.
                        if ex.errno != errno.EEXIST or not os.path.isdir(zplsc_echogram_file_path):
                            # Log the exception itself: ex.message is empty for
                            # errno-style OSErrors and does not exist on Python 3.
                            log.error('Error creating local ZPLSC Echogram storage directory: %s', ex)
                            raise
                    break

                if zplsc_echogram_file_path is None:
                    log.warning('The subsite is not one of the subsites containing a ZPLSC-C instrument.')
                else:
                    # Get the parser for this file and generate the echogram.
                    parser = ZplscCParser(CONFIG, file_handle, self.rec_exception_callback)
                    parser.create_echogram(zplsc_echogram_file_path)

        else:  # We are creating 24-hour echograms ...
            # Create the temporary data file directory.
            self.temp_directory = os.path.join(os.path.expanduser(USER_HOME), TEMP_DIR)
            if not os.path.exists(self.temp_directory):
                os.mkdir(self.temp_directory)

            # Create the echograms for the zplsc instruments of each subsite.
            for subsite in self.subsites:
                zplsc_24_subsite_prefix = subsite + '-'

                try:
                    deployments = self.get_deployment_dirs(subsite)
                except OSError:
                    continue

                for deployment in deployments:
                    zplsc_24_deployment_prefix = zplsc_24_subsite_prefix + 'R' + str(deployment) + '-'

                    # get_date_dirs also sets self.serial_num, which is used in the prefix below.
                    try:
                        echogram_dates, date_dirs_path = self.get_date_dirs(subsite, deployment)
                    except OSError:
                        continue

                    for date_dir, entire_month in echogram_dates.items():
                        self.zplsc_24_datafile_prefix = zplsc_24_deployment_prefix + 'sn' + self.serial_num + '-'

                        # Expand a whole-month entry into one date per day of that month.
                        if entire_month:
                            number_of_days_in_the_month = calendar.monthrange(date_dir.year, date_dir.month)[1]
                            dates_to_process = [date_dir + timedelta(days=day)
                                                for day in range(number_of_days_in_the_month)]
                        else:
                            dates_to_process = [date_dir]

                        for echogram_date in dates_to_process:
                            # Aggregate the 24 raw data files for the given instrument to 1 24-hour data file.
                            zplsc_24_datafile, zplsc_echogram_file_path = self.aggregate_raw_data(date_dirs_path,
                                                                                                  echogram_date)
                            if not zplsc_24_datafile:
                                log.warning('Unable to aggregate raw data files for %s under %s',
                                            echogram_date, date_dirs_path)
                                continue

                            # Send the 24-hour raw data file to the zplsc C Series parser to generate the echogram.
                            with open(zplsc_24_datafile) as file_handle:
                                parser = ZplscCParser(CONFIG, file_handle, self.rec_exception_callback)
                                parser.create_echogram(zplsc_echogram_file_path)

                            if not self.keep_temp_files:
                                self.purge_temporary_files()

            # Remove the temporary data file directory and its content.
            if not self.keep_temp_files:
                shutil.rmtree(self.temp_directory)

            # If it's running as a daily process, wait 24 hours and re-run this method
            if self.process_mode:
                threading.Timer(SECONDS_IN_DAY, self.generate_zplsc_echograms).start()
    def get_date_dirs(self, subsite, deployment):
        """
        This method will generate the path to the directory of date directories
        in the format of YYYYMM and determine the dates to generate echograms for.

        As a side effect, self.serial_num is set to the serial number captured
        from the matching instrument directory name.

        Exceptions raised by this method:
            OSError: If none of the DCL paths can be read, no instrument
                directory matches SERIAL_NUM_DIR_MATCHER, or no recovered
                data directory is found.
            ValueError: If a date directory name cannot be converted to a
                year and a month.

        :param subsite: The subsite of the ZPLSC instrument.
        :param deployment: The deployment number of the data of interest.
        :return: echogram_dates: The mapping of echogram dates to the entire month flag
                 date_dirs_path: The path to the date directories.
        """

        # Generate the portion of the path up to the DCL directory to get the all the instrument sub-directories.
        deployment_dir = os.path.join(self.raw_data_dir, subsite.upper(), 'R%05d' % deployment)
        dcl_path = ''
        instrument_dirs = ''
        last_index = len(DCL_PATHS) - 1
        for index, dcl_rel_path in enumerate(DCL_PATHS):
            dcl_path = os.path.join(deployment_dir, dcl_rel_path)
            try:
                instrument_dirs = self.get_dir_contents(dcl_path, True)
                break
            except OSError:
                log.info('Could not find path: %s: checking alternate path', dcl_path)
                # Give up once the last candidate has failed.  Compare by position:
                # the joined dcl_path is never the same object as an entry of DCL_PATHS.
                if index == last_index:
                    raise

        # Generate the portion of the path up to the ZPLSC Instrument serial number.
        matches = (SERIAL_NUM_DIR_MATCHER.match(instrument) for instrument in instrument_dirs)
        serial_num_found = next((match for match in matches if match), None)

        if serial_num_found is None:
            log.warning('Could not find ZPLSC data for subsite: %s and recovered deployment: %s', subsite, deployment)
            raise OSError('Could not find ZPLSC data for subsite: %s and recovered deployment: %s'
                          % (subsite, deployment))

        self.serial_num = serial_num_found.group(1)
        serial_num_dir = os.path.join(dcl_path, serial_num_found.group())
        sub_dirs = self.get_dir_contents(serial_num_dir)

        # Generate the portion of the path that contains the recovered data path.
        recovered_path = RECOVERED_DIR % (subsite.lower(), self.serial_num)
        recovered_dir = next((sub_dir for sub_dir in sub_dirs if sub_dir.startswith(recovered_path)), '')

        if not recovered_dir:
            log.warning('Could not find ZPLSC recovered data path starting with: %s', recovered_path)
            raise OSError('Could not find ZPLSC recovered data path starting with: %s' % recovered_path)

        # Create the raw data path including the recovered path
        date_dirs_path = os.path.join(serial_num_dir, recovered_dir, DATA_PATH)

        # If no dates were entered on the command line, get the entire  list of date directories.
        echogram_dates = self.echogram_dates
        if not echogram_dates:
            echogram_dates = {}

            # Get all the year/month date subdirectories for this subsite/deployment,
            # split into (YYYY, MM) string pairs.
            date_dirs = [(date_dir[:4], date_dir[4:])
                         for date_dir in self.get_dir_contents(date_dirs_path, True)]

            # If in process mode, get the latest date that has 24 1-hour data files for echogram generation.
            if self.process_mode:
                echogram_dates[self.get_latest_echogram_date(date_dirs_path, date_dirs)] = False

            # Otherwise, get all the year/month date subdirectories for this subsite and deployment.
            else:
                for year, month in date_dirs:
                    # Save the date and indicate that the entire month should be generated.
                    echogram_dates[date(int(year), int(month), 1)] = True

        return echogram_dates, date_dirs_path
    def generate_zplsc_echograms(self):
        """
        Generate ZPLSC echograms for a single 1-hour raw data file or for
        every configured subsite, deployment and date.

        When self.zplsc_datafile is set, that file is handed to the ZPLSC C
        Series parser and the echogram is written beneath
        self.base_echogram_directory, re-using the part of the data file's
        directory path that starts at the first matching entry of
        self.zplsc_subsites.

        Otherwise, for each subsite in self.subsites, each of its deployments
        and each echogram date, the raw data files are aggregated into one
        24-hour data file and the echogram is generated from it.  Subsites and
        deployments whose directories cannot be read (OSError) are skipped.
        In process mode the method schedules itself to run again after
        SECONDS_IN_DAY seconds.

        :return:
        :raises OSError: If the echogram directory for the 1-hour data file
            cannot be created.
        """

        # If we are creating a 1-hour echogram, generate the echogram.
        if self.zplsc_datafile is not None:
            # Send the 1-hour raw data file to the zplsc C Series parser to generate the echogram.
            with open(self.zplsc_datafile) as file_handle:
                base_directory = os.path.expanduser(
                    self.base_echogram_directory)
                path_structure = os.path.dirname(self.zplsc_datafile)
                zplsc_echogram_file_path = None
                for subsite in self.zplsc_subsites:
                    subsite_index = path_structure.find(subsite)
                    if subsite_index < 0:
                        continue

                    zplsc_echogram_file_path = os.path.join(
                        base_directory, path_structure[subsite_index:])
                    # Create the ZPLSC Echogram directory structure if it doesn't exist.
                    try:
                        os.makedirs(zplsc_echogram_file_path)
                    except OSError as ex:
                        # An already existing directory is fine; anything else is fatal.
                        already_exists = (ex.errno == errno.EEXIST and
                                          os.path.isdir(zplsc_echogram_file_path))
                        if not already_exists:
                            # Log the exception itself: ex.message is empty for
                            # errno-style OSErrors and does not exist on Python 3.
                            log.error(
                                'Error creating local ZPLSC Echogram storage directory: %s',
                                ex)
                            raise
                    break

                if zplsc_echogram_file_path is None:
                    log.warning(
                        'The subsite is not one of the subsites containing a ZPLSC-C instrument.'
                    )
                else:
                    # Get the parser for this file and generate the echogram.
                    parser = ZplscCParser(CONFIG, file_handle,
                                          self.rec_exception_callback)
                    parser.create_echogram(zplsc_echogram_file_path)

        else:  # We are creating 24-hour echograms ...
            # Create the temporary data file directory.
            self.temp_directory = os.path.join(os.path.expanduser(USER_HOME),
                                               TEMP_DIR)
            if not os.path.exists(self.temp_directory):
                os.mkdir(self.temp_directory)

            # Create the echograms for the zplsc instruments of each subsite.
            for subsite in self.subsites:
                zplsc_24_subsite_prefix = subsite + '-'

                try:
                    deployments = self.get_deployment_dirs(subsite)
                except OSError:
                    continue

                for deployment in deployments:
                    zplsc_24_deployment_prefix = zplsc_24_subsite_prefix + 'R' + str(
                        deployment) + '-'

                    # get_date_dirs also sets self.serial_num, which is used in the prefix below.
                    try:
                        echogram_dates, date_dirs_path = self.get_date_dirs(
                            subsite, deployment)
                    except OSError:
                        continue

                    for date_dir, entire_month in echogram_dates.items():
                        self.zplsc_24_datafile_prefix = zplsc_24_deployment_prefix + 'sn' + self.serial_num + '-'

                        # Expand a whole-month entry into one date per day of that month.
                        if entire_month:
                            number_of_days_in_the_month = calendar.monthrange(
                                date_dir.year, date_dir.month)[1]
                            dates_to_process = [
                                date_dir + timedelta(days=day)
                                for day in range(number_of_days_in_the_month)
                            ]
                        else:
                            dates_to_process = [date_dir]

                        for echogram_date in dates_to_process:
                            # Aggregate the 24 raw data files for the given instrument to 1 24-hour data file.
                            zplsc_24_datafile, zplsc_echogram_file_path = self.aggregate_raw_data(
                                date_dirs_path, echogram_date)
                            if not zplsc_24_datafile:
                                log.warning(
                                    'Unable to aggregate raw data files for %s under %s',
                                    echogram_date, date_dirs_path)
                                continue

                            # Send the 24-hour raw data file to the zplsc C Series parser to generate the echogram.
                            with open(zplsc_24_datafile) as file_handle:
                                parser = ZplscCParser(
                                    CONFIG, file_handle,
                                    self.rec_exception_callback)
                                parser.create_echogram(
                                    zplsc_echogram_file_path)

                            if not self.keep_temp_files:
                                self.purge_temporary_files()

            # Remove the temporary data file directory and its content.
            if not self.keep_temp_files:
                shutil.rmtree(self.temp_directory)

            # If it's running as a daily process, wait 24 hours and re-run this method
            if self.process_mode:
                threading.Timer(SECONDS_IN_DAY,
                                self.generate_zplsc_echograms).start()
    def get_date_dirs(self, subsite, deployment):
        """
        This method will generate the path to the directory of date directories
        in the format of YYYYMM and determine the dates to generate echograms for.

        As a side effect, self.serial_num is set to the serial number captured
        from the matching instrument directory name.

        Exceptions raised by this method:
            OSError: If none of the DCL paths can be read, no instrument
                directory matches SERIAL_NUM_DIR_MATCHER, or no recovered
                data directory is found.
            ValueError: If a date directory name cannot be converted to a
                year and a month.

        :param subsite: The subsite of the ZPLSC instrument.
        :param deployment: The deployment number of the data of interest.
        :return: echogram_dates: The mapping of echogram dates to the entire month flag
                 date_dirs_path: The path to the date directories.
        """

        # Generate the portion of the path up to the DCL directory to get the all the instrument sub-directories.
        deployment_dir = os.path.join(self.raw_data_dir, subsite.upper(),
                                      'R%05d' % deployment)
        dcl_path = ''
        instrument_dirs = ''
        last_index = len(DCL_PATHS) - 1
        for index, dcl_rel_path in enumerate(DCL_PATHS):
            dcl_path = os.path.join(deployment_dir, dcl_rel_path)
            try:
                instrument_dirs = self.get_dir_contents(dcl_path, True)
                break
            except OSError:
                log.info('Could not find path: %s: checking alternate path',
                         dcl_path)
                # Give up once the last candidate has failed.  Compare by position:
                # the joined dcl_path is never the same object as an entry of DCL_PATHS.
                if index == last_index:
                    raise

        # Generate the portion of the path up to the ZPLSC Instrument serial number.
        serial_num_found = None
        for instrument in instrument_dirs:
            serial_num_found = SERIAL_NUM_DIR_MATCHER.match(instrument)
            if serial_num_found:
                break

        if serial_num_found is None:
            log.warning(
                'Could not find ZPLSC data for subsite: %s and recovered deployment: %s',
                subsite, deployment)
            raise OSError(
                'Could not find ZPLSC data for subsite: %s and recovered deployment: %s'
                % (subsite, deployment))

        self.serial_num = serial_num_found.group(1)
        serial_num_dir = os.path.join(dcl_path, serial_num_found.group())
        sub_dirs = self.get_dir_contents(serial_num_dir)

        # Generate the portion of the path that contains the recovered data path.
        recovered_path = RECOVERED_DIR % (subsite.lower(), self.serial_num)
        recovered_dir = ''
        for sub_dir in sub_dirs:
            if sub_dir.startswith(recovered_path):
                recovered_dir = sub_dir
                break

        if not recovered_dir:
            log.warning(
                'Could not find ZPLSC recovered data path starting with: %s',
                recovered_path)
            raise OSError(
                'Could not find ZPLSC recovered data path starting with: %s' %
                recovered_path)

        # Create the raw data path including the recovered path
        date_dirs_path = os.path.join(serial_num_dir, recovered_dir,
                                      DATA_PATH)

        # If no dates were entered on the command line, get the entire  list of date directories.
        echogram_dates = self.echogram_dates
        if not echogram_dates:
            echogram_dates = {}

            # Get all the year/month date subdirectories for this subsite/deployment,
            # split into (YYYY, MM) string pairs.
            date_dirs = self.get_dir_contents(date_dirs_path, True)
            date_dirs = [(date_dir[:4], date_dir[4:])
                         for date_dir in date_dirs]

            # If in process mode, get the latest date that has 24 1-hour data files for echogram generation.
            if self.process_mode:
                echogram_dates[self.get_latest_echogram_date(
                    date_dirs_path, date_dirs)] = False

            # Otherwise, get all the year/month date subdirectories for this subsite and deployment.
            else:
                for year, month in date_dirs:
                    # Save the date and indicate that the entire month should be generated.
                    echogram_dates[date(int(year), int(month), 1)] = True

        return echogram_dates, date_dirs_path
    def generate_zplsc_echograms(self):
        """
        Generate the 24-hour ZPLSC echograms for every configured subsite,
        deployment and date.

        For each subsite in self.subsites, each of its deployments and each
        echogram date, the raw data files are aggregated into one 24-hour data
        file and the echogram is generated from it by the ZPLSC C Series
        parser.  Subsites and deployments whose remote directories cannot be
        read are skipped.  In process mode the method schedules itself to run
        again after SECONDS_IN_DAY seconds.

        :return:
        """

        # Create the temporary data file directory.
        self.temp_directory = os.path.join(os.path.expanduser(USER_HOME), TEMP_DIR)
        if not os.path.exists(self.temp_directory):
            os.mkdir(self.temp_directory)

        # Create the echograms for the zplsc instruments of each subsite.
        for subsite in self.subsites:
            zplsc_24_subsite_prefix = subsite + '-'

            try:
                deployments = self.get_deployment_dirs(subsite)
            except urllib2.HTTPError:
                continue

            for deployment in deployments:
                zplsc_24_deployment_prefix = zplsc_24_subsite_prefix + 'R' + str(deployment) + '-'

                # get_date_dirs also sets self.serial_num, which is used in the prefix below.
                # ValueError is caught as well because the URL based get_date_dirs raises it
                # when no ZPLSC instrument directory is found; one deployment without ZPLSC
                # data must not abort the remaining subsites and deployments.
                try:
                    echogram_dates, date_dirs_url = self.get_date_dirs(subsite, deployment)
                except (urllib2.HTTPError, ValueError):
                    continue

                for date_dir, entire_month in echogram_dates.items():
                    self.zplsc_24_datafile_prefix = zplsc_24_deployment_prefix + 'sn' + self.serial_num + '-'

                    # Expand a whole-month entry into one date per day of that month.
                    if entire_month:
                        number_of_days_in_the_month = calendar.monthrange(date_dir.year, date_dir.month)[1]
                        dates_to_process = [date_dir + timedelta(days=day)
                                            for day in range(number_of_days_in_the_month)]
                    else:
                        dates_to_process = [date_dir]

                    for echogram_date in dates_to_process:
                        # Aggregate the 24 raw data files for the given instrument to 1 24-hour data file.
                        zplsc_24_datafile, zplsc_echogram_file_path = self.aggregate_raw_data(date_dirs_url,
                                                                                              echogram_date)
                        if not zplsc_24_datafile:
                            log.warning('Unable to aggregate raw data files for: %s', echogram_date)
                            continue

                        # Send the 24-hour raw data file to the zplsc C Series parser to generate the echogram.
                        with open(zplsc_24_datafile) as file_handle:
                            parser = ZplscCParser(CONFIG, file_handle, self.rec_exception_callback)
                            parser.create_echogram(zplsc_echogram_file_path)

                        if not self.keep_temp_files:
                            self.purge_temporary_files()

        # Remove the temporary data file directory and its content.
        if not self.keep_temp_files:
            shutil.rmtree(self.temp_directory)

        # If it's running as a daily process, wait 24 hours and re-run this method
        if self.process_mode:
            threading.Timer(SECONDS_IN_DAY, self.generate_zplsc_echograms).start()
    def get_date_dirs(self, subsite, deployment):
        """
        This method will generate the URL of the directory of date directories
        in the format of YYYYMM and determine the dates to generate echograms for.

        As a side effect, self.serial_num is set to the serial number captured
        by SERIAL_NUM_DIR_MATCHER from the instrument directory listing.

        :param subsite: The subsite of the ZPLSC instrument.
        :param deployment: The deployment number of the data of interest.
        :return: echogram_dates: The mapping of echogram dates to the entire month flag
                 date_dirs_url: The path to the date directories.
        :raises urllib2.HTTPError: If none of the DCL paths can be read, or a
            later directory listing fails.
        :raises ValueError: If no ZPLSC instrument directory is found in the
            listing, or a date directory cannot be converted to a year and a month.
        """

        # Generate the portion of the URL up to the DCL directory to get the all the instrument sub-directories.
        deployment_url = os.path.join(RAW_DATA_URL, subsite.upper(), 'R%05d' % deployment)
        dcl_url = ''
        instrument_dirs = ''
        last_index = len(DCL_PATHS) - 1
        for index, dcl_path in enumerate(DCL_PATHS):
            dcl_url = os.path.join(deployment_url, dcl_path)
            try:
                instrument_dirs = self.get_dir_contents(dcl_url)
                break

            except urllib2.HTTPError:
                log.info('Could not find path: %s: checking alternate path', dcl_path)
                # Give up once the last candidate has failed; compare by position
                # rather than relying on string object identity.
                if index == last_index:
                    raise

        # Generate the portion of the URL up to the ZPLSC Instrument serial number.
        serial_num_found = SERIAL_NUM_DIR_MATCHER.search(instrument_dirs)
        if serial_num_found is None:
            log.warning('Could not find ZPLSC data for subsite: %s and deployment: %s', subsite, deployment)
            raise ValueError('Could not find ZPLSC data for subsite: %s and deployment: %s' % (subsite, deployment))

        # Group 1 is the instrument directory name and group 2 the serial number within it.
        self.serial_num = serial_num_found.group(2)
        serial_num_url = os.path.join(dcl_url, serial_num_found.group(1))
        sub_dirs = self.get_dir_contents(serial_num_url)

        # Generate the portion of the URL that contains the recovered data path.
        recovered_path = RECOVERED_DIR % (subsite.lower(), self.serial_num)
        start_idx = sub_dirs.find(recovered_path)

        # If this is the directory structure that has the recovered directory, add it to the URL.
        date_dirs_url = serial_num_url
        if start_idx != -1:
            # The recovered directory name is the prefix followed by a date of RECOVERED_DATE_FMT length.
            end_idx = start_idx + len(recovered_path) + len(RECOVERED_DATE_FMT)
            recovered_path = os.path.join(sub_dirs[start_idx:end_idx], DATA_PATH)

            # Create the raw data URL with the recovered path
            date_dirs_url = os.path.join(serial_num_url, recovered_path)

        # If no dates were entered on the command line, get the entire  list of date directories.
        echogram_dates = self.echogram_dates
        if not echogram_dates:
            # Get all the year/month date subdirectories for this subsite and deployment.
            date_dirs_response = self.get_dir_contents(date_dirs_url)

            # Generate the list of the date directories, latest first.
            echogram_dates = {}
            date_dirs_list = DATE_DIR_RE_MATCHER.findall(date_dirs_response)
            date_dirs_list = sorted(date_dirs_list, key=lambda x: (x[0], x[1]), reverse=True)

            # If in process mode, get the latest date that has 24 1-hour data files for echogram generation.
            if self.process_mode:
                echogram_dates[self.get_latest_echogram_date(date_dirs_url, date_dirs_list)] = False

            # Otherwise, get all the year/month date subdirectories for this subsite and deployment.
            else:
                for date_dir in date_dirs_list:
                    year = int(date_dir[0])
                    month = int(date_dir[1])

                    # Save the date and indicate that the entire month should be generated.
                    echogram_dates[date(year, month, 1)] = True

        return echogram_dates, date_dirs_url