Пример #1
0
 def sync_instrument(self, instrument: Instrument) -> None:
     """Copy every file of an instrument from SFTP to its storage blob.

     The destination of each file is looked up by file name in the
     mapping returned by ``instrument.get_blob_filepaths()``; the SFTP
     source is the file name joined onto ``instrument.sftp_path``.  Each
     (destination, source) pair is handed to ``self.sync_file``.

     A file listed in ``instrument.files`` with no entry in that mapping
     makes the lookup fail before anything is synced for it.
     """
     destinations = instrument.get_blob_filepaths()
     for name in instrument.files:
         target = destinations[name]
         source = f"{instrument.sftp_path}/{name}"
         log.info(f"Syncing file from SFTP: {source} to GCP: {target}")
         self.sync_file(target, source)
 def get_instrument_folders(self) -> Dict[str, Instrument]:
     """Find the instrument folders under the survey source path.

     Lists ``self.sftp_config.survey_source_path`` over the SFTP
     connection and keeps each entry whose name matches
     ``self.config.instrument_regex``.  ``match`` is used, so the
     pattern only has to match from the start of the folder name.

     Returns:
         A dict mapping each matching folder name to an ``Instrument``
         whose ``sftp_path`` is that folder's path under the source path.
     """
     # Compile the pattern and read the source path once, rather than
     # once per directory entry.
     pattern = re.compile(self.config.instrument_regex)
     source_path = self.sftp_config.survey_source_path
     instruments = {}
     for folder in self.sftp_connection.listdir(source_path):
         if not pattern.match(folder):
             continue
         log.info(f"Instrument folder found - {folder}")
         instruments[folder] = Instrument(sftp_path=f"{source_path}/{folder}")
     return instruments
 def _filter_non_bdbx(
         _self, instruments: Dict[str,
                                  Instrument]) -> Dict[str, Instrument]:
     filtered_instruments = {}
     for instrument_name, instrument in instruments.items():
         file_types = [
             pathlib.Path(file).suffix.lower() for file in instrument.files
         ]
         if ".bdbx" in file_types:
             filtered_instruments[instrument_name] = instrument
         else:
             log.info("Instrument database file not found - " +
                      f"{instrument_name} - not importing")
     return filtered_instruments
 def _get_instrument_files_for_instrument(
         self, instrument: Instrument) -> List[str]:
     instrument_file_list = []
     for instrument_file in self.sftp_connection.listdir_attr(
             instrument.sftp_path):
         file_extension = pathlib.Path(
             instrument_file.filename).suffix.lower()
         if file_extension == ".bdbx":
             instrument.bdbx_updated_at = datetime.fromtimestamp(
                 instrument_file.st_mtime, tz=timezone.utc)
         if file_extension in self.config.extension_list:
             log.info(f"Instrument file found - {instrument_file.filename}")
             instrument_file_list.append(instrument_file.filename)
     return instrument_file_list
 def generate_bdbx_md5(self, instrument: Instrument) -> str:
     """Return the MD5 hex digest of an instrument's bdbx file on SFTP.

     The file size is taken from ``stat`` and the file is then read in
     pieces of ``self.config.bufsize`` bytes, so it is hashed without
     being read in a single call.

     Returns:
         The hex digest, or ``""`` when ``instrument.bdbx_file()`` gives
         nothing to hash.
     """
     bdbx_file = instrument.bdbx_file()
     if not bdbx_file:
         log.info(
             f"No bdbx file for '{instrument.sftp_path}' cannot generate an md5"
         )
         return ""
     bufsize = self.config.bufsize
     bdbx_details = self.sftp_connection.stat(bdbx_file)
     md5sum = hashlib.md5()
     chunks = math.ceil(bdbx_details.st_size / bufsize)
     # Use a context manager so the remote file handle is always closed,
     # including when a seek or read fails part-way through.
     with self.sftp_connection.open(bdbx_file, bufsize=bufsize) as sftp_file:
         for chunk in range(chunks):
             sftp_file.seek(chunk * bufsize)
             md5sum.update(sftp_file.read(bufsize))
     return md5sum.hexdigest()
Пример #6
0
    def send_request_to_api(self, instrument_name):
        """POST an instrument's data path to the Blaise API.

        The request goes to the ``data`` endpoint of the instrument in
        ``self.config.server_park`` on ``self.config.blaise_api_url``,
        with ``instrument_name`` sent as ``instrumentDataPath``.

        The request carries a 10 second timeout and a read timeout is
        deliberately ignored: the connection to the API was timing out
        before the API had completed the work, and not waiting for the
        response also allows parallel requests to be made to the API.
        """
        api_url = self.config.blaise_api_url
        log.info(
            f"Sending request to {api_url} for instrument {instrument_name}")
        endpoint = (
            f"http://{api_url}/api/v1/serverparks/"
            f"{self.config.server_park}/instruments/{instrument_name}/data"
        )
        try:
            requests.post(
                endpoint,
                headers={"content-type": "application/json"},
                json={"instrumentDataPath": instrument_name},
                timeout=10,
            )
        except requests.exceptions.ReadTimeout:
            # Expected: the API keeps working after the client stops waiting.
            pass
 def _get_latest_conflicting_instrument(
     _self,
     instruments: Dict[str, Instrument],
     confilcting_instruments: Dict[str, List[str]],
     instrument_name: str,
 ) -> Instrument:
     conflict_instruments = confilcting_instruments[instrument_name.lower()]
     instrument_conflicts = {
         instrument_name: instruments[instrument_name]
         for instrument_name in conflict_instruments
     }
     sorted_conflicts = sorted(
         [instrument for _, instrument in instrument_conflicts.items()],
         key=operator.attrgetter("bdbx_updated_at"),
         reverse=True,
     )
     latest_instrument = sorted_conflicts[0]
     for conflict in sorted_conflicts[1:]:
         log.info(
             f"Found newer instrument '{latest_instrument.sftp_path}' " +
             f"folder - Skipping this folder '{conflict.sftp_path}'")
     return latest_instrument
Пример #8
0
def process_instrument(case_mover: CaseMover, instrument_name: str,
                       instrument: Instrument) -> None:
    """Sync one instrument and notify the API, or skip it.

    When ``case_mover.bdbx_md5_changed(instrument)`` is truthy the
    instrument is skipped with a log line.  Otherwise its files are
    synced and a request is sent to the API for
    ``instrument.gcp_folder()``.
    """
    log.info(
        f"Processing instrument - {instrument_name} - {instrument.sftp_path}")
    # NOTE(review): the helper's name suggests a truthy result means the
    # md5 changed, yet this branch logs "no changes" and skips. Confirm
    # the helper's return convention against its definition.
    if case_mover.bdbx_md5_changed(instrument):
        log.info(f"Instrument - {instrument_name} - "
                 "has no changes to the databse file, skipping...")
        return
    log.info(f"Syncing instrument - {instrument_name}")
    case_mover.sync_instrument(instrument)
    case_mover.send_request_to_api(instrument.gcp_folder())
Пример #9
0
def handle_exception(exception):
    """Log an exception and answer with a generic error response.

    Returns:
        The pair ``("Exception occurred", 500)``.
    """
    response = ("Exception occurred", 500)
    log.error("Exception - %s", exception)
    # NOTE(review): this only logs that the connection is closed; nothing
    # here closes it. Presumably the caller's context manager already did.
    log.info("SFTP connection closed")
    return response
Пример #10
0
def main():
    """Sync every filtered instrument found on the SFTP server.

    Reads both configs from ``current_app``, checks the storage bucket,
    opens an SFTP connection and processes each instrument returned by
    ``get_filtered_instruments``.

    Returns:
        A ``(message, status)`` pair:
        ``("Connection to bucket failed", 500)`` when the bucket is
        ``None``; ``("No instrument folders found, exiting", 200)`` when
        there is nothing to process; otherwise
        ``("Process complete", 200)``.
    """
    config = current_app.nisra_config
    sftp_config = current_app.sftp_config

    google_storage = init_google_storage(config)
    if google_storage.bucket is None:
        return "Connection to bucket failed", 500

    log.info("Connecting to SFTP server")
    # NOTE(review): clearing hostkeys appears to turn off pysftp's host
    # key verification - confirm this is intended for this environment.
    cnopts = pysftp.CnOpts()
    cnopts.hostkeys = None

    connection_args = {
        "host": sftp_config.host,
        "username": sftp_config.username,
        "password": sftp_config.password,
        "port": int(sftp_config.port),
        "cnopts": cnopts,
    }
    with pysftp.Connection(**connection_args) as sftp_connection:
        log.info("Connected to SFTP server")

        sftp = SFTP(sftp_connection, sftp_config, config)
        case_mover = CaseMover(google_storage, config, sftp)
        instruments = get_filtered_instruments(sftp)
        log.info(f"Processing survey - {sftp_config.survey_source_path}")

        if not instruments:
            log.info("No instrument folders found")
            return "No instrument folders found, exiting", 200

        for name, instrument in instruments.items():
            process_instrument(case_mover, name, instrument)

    log.info("SFTP connection closed")
    log.info("Process complete")
    return "Process complete", 200
 def log(self):
     """Write each configuration value to the log, one line per field."""
     fields = (
         "bucket_name",
         "instrument_regex",
         "extension_list",
         "server_park",
         "blaise_api_url",
     )
     for field in fields:
         # Each field is logged under its own attribute name.
         log.info(f"{field} - {getattr(self, field)!s}")
from flask import Flask

from app.mover import mover
from pkg.config import Config
from pkg.sftp import SFTPConfig
from util.service_logging import log

# Flask application object for this module; the blueprint is registered on it below.
app = Flask(__name__)


def load_config(app: Flask) -> None:
    """Load configuration from the environment and attach it to ``app``.

    Both configs are built with their ``from_env`` constructors, written
    to the log, and stored on the application as ``app.nisra_config``
    and ``app.sftp_config``.

    Raises:
        Exception: if the SFTP config's ``survey_source_path`` is an
            empty string.
    """
    sftp_settings = SFTPConfig.from_env()
    settings = Config.from_env()
    if sftp_settings.survey_source_path == "":
        problem = "survey_source_path is blank"
        log.error(problem)
        raise Exception(problem)
    settings.log()
    sftp_settings.log()
    app.nisra_config = settings
    app.sftp_config = sftp_settings


# Attach the routes defined on the ``mover`` blueprint to the application.
app.register_blueprint(mover)

# Runs at import time, after the application object has been set up.
log.info("Application started")
 def log(self):
     """Write the SFTP settings to the log, one line per setting.

     Only the source path, host, port and username are logged.
     """
     labelled = (
         ("survey_source_path", "survey_source_path"),
         ("sftp_host", "host"),
         ("sftp_port", "port"),
         ("sftp_username", "username"),
     )
     for label, attribute in labelled:
         log.info(f"{label} - {getattr(self, attribute)}")