示例#1
0
def write_if_allowed(filename: str,
                     content: str,
                     overwrite: bool = False,
                     mock: bool = False) -> None:
    """
    Write ``content`` to ``filename``, subject to permission checks.

    Args:
        filename: target filename
        content: text to write
        overwrite: is overwriting an existing file permitted?
        mock: dry-run mode -- log, but don't touch the filesystem

    Raises:
        RuntimeError: if the file exists and ``overwrite`` is not set
    """
    # Permission check first: never clobber without explicit consent.
    # (The existence test is skipped entirely when overwriting is allowed.)
    if not overwrite and exists(filename):
        fail(f"File exists, not overwriting: {filename!r}")

    # Ensure the parent directory exists -- but not in mock mode, where
    # no filesystem changes at all are made.
    parent = dirname(filename)
    if not mock:
        mkdir_p(parent)

    # Perform (or merely announce) the write.
    log.info("Writing to {!r}", filename)
    if not mock:
        with open(filename, "wt") as outfile:
            outfile.write(content)
    else:
        log.warning("Skipping writes as in mock mode")
示例#2
0
def test_anon(uniquepatients: bool, limit: int, from_src: bool, rawdir: str,
              anondir: str, scrubfile: str, resultsfile: str, dsttable: str,
              dstfield: str) -> None:
    """
    Fetch raw and anonymised documents and store them in files for comparison,
    along with some summary information.

    Args:
        uniquepatients:
            fetch one document each for a lot of patients (rather than a lot of
            documents, potentially from the same patient or a small number)?
        limit:
            maximum number of documents to retrieve
        from_src:
            retrieve IDs from the source database, not the destination
            database?
        rawdir:
            directory to store raw documents in
        anondir:
            directory to store anonymised documents in
        scrubfile:
            filename to store scrubber information in (as JSON)
        resultsfile:
            filename to store CSV summaries in
        dsttable:
            name of the destination table
        dstfield:
            name of the destination table's text field of interest
    """
    fieldinfo = FieldInfo(dsttable, dstfield)
    docids = get_docids(fieldinfo=fieldinfo,
                        uniquepatients=uniquepatients,
                        limit=limit,
                        from_src=from_src)
    mkdir_p(rawdir)
    mkdir_p(anondir)
    scrubdict = {}  # type: Dict[int, Dict[str, Any]]
    pidset = set()  # type: Set[int]
    # FIX: files handed to csv.writer must be opened with newline='';
    # otherwise universal newline translation can corrupt rows (e.g. extra
    # blank lines on Windows). See the Python csv module documentation.
    with open(resultsfile, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter='\t')
        first = True  # tells process_doc to emit the header row (presumably)
        for docid in docids:
            # noinspection PyTypeChecker
            pid = process_doc(docid=docid,
                              rawdir=rawdir,
                              anondir=anondir,
                              fieldinfo=fieldinfo,
                              csvwriter=csvwriter,
                              first=first,
                              scrubdict=scrubdict)
            first = False
            pidset.add(pid)
    # Dump the accumulated scrubber information as pretty-printed JSON.
    with open(scrubfile, 'w') as f:
        f.write(json.dumps(scrubdict, indent=4))
    log.info(f"Finished. See {resultsfile} for a summary.")
    log.info(f"Use meld to compare directories {rawdir} and {anondir}")
    log.info("To install meld on Debian/Ubuntu: sudo apt-get install meld")
    log.info(f"{len(docids)} documents, {len(pidset)} patients")
示例#3
0
def download_if_not_exists(url: str,
                           filename: str,
                           skip_cert_verify: bool = True,
                           mkdir: bool = True) -> None:
    """
    Downloads a URL to a file, unless the file already exists.

    Args:
        url: URL to download from
        filename: destination filename
        skip_cert_verify: skip SSL certificate verification when downloading?
            NOTE(review): defaulting to True is insecure; kept for backward
            compatibility with existing callers.
        mkdir: create the destination directory if it doesn't exist?
    """
    if os.path.isfile(filename):
        log.info("No need to download, already have: {}", filename)
        return
    if mkdir:
        # Only the directory part is needed; previously os.path.split was
        # used and its basename result discarded.
        directory = os.path.dirname(os.path.abspath(filename))
        mkdir_p(directory)
    download(url=url, filename=filename, skip_cert_verify=skip_cert_verify)
示例#4
0
    def export_file(self,
                    filename: str,
                    text: str = None,
                    binary: bytes = None,
                    text_encoding: str = UTF8) -> bool:
        """
        Exports the file.

        Args:
            filename: destination filename (converted to absolute form below)
            text: text contents (specify this XOR ``binary``)
            binary: binary contents (specify this XOR ``text``)
            text_encoding: encoding to use when writing text

        Returns:
            bool: was it exported?
        """
        # FIX: the return annotation was "-> False", which is a value, not a
        # type; the docstring and every return statement show it returns bool.
        assert bool(text) != bool(binary), "Specify text XOR binary"
        exported_task = self.exported_task
        filename = os.path.abspath(filename)
        directory = os.path.dirname(filename)
        recipient = exported_task.recipient

        # Refuse to overwrite unless the recipient's config permits it.
        if not recipient.file_overwrite_files and os.path.isfile(filename):
            self.abort("File already exists: {!r}".format(filename))
            return False

        if recipient.file_make_directory:
            try:
                mkdir_p(directory)
            except Exception as e:
                self.abort("Couldn't make directory {!r}: {}".format(
                    directory, e))
                return False

        try:
            log.debug("Writing to {!r}", filename)
            if text:
                with open(filename, mode="w", encoding=text_encoding) as f:
                    f.write(text)
            else:
                with open(filename, mode="wb") as f:
                    f.write(binary)
        except Exception as e:
            self.abort("Failed to open or write file {!r}: {}".format(
                filename, e))
            return False

        self.note_exported_file(filename)
        return True
示例#5
0
    def _start(self) -> None:
        """
        Launch the external process. We will save and retrieve data via files,
        and send signals ("data ready", "results ready") via stdin/stdout.
        """
        # No-op if already running, or if in debug mode (no real config).
        if self._started or self._debug_mode:
            return
        args = self._progargs

        # Nasty MedEx hacks: MedEx resolves some paths relative to the
        # current directory, so temporarily chdir into the working directory
        # (restored at the end of this method).
        cwd = os.getcwd()
        log.info(f"for MedEx's benefit, changing to directory: "
                 f"{self._workingdir.name}")
        os.chdir(self._workingdir.name)
        # MedEx expects "sents" and "log" subdirectories to exist -- presumably;
        # we create them pre-emptively here.
        sentsdir = os.path.join(self._workingdir.name, "sents")
        log.info(f"making temporary sentences directory: {sentsdir}")
        mkdir_p(sentsdir)
        logdir = os.path.join(self._workingdir.name, "log")
        log.info(f"making temporary log directory: {logdir}")
        mkdir_p(logdir)

        log.info(f"launching command: {args}")
        # NOTE(review): bufsize=1 requests line buffering, which only applies
        # in text mode; these pipes are binary, so Python falls back to the
        # default buffer size (and warns on recent versions) -- confirm
        # whether line buffering was actually intended here.
        self._p = subprocess.Popen(
            args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            # stderr=subprocess.PIPE,
            shell=False,
            bufsize=1)
        # ... don't ask for stderr to be piped if you don't want it; firstly,
        # there's a risk that if you don't consume it, something hangs, and
        # secondly if you don't consume it, you see it on the console, which is
        # helpful.
        self._started = True
        log.info(f"returning to working directory {cwd}")
        os.chdir(cwd)
示例#6
0
    def __init__(self,
                 nlpdef: NlpDefinition,
                 cfgsection: str,
                 commit: bool = False) -> None:
        """
        Sets up the MedEx processor from the NLP definition/config.

        Args:
            nlpdef:
                a :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition`
            cfgsection:
                the name of a CRATE NLP config file section (from which we may
                choose to get extra config information)
            commit:
                force a COMMIT whenever we insert data? You should specify this
                in multiprocess mode, or you may get database deadlocks.
        """
        super().__init__(nlpdef=nlpdef,
                         cfgsection=cfgsection,
                         commit=commit,
                         name="MedEx")

        if nlpdef is None:  # only None for debugging!
            # Debug mode: no config available, so fall back to safe defaults.
            self._debug_mode = True
            self._tablename = self.classname().lower()
            self._max_external_prog_uses = 1
            self._progenvsection = ""
            self._env = {}  # type: Dict[str, str]
            progargs = ""
        else:
            self._debug_mode = False
            # Destination table name is mandatory in the config.
            self._tablename = nlpdef.opt_str(self._sectionname,
                                             ProcessorConfigKeys.DESTTABLE,
                                             required=True)

            # Default of 0: presumably "no limit on external program uses".
            self._max_external_prog_uses = nlpdef.opt_int(
                self._sectionname,
                ProcessorConfigKeys.MAX_EXTERNAL_PROG_USES,
                default=0)

            self._progenvsection = nlpdef.opt_str(
                self._sectionname, ProcessorConfigKeys.PROGENVSECTION)

            # Build the child process environment: either from a dedicated
            # config section (layered on os.environ), or a copy of os.environ.
            if self._progenvsection:
                self._env = nlpdef.get_env_dict(
                    full_sectionname(NlpConfigPrefixes.ENV,
                                     self._progenvsection), os.environ)
            else:
                self._env = os.environ.copy()
            self._env["NLPLOGTAG"] = nlpdef.get_logtag() or '.'
            # ... because passing a "-lt" switch with no parameter will make
            # CrateGatePipeline.java complain and stop

            progargs = nlpdef.opt_str(self._sectionname,
                                      ProcessorConfigKeys.PROGARGS,
                                      required=True)

        if USE_TEMP_DIRS:
            self._inputdir = tempfile.TemporaryDirectory()
            self._outputdir = tempfile.TemporaryDirectory()
            self._workingdir = tempfile.TemporaryDirectory()
            # ... these are autodeleted when the object goes out of scope; see
            #     https://docs.python.org/3/library/tempfile.html
            # ... which manages it using weakref.finalize
        else:
            # Fixed directories under the user's home; unlike the temporary
            # directories above, these persist after the run.
            homedir = os.path.expanduser("~")
            self._inputdir = PseudoTempDir(
                os.path.join(homedir, "medextemp", "input"))
            mkdir_p(self._inputdir.name)
            self._outputdir = PseudoTempDir(
                os.path.join(homedir, "medextemp", "output"))
            mkdir_p(self._outputdir.name)
            self._workingdir = PseudoTempDir(
                os.path.join(homedir, "medextemp", "working"))
            mkdir_p(self._workingdir.name)

        # Substitute environment variables into the program arguments, split
        # them shell-style, then append the fixed MedEx I/O options.
        formatted_progargs = progargs.format(**self._env)
        self._progargs = shlex.split(formatted_progargs)
        self._progargs.extend([
            "-data_ready_signal",
            MEDEX_DATA_READY_SIGNAL,
            "-results_ready_signal",
            MEDEX_RESULTS_READY_SIGNAL,
            "-i",
            self._inputdir.name,
            "-o",
            self._outputdir.name,
        ])

        self._n_uses = 0  # usage counter; cf. self._max_external_prog_uses
        self._pipe_encoding = 'utf8'  # encoding for the stdin/stdout pipes
        self._file_encoding = 'utf8'  # encoding for data files
        self._p = None  # the subprocess
        self._started = False  # has the external process been launched?
def build_package() -> None:
    """
    Builds the package.

    Overview of steps: build a Python source distribution; assemble the
    Debian packaging tree (docs, man pages, launch scripts, maintainer
    scripts, Lintian override); set ownership/permissions; build the .deb
    via dpkg-deb; check it with Lintian; convert to RPM via alien and
    rewrite the RPM's dependency list with rpmrebuild.
    """
    log.info("Building Python package")

    setup_py = join(SRCSERVERDIR, "setup.py")
    sdist_basefilename = "camcops_server-{}.tar.gz".format(MAINVERSION)
    src_sdist_file = join(SRCSERVERDIR, "dist", sdist_basefilename)
    wrk_sdist_file = join(WRKBASEDIR, sdist_basefilename)

    # Delete any stale sdist first, so we don't package an old build.
    try:
        log.info("Deleting old {} if it exists", src_sdist_file)
        os.remove(src_sdist_file)
    except OSError:
        pass  # e.g. the file didn't exist
    os.chdir(SETUP_PY_DIR)  # or setup.py looks in wrong places?
    cmdargs = ["python", setup_py, "sdist"]
    call(cmdargs)
    # Strip the gzip timestamp (presumably for reproducible output).
    remove_gzip_timestamp(src_sdist_file)

    log.info("Making directories")
    mkdir_p(DEBDIR)
    mkdir_p(DEBOVERRIDEDIR)
    mkdir_p(PACKAGEDIR)
    mkdir_p(RPMTOPDIR)
    mkdir_p(WRKCONFIGDIR)
    mkdir_p(WRKCONSOLEFILEDIR)
    mkdir_p(WRKDIR)
    mkdir_p(WRKDOCDIR)
    mkdir_p(WRKMANDIR)
    mkdir_p(WRKMPLCONFIGDIR)
    mkdir_p(WRKBASEDIR)
    mkdir_p(WRKTOOLDIR)
    # Standard rpmbuild working-tree layout under the RPM top directory:
    for d in "BUILD,BUILDROOT,RPMS,RPMS/noarch,SOURCES,SPECS,SRPMS".split(","):
        mkdir_p(join(RPMTOPDIR, d))

    log.info("Copying files")
    write_gzipped_text(join(WRKDOCDIR, "changelog.Debian"), get_changelog())
    copyglob(join(SRCTOOLDIR, VENVSCRIPT), WRKTOOLDIR)
    shutil.copyfile(src_sdist_file, wrk_sdist_file)

    log.info("Creating man page for camcops. "
             "Will be installed as " + DSTMANFILE)
    write_gzipped_text(WRKMANFILE_BASE, get_man_page_camcops_server())

    log.info("Creating man page for camcops_server_meta. "
             "Will be installed as " + DSTMETAMANFILE)
    write_gzipped_text(WRKMETAMANFILE_BASE, get_man_page_camcops_server_meta())

    log.info("Creating links to documentation. "
             "Will be installed as " + DSTREADME)
    write_text(WRKREADME, get_readme())

    log.info("Creating camcops_server launch script. "
             "Will be installed as " + DSTCONSOLEFILE)
    write_text(WRKCONSOLEFILE, get_camcops_server_launcher())

    log.info("Creating camcops_server_meta launch script. "
             "Will be installed as " + DSTMETACONSOLEFILE)
    write_text(WRKMETACONSOLEFILE, get_camcops_server_meta_launcher())

    log.info("Creating Debian control file")

    write_text(join(DEBDIR, "control"), get_debian_control())

    # Debian maintainer scripts: preinst/postinst/prerm/postrm.
    log.info("Creating preinst file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + ".preinst"))
    write_text(join(DEBDIR, "preinst"), get_preinst())

    log.info("Creating postinst file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + ".postinst"))
    write_text(join(DEBDIR, "postinst"), get_postinst(sdist_basefilename))

    log.info("Creating prerm file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + ".prerm"))
    write_text(join(DEBDIR, "prerm"), get_prerm())

    log.info("Creating postrm file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + ".postrm"))
    write_text(join(DEBDIR, "postrm"), get_postrm())

    log.info("Creating Lintian override file")
    write_text(join(DEBOVERRIDEDIR, PACKAGE_DEB_NAME), get_override())

    log.info("Creating copyright file. Will be installed as " +
             join(DSTDOCDIR, "copyright"))
    write_text(join(WRKDOCDIR, "copyright"), get_copyright())

    log.info("Setting ownership and permissions")
    call(["find", WRKDIR, "-type", "d", "-exec", "chmod", "755", "{}", ";"])
    # ... make directories executable: must do that first, or all the
    # subsequent recursions fail
    call(["find", WRKDIR, "-type", "f", "-exec", "chmod", "644", "{}", ";"])
    call([
        "chmod",
        "a+x",
        WRKCONSOLEFILE,
        WRKMETACONSOLEFILE,
        join(DEBDIR, "prerm"),
        join(DEBDIR, "postrm"),
        join(DEBDIR, "preinst"),
        join(DEBDIR, "postinst"),
    ])
    call(
        ["find", WRKDIR, "-iname", "*.py", "-exec", "chmod", "a+x", "{}", ";"])
    call(
        ["find", WRKDIR, "-iname", "*.pl", "-exec", "chmod", "a+x", "{}", ";"])

    log.info("Removing junk")
    call(["find", WRKDIR, "-name", "*.svn", "-exec", "rm", "-rf", "{}", ";"])
    call(["find", WRKDIR, "-name", ".git", "-exec", "rm", "-rf", "{}", ";"])
    call([
        "find",
        WRKDOCDIR,
        "-name",
        "LICENSE",
        "-exec",
        "rm",
        "-rf",
        "{}",
        ";",
    ])

    log.info("Building package")
    call(["fakeroot", "dpkg-deb", "--build", WRKDIR, PACKAGENAME])
    # ... "fakeroot" prefix makes all files installed as root:root

    log.info("Checking with Lintian")
    # fail-in-warnings has gone in 2.62.0
    # It isn't clear if lintian now exits with 0 on warnings (the previous
    # default). Future versions seems to have a more flexible --fail-on option
    call(["lintian", PACKAGENAME])

    log.info("Converting to RPM")
    call(
        ["fakeroot", "alien", "--to-rpm", "--scripts", PACKAGENAME],
        cwd=PACKAGEDIR,
    )
    # see "man alien"
    # NOTE: needs to be run as root for correct final permissions
    # NOTE(review): "-2" is presumably the RPM release number alien assigns;
    # confirm against the alien version in use.
    expected_main_rpm_name = "{PACKAGE}-{MAINVERSION}-2.noarch.rpm".format(
        PACKAGE=PACKAGE_DEB_NAME, MAINVERSION=MAINVERSION)
    full_rpm_path = join(PACKAGEDIR, expected_main_rpm_name)
    # This chown is causing problems with GitHub actions. The user is 'runner'
    # and there is no group called 'runner'. Is it needed anyway? Seems to run
    # OK locally without this line.
    # myuser = getpass.getuser()
    # shutil.chown(full_rpm_path, myuser, myuser)

    log.info("Changing dependencies within RPM")
    # Alien does not successfully translate the dependencies, and anyway the
    # names for packages are different on CentOS. A dummy prerequisite package
    # works (below) but is inelegant.
    # The rpmbuild commands are filters (text in via stdin, text out to
    # stdout), so replacement just needs the echo command.

    depends_rpm = get_lines_without_comments(RPM_REQ_FILE)
    echoparam = repr("Requires: {}".format(" ".join(depends_rpm)))
    call([
        "rpmrebuild",
        "--define",
        "_topdir " + RPMTOPDIR,
        "--package",
        "--change-spec-requires=/bin/echo {}".format(echoparam),
        full_rpm_path,
    ])
    # ... add "--edit-whole" as the last option before the RPM name to see what
    #     you're getting
    # ... define topdir, or it builds in ~/rpmbuild/...
    # ... --package, or it looks for an installed RPM rather than a package
    #     file
    # ... if echo parameter has brackets in, ensure it's quoted

    shutil.move(
        join(RPMTOPDIR, "RPMS", "noarch", expected_main_rpm_name),
        join(PACKAGEDIR, expected_main_rpm_name),
    )
    # ... will overwrite its predecessor

    log.info("Deleting temporary workspace")
    shutil.rmtree(TMPDIR, ignore_errors=True)  # CAUTION!

    # Done
    log.info("=" * 79)
    log.info("Debian package should be: " + PACKAGENAME)
    log.info("RPM should be: " + full_rpm_path)
示例#8
0
    if not _archive_static_dir:
        missing.append(SettingsKeys.ARCHIVE_STATIC_DIR)
    if not _archive_template_cache_dir:
        missing.append(SettingsKeys.ARCHIVE_TEMPLATE_CACHE_DIR)
    if not _archive_template_dir:
        missing.append(SettingsKeys.ARCHIVE_TEMPLATE_DIR)
    return HttpResponseBadRequest(
        f"Archive not configured. Administrator has not set: {missing!r}")


# =============================================================================
# Set up caches and Mako lookups.
# =============================================================================

if ARCHIVE_IS_CONFIGURED:
    # The template cache directory must exist before TemplateLookup writes
    # compiled template modules into it.
    mkdir_p(_archive_template_cache_dir)
    archive_mako_lookup = TemplateLookup(
        directories=[_archive_template_dir],
        module_directory=_archive_template_cache_dir,
        strict_undefined=True,  # raise error immediately upon typos!
    )
else:
    # Archive not configured; callers should check for None before use.
    archive_mako_lookup = None

# =============================================================================
# Auditing
# =============================================================================


def audit_archive_template(request: HttpRequest, patient_id: str,
                           query_string: str) -> None:
示例#9
0
# Paths at their final installed destinations (presumably used for install
# scripts and user-facing messages -- confirm against callers).
DEST_SUPERVISOR_CONF_FILE = join(DEST_SUPERVISOR_CONF_DIR,
                                 f'{PACKAGE_FOR_DEB}.conf')
DEB_PACKAGE_FILE = join(PACKAGE_DIR, f'{PACKAGE_FOR_DEB}_{DEBVERSION}_all.deb')
LOCAL_CONFIG_BASENAME = "crateweb_local_settings.py"
DEST_CRATEWEB_CONF_FILE = join(DEST_PACKAGE_CONF_DIR, LOCAL_CONFIG_BASENAME)
INSTRUCTIONS = join(DEST_ROOT, 'instructions.txt')
DEST_VENV_INSTALLER = join(DEST_ROOT, 'tools', 'install_virtualenv.py')
DEST_WKHTMLTOPDF_INSTALLER = join(DEST_ROOT, 'tools', 'install_wkhtmltopdf.py')
DEST_CRATE_PIPFILE = join(DEST_ROOT, CRATE_PIPFILE)

# =============================================================================
# Make directories
# =============================================================================

print("Making directories")
# Build-tree directories; workpath() presumably maps a destination path into
# the work directory -- confirm against its definition.
mkdir_p(WORK_DIR)
mkdir_p(workpath(DEST_ROOT))
mkdir_p(workpath(DEST_PACKAGE_CONF_DIR))
mkdir_p(workpath(DEST_SUPERVISOR_CONF_DIR))
mkdir_p(workpath(DEST_DOC_DIR))
mkdir_p(DEB_DIR)
mkdir_p(DEB_OVERRIDE_DIR)

# =============================================================================
# Make Debian files
# =============================================================================

# -----------------------------------------------------------------------------
print("Creating preinst file. Will be installed as " +
      join(INFO_DEST_DPKG_DIR, PACKAGE_FOR_DEB + '.preinst'))
# -----------------------------------------------------------------------------
示例#10
0
def untar_to_directory(tarfile: str,
                       directory: str,
                       verbose: bool = False,
                       gzipped: bool = False,
                       skip_if_dir_exists: bool = True,
                       run_func: RunFuncType = None,
                       chdir_via_python: bool = True,
                       tar_executable: str = None,
                       tar_supports_force_local: bool = None) -> None:
    """
    Unpacks a TAR file into a specified directory.

    Args:
        tarfile:
            filename of the ``.tar`` file
        directory:
            destination directory
        verbose:
            be verbose?
        gzipped:
            is the ``.tar`` also gzipped, e.g. a ``.tar.gz`` file?
        skip_if_dir_exists:
            don't do anything if the destination directory exists?
        run_func:
            function to use to call an external command; if ``None`` (the
            default), :func:`subprocess.check_call` is used
        chdir_via_python:
            change directory via Python, not via ``tar``. Consider using this
            via Windows, because Cygwin ``tar`` v1.29 falls over when given a
            Windows path for its ``-C`` (or ``--directory``) option.
        tar_executable:
            name of the ``tar`` executable (default is ``tar``)
        tar_supports_force_local:
            does tar support the ``--force-local`` switch? If you pass ``None``
            (the default), this is checked directly via ``tar --help``.
            Linux/GNU tar does; MacOS tar doesn't; Cygwin tar does; Windows 10
            (build 17063+) tar doesn't.
    """
    if skip_if_dir_exists and os.path.isdir(directory):
        log.info("Skipping extraction of {} as directory {} exists", tarfile,
                 directory)
        return
    # FIX: previously, omitting run_func crashed with "'NoneType' object is
    # not callable"; default to a plain synchronous call.
    if run_func is None:
        import subprocess
        run_func = subprocess.check_call
    tar = which_and_require(tar_executable or "tar")
    if tar_supports_force_local is None:
        tar_supports_force_local = tar_supports_force_local_switch(tar)
    log.info("Extracting {} -> {}", tarfile, directory)
    mkdir_p(directory)
    args = [tar, "-x"]  # -x: extract
    if verbose:
        args.append("-v")  # -v: verbose
    if gzipped:
        args.append("-z")  # -z: decompress using gzip
    if tar_supports_force_local:
        args.append("--force-local")  # allows filenames with colons in
    args.extend(["-f", tarfile])  # -f: filename follows
    if chdir_via_python:
        with pushd(directory):
            run_func(args)
    else:
        # chdir via tar
        args.extend(["-C", directory])  # -C: change to directory
        run_func(args)
示例#11
0
def prepare_umls_for_bioyodie(cfg: UmlsBioyodieConversionConfig) -> None:
    """
    Prepare downloaded UMLS data for Bio-YODIE, according to the instructions
    at https://github.com/GateNLP/bio-yodie-resource-prep.

    Args:
        cfg: conversion configuration; fields used here include the UMLS zip
            path, Java/GATE homes, Groovy executable, temporary and
            destination directories, and repository/Scala URLs.

    Steps: clone the Bio-YODIE resource-prep repo; fetch/build Scala; unzip
    the UMLS data and MetamorphoSys; run MetamorphoSys in batch mode; run
    the Bio-YODIE builder script; copy the results to ``cfg.dest_dir``.
    """
    # -------------------------------------------------------------------------
    # Parameter checks
    # -------------------------------------------------------------------------
    assert cfg.java_home
    assert cfg.gate_home

    # -------------------------------------------------------------------------
    # Establish the release (version)
    # -------------------------------------------------------------------------
    # There are two releases per year, e.g. 2017AA and 2017AB.
    release_regex = regex.compile(r"umls-(\d\d\d\dA[AB])-full.zip")
    umls_zip_basename = os.path.basename(cfg.umls_zip)
    try:
        release = release_regex.match(umls_zip_basename).group(1)
    except AttributeError:  # 'NoneType' object has no attribute 'group'
        release = None  # for type-checker only (below)
        die(f"Unable to work out UMLS release from filename: "
            f"{umls_zip_basename!r}")

    # -------------------------------------------------------------------------
    # Directory names
    # -------------------------------------------------------------------------
    umls_root_dir = join(cfg.tmp_dir, "umls_data_with_mmsys")
    umls_metadir = umls_root_dir
    umls_mmsys_home = umls_metadir
    # ... because the GUI installer wants "release.dat" (which is in the root
    # and config/2017AA directories of "mmsys.zip") to be in the same directory
    # as the Metathesaurus files. Do NOT put it in a "MMSYS" subdirectory,
    # despite
    # https://www.nlm.nih.gov/research/umls/implementation_resources/community/mmsys/BatchMRCXTBuilder.html
    umls_lib_dir = join(umls_mmsys_home, "lib")
    umls_plugins_dir = join(umls_mmsys_home, "plugins")

    umls_output_dir = join(cfg.tmp_dir, "umls_output")
    # ... Where we tell it to store data.
    # Log files and other output go here.

    bioyodie_repo_dir = join(cfg.tmp_dir, "bio-yodie-resource-prep")
    bioyodie_db_dir = join(bioyodie_repo_dir, "databases")
    bioyodie_scala_dir = join(bioyodie_repo_dir, "scala")
    bioyodie_tmpdata_dir = join(bioyodie_repo_dir, "tmpdata")
    bioyodie_umls_dir_containing_symlink = join(
        bioyodie_repo_dir, "srcs", "umls", "2015AB")  # hard-coded "2015AB"
    bioyodie_umls_input_dir = join(bioyodie_umls_dir_containing_symlink,
                                   "META")  # hard-coded "META"
    bioyodie_output_dir = join(bioyodie_repo_dir, "output")

    # -------------------------------------------------------------------------
    # Filenames
    # -------------------------------------------------------------------------
    scala_tgz = join(bioyodie_scala_dir, "scala.tgz")
    builder_script = join(bioyodie_repo_dir, "bin", "all.sh")
    mmsys_zip = join(umls_root_dir, "mmsys.zip")
    config_file = join(umls_metadir, "config.properties")
    boot_config = join(umls_mmsys_home, "etc", "subset.boot.properties")
    log4j_config = join(umls_mmsys_home, "etc",
                        "rudolf.log4j.properties")  # new  # noqa

    system_java_home = cfg.java_home
    umls_java_home = join(umls_mmsys_home, "jre", "linux")  # it brings its own

    # -------------------------------------------------------------------------
    # Checks
    # -------------------------------------------------------------------------
    # Fail early (before hours of processing) if prerequisites are missing or
    # the destination already exists.
    if os.path.exists(cfg.dest_dir):
        die(f"Directory already exists: {cfg.dest_dir}")
    system_unzip = require_external_tool("unzip")
    # These are required by the Bio-YODIE preprocessor:
    groovy_executable = cfg.groovy_executable or require_external_tool(
        "groovy")  # noqa
    require_external_tool("gzip")
    require_external_tool("zcat")

    # -------------------------------------------------------------------------
    # Environment variables
    # -------------------------------------------------------------------------
    # For UMLS
    umls_env = os.environ.copy()
    umls_env[EnvVar.JAVA_HOME] = umls_java_home
    # For Bio-YODIE preprocessor
    bioyodie_env = os.environ.copy()
    bioyodie_env[EnvVar.JAVA_HOME] = system_java_home
    bioyodie_env[EnvVar.GATE_HOME] = cfg.gate_home
    # Prepend the Groovy executable's directory to PATH (skipping empty
    # components) so the builder script can find "groovy".
    groovy_dir = os.path.dirname(os.path.abspath(groovy_executable))
    old_path = bioyodie_env.get(EnvVar.PATH, "")
    new_path_with_groovy = os.pathsep.join(x for x in [groovy_dir, old_path]
                                           if x)
    bioyodie_env[EnvVar.PATH] = new_path_with_groovy

    # -------------------------------------------------------------------------
    log.info("Cloning Bio-YODIE resource prep repository...")
    # -------------------------------------------------------------------------
    check_call_verbose(
        ["git", "clone", cfg.bioyodie_prep_repo_url, bioyodie_repo_dir])

    # -------------------------------------------------------------------------
    log.info("Making directories...")
    # -------------------------------------------------------------------------
    mkdir_p(umls_output_dir)
    mkdir_p(bioyodie_db_dir)
    # mkdir_p(bioyodie_scala_dir)  # already exists
    mkdir_p(bioyodie_tmpdata_dir)
    mkdir_p(bioyodie_umls_dir_containing_symlink)
    mkdir_p(bioyodie_output_dir)

    # -------------------------------------------------------------------------
    log.info("Fetching/building Scala for the BioYODIE processor...")
    # -------------------------------------------------------------------------
    # ... either before we set JAVA_HOME (to use the system Java) or after
    # we've unpacked MMSYS (which brings its own JRE), but not in between!
    download(cfg.scala_url, scala_tgz)
    with pushd(bioyodie_scala_dir):
        check_call_verbose(["tar", "-xzvf", scala_tgz])
        check_call_verbose(["ant"], env=bioyodie_env)

    # -------------------------------------------------------------------------
    log.info("Unzipping UMLS data...")
    # -------------------------------------------------------------------------
    check_call_verbose(["unzip", "-j", cfg.umls_zip, "-d", umls_root_dir])
    # -j: junk paths (extract "flat" into the specified directory)

    # -------------------------------------------------------------------------
    log.info("Unzipping UMLS MetamorphoSys (MMSYS) program (and its JRE)...")
    # -------------------------------------------------------------------------
    check_call_verbose(["unzip", mmsys_zip, "-d", umls_mmsys_home])
    # "To ensure proper functionality users must unzip mmsys.zip to the same
    # directory as the other downloaded files."
    # -- https://www.ncbi.nlm.nih.gov/books/NBK9683/
    # ... but see also example above.

    # -------------------------------------------------------------------------
    log.info("Running MetamorphoSys in batch mode...")
    # -------------------------------------------------------------------------
    # https://www.nlm.nih.gov/research/umls/implementation_resources/community/mmsys/BatchMetaMorphoSys.html  # noqa
    classpath = ":".join([
        umls_mmsys_home,
        umls_plugins_dir,  # RNC extra
        join(umls_lib_dir, "jpf-boot.jar"),
        join(umls_lib_dir, "jpf.jar"),  # RNC extra
        # You can use "dir/*" to mean "all JAR files in a directory":
        # https://en.wikipedia.org/wiki/Classpath
        join(umls_plugins_dir, "gov.nih.nlm.umls.meta", "lib",
             "*"),  # RNC extra  # noqa
        join(umls_plugins_dir, "gov.nih.nlm.umls.mmsys", "lib",
             "*"),  # RNC extra  # noqa
        join(umls_plugins_dir, "gov.nih.nlm.umls.mmsys.gui", "lib",
             "*"),  # RNC extra  # noqa
        join(umls_plugins_dir, "gov.nih.nlm.umls.mmsys.io", "lib",
             "*"),  # RNC extra  # noqa
        join(umls_plugins_dir, "gov.nih.nlm.umls.util", "lib",
             "*"),  # RNC extra  # noqa
    ])
    write_text(
        config_file,
        get_mmsys_configfile_text(metadir=umls_metadir,
                                  mmsys_home=umls_mmsys_home,
                                  release=release))
    write_text(log4j_config, LOG4J_PROPERTIES_TEXT)
    with pushd(umls_mmsys_home):
        log.warning(f"The next step is slow, and doesn't say much. "
                    f"It produces roughly 29 Gb at peak. "
                    f"Watch progress with: "
                    f"watch 'du -bc {cfg.tmp_dir} | tail -1'")
        check_call_verbose(
            [
                join(cfg.java_home, "bin", "java"),
                "-classpath",
                classpath,
                "-Djava.awt.headless=true",
                f"-Djpf.boot.config={boot_config}",
                f"-Dlog4j.configurationFile={log4j_config}",
                # not "log4j.configuration" as in the original! Argh.
                # http://logging.apache.org/log4j/2.x/manual/configuration.html
                f"-Dinput.uri={umls_metadir}",
                f"-Doutput.uri={umls_output_dir}",
                f"-Dmmsys.config.uri={config_file}",
                # Additional from run_linux.sh:
                "-client",  # JVM option: client rather than server mode
                "-Dunzip.native=true",
                f"-Dunzip.path={system_unzip}",
                "-Dfile.encoding=UTF-8",
                "-Xms1000M",  # was 300M, but it's 1000M in run_linux.sh
                "-Xmx2000M",  # was 1000M, but it's 2000M in run_linux.sh
                "org.java.plugin.boot.Boot"
            ],
            env=umls_env)

    # -------------------------------------------------------------------------
    log.info("Converting UMLS data to Bio-YODIE format...")
    # -------------------------------------------------------------------------
    # Symlink the MetamorphoSys output into the place the builder script
    # expects its (hard-coded) UMLS input.
    os.symlink(src=umls_output_dir,
               dst=bioyodie_umls_input_dir,
               target_is_directory=True)
    with pushd(bioyodie_repo_dir):
        log.warning("The next step is also slow.")
        check_call_verbose([builder_script], env=bioyodie_env)

    # -------------------------------------------------------------------------
    log.info(f"Moving Bio-YODIE data to destination directory: {cfg.dest_dir}")
    # -------------------------------------------------------------------------
    output_files = os.listdir(bioyodie_output_dir)
    if output_files:
        shutil.copytree(bioyodie_output_dir, cfg.dest_dir)
        # ... destination should not already exist
        # ... it will make intermediate directories happily
    else:
        log.error(f"No output files in {bioyodie_output_dir}! "
                  f"Did the Bio-YODIE preprocessor partly crash?")
示例#12
0
def build_package() -> None:
    """
    Builds the package.

    Pipeline:

    1. Build the Python source distribution (sdist) via ``setup.py``.
    2. Assemble the Debian package work tree: directories, changelog,
       man pages, launch scripts, README, and the Debian maintainer
       scripts (control/preinst/postinst/prerm/postrm), plus a Lintian
       override file and a copyright file.
    3. Fix ownership/permissions, strip junk (VCS metadata, stray
       LICENSE files).
    4. Build the ``.deb`` with ``fakeroot dpkg-deb`` and check it with
       Lintian.
    5. Convert the ``.deb`` to an RPM with ``alien``, then rewrite the
       RPM's ``Requires:`` line with ``rpmrebuild`` (alien does not
       translate dependencies correctly).
    6. Delete the temporary workspace.

    Relies on module-level path constants (``SRCSERVERDIR``,
    ``WRKBASEDIR``, ``DEBDIR``, etc.) and helper functions
    (``write_text``, ``mkdir_p``, ``call``, ...) defined elsewhere in
    this file.
    """
    log.info("Building Python package")

    # Where setup.py writes the sdist, and where we copy it into the
    # packaging work tree.
    setup_py = join(SRCSERVERDIR, 'setup.py')
    sdist_basefilename = ('camcops_server-{}.tar.gz'.format(MAINVERSION))
    src_sdist_file = join(SRCSERVERDIR, 'dist', sdist_basefilename)
    wrk_sdist_file = join(WRKBASEDIR, sdist_basefilename)

    # Remove any stale sdist first, so the file we pick up afterwards is
    # guaranteed to be freshly built.
    try:
        log.info("Deleting old {} if it exists", src_sdist_file)
        os.remove(src_sdist_file)
    except OSError:
        pass
    os.chdir(SETUP_PY_DIR)  # or setup.py looks in wrong places?
    cmdargs = ['python', setup_py, 'sdist', '--extras']  # special!
    # ... '--extras' is presumably a project-specific option handled by
    #     this setup.py; not a standard distutils/setuptools flag.
    call(cmdargs)
    remove_gzip_timestamp(src_sdist_file)

    log.info("Making directories")
    mkdir_p(DEBDIR)
    mkdir_p(DEBOVERRIDEDIR)
    mkdir_p(PACKAGEDIR)
    mkdir_p(RPMTOPDIR)
    mkdir_p(WRKCONFIGDIR)
    mkdir_p(WRKCONSOLEFILEDIR)
    mkdir_p(WRKDIR)
    mkdir_p(WRKDOCDIR)
    mkdir_p(WRKMANDIR)
    mkdir_p(WRKMPLCONFIGDIR)
    mkdir_p(WRKBASEDIR)
    mkdir_p(WRKTOOLDIR)
    # Standard rpmbuild directory layout beneath the RPM top directory:
    for d in "BUILD,BUILDROOT,RPMS,RPMS/noarch,SOURCES,SPECS,SRPMS".split(","):
        mkdir_p(join(RPMTOPDIR, d))

    log.info("Copying files")
    write_gzipped_text(join(WRKDOCDIR, 'changelog.Debian'), get_changelog())
    copyglob(join(SRCTOOLDIR, VENVSCRIPT), WRKTOOLDIR)
    shutil.copyfile(src_sdist_file, wrk_sdist_file)

    log.info("Creating man page for camcops. "
             "Will be installed as " + DSTMANFILE)
    write_gzipped_text(WRKMANFILE_BASE, get_man_page_camcops_server())

    log.info("Creating man page for camcops_server_meta. "
             "Will be installed as " + DSTMETAMANFILE)
    write_gzipped_text(WRKMETAMANFILE_BASE, get_man_page_camcops_server_meta())

    log.info("Creating links to documentation. "
             "Will be installed as " + DSTREADME)
    write_text(WRKREADME, get_readme())

    log.info("Creating camcops_server launch script. "
             "Will be installed as " + DSTCONSOLEFILE)
    write_text(WRKCONSOLEFILE, get_camcops_server_launcher())

    log.info("Creating camcops_server_meta launch script. "
             "Will be installed as " + DSTMETACONSOLEFILE)
    write_text(WRKMETACONSOLEFILE, get_camcops_server_meta_launcher())

    log.info("Creating Debian control file")

    write_text(join(DEBDIR, 'control'), get_debian_control())

    # Debian maintainer scripts: run before/after install and removal.
    log.info("Creating preinst file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + '.preinst'))
    write_text(join(DEBDIR, 'preinst'), get_preinst())

    log.info("Creating postinst file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + '.postinst'))
    write_text(join(DEBDIR, 'postinst'), get_postinst(sdist_basefilename))

    log.info("Creating prerm file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + '.prerm'))
    write_text(join(DEBDIR, 'prerm'), get_prerm())

    log.info("Creating postrm file. Will be installed as " +
             join(DSTDPKGDIR, PACKAGE_DEB_NAME + '.postrm'))
    write_text(join(DEBDIR, 'postrm'), get_postrm())

    log.info("Creating Lintian override file")
    write_text(join(DEBOVERRIDEDIR, PACKAGE_DEB_NAME), get_override())

    log.info("Creating copyright file. Will be installed as " +
             join(DSTDOCDIR, 'copyright'))
    write_text(join(WRKDOCDIR, 'copyright'), get_copyright())

    log.info("Setting ownership and permissions")
    call(['find', WRKDIR, '-type', 'd', '-exec', 'chmod', '755', '{}', ';'])
    # ... make directories executable: must do that first, or all the
    # subsequent recursions fail
    call(['find', WRKDIR, '-type', 'f', '-exec', 'chmod', '644', '{}', ';'])
    # Scripts (launchers, maintainer scripts) must be executable:
    call([
        "chmod",
        "a+x",
        WRKCONSOLEFILE,
        WRKMETACONSOLEFILE,
        join(DEBDIR, 'prerm'),
        join(DEBDIR, 'postrm'),
        join(DEBDIR, 'preinst'),
        join(DEBDIR, 'postinst'),
    ])
    call(
        ['find', WRKDIR, '-iname', '*.py', '-exec', 'chmod', 'a+x', '{}', ';'])
    call(
        ['find', WRKDIR, '-iname', '*.pl', '-exec', 'chmod', 'a+x', '{}', ';'])

    log.info("Removing junk")
    call(['find', WRKDIR, '-name', '*.svn', '-exec', 'rm', '-rf', '{}', ';'])
    call(['find', WRKDIR, '-name', '.git', '-exec', 'rm', '-rf', '{}', ';'])
    call([
        'find', WRKDOCDIR, '-name', 'LICENSE', '-exec', 'rm', '-rf', '{}', ';'
    ])

    log.info("Building package")
    call(['fakeroot', 'dpkg-deb', '--build', WRKDIR, PACKAGENAME])
    # ... "fakeroot" prefix makes all files installed as root:root

    log.info("Checking with Lintian")
    call(['lintian', '--fail-on-warnings', PACKAGENAME])

    log.info("Converting to RPM")
    call(['fakeroot', 'alien', '--to-rpm', '--scripts', PACKAGENAME],
         cwd=PACKAGEDIR)
    # see "man alien"
    # NOTE: needs to be run as root for correct final permissions
    # NOTE(review): the "-2" release suffix below presumably comes from
    # alien's version-bumping behaviour — confirm against the alien
    # version in use if the RPM filename stops matching.
    expected_main_rpm_name = "{PACKAGE}-{MAINVERSION}-2.noarch.rpm".format(
        PACKAGE=PACKAGE_DEB_NAME,
        MAINVERSION=MAINVERSION,
    )
    full_rpm_path = join(PACKAGEDIR, expected_main_rpm_name)
    myuser = getpass.getuser()
    shutil.chown(full_rpm_path, myuser, myuser)

    log.info("Changing dependencies within RPM")
    # Alien does not successfully translate the dependencies, and anyway the
    # names for packages are different on CentOS. A dummy prerequisite package
    # works (below) but is inelegant.
    # The rpmbuild commands are filters (text in via stdin, text out to
    # stdout), so replacement just needs the echo command.

    depends_rpm = get_lines_without_comments(RPM_REQ_FILE)
    echoparam = repr("Requires: {}".format(" ".join(depends_rpm)))
    call([
        'rpmrebuild',
        '--define',
        '_topdir ' + RPMTOPDIR,
        '--package',
        '--change-spec-requires=/bin/echo {}'.format(echoparam),
        full_rpm_path,
    ])
    # ... add "--edit-whole" as the last option before the RPM name to see what
    #     you're getting
    # ... define topdir, or it builds in ~/rpmbuild/...
    # ... --package, or it looks for an installed RPM rather than a package
    #     file
    # ... if echo parameter has brackets in, ensure it's quoted

    shutil.move(join(RPMTOPDIR, 'RPMS', 'noarch', expected_main_rpm_name),
                join(PACKAGEDIR, expected_main_rpm_name))
    # ... will overwrite its predecessor

    log.info("Deleting temporary workspace")
    shutil.rmtree(TMPDIR, ignore_errors=True)  # CAUTION!

    # Done
    log.info("=" * 79)
    log.info("Debian package should be: " + PACKAGENAME)
    log.info("RPM should be: " + full_rpm_path)
def ensure_directories_exist() -> None:
    """
    Create the directories the server needs at runtime: the export lock
    directory always, and the user download directory if one is
    configured.
    """
    cfg = get_default_config_from_os_env()
    mkdir_p(cfg.export_lockdir)
    download_dir = cfg.user_download_dir
    if download_dir:
        mkdir_p(download_dir)