Exemplos de Tier em Python, exemplos de cdrapi.settings.Tier em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: job_list.py Projeto: NCIOCPL/cdr-scheduler

    def subject(self):
        """Return the email subject for the report, tagged with the tier name.

        The string is built on first use and cached on the instance.
        """

        if hasattr(self, "_subject"):
            return self._subject
        self._subject = f"[{Tier().name}] CDR Pending Scheduled Jobs"
        return self._subject

Exemplo n.º 2

0

Exibir arquivo

Arquivo: export-docs.py Projeto: NCIOCPL/cdr-publishing

    def tier(self):
        """
        Return the `Tier` object identifying the CDR server on which
        the publishing job is running (created once, then cached).
        """

        try:
            return self._tier
        except AttributeError:
            self._tier = Tier()
            return self._tier

Exemplo n.º 3

0

Exibir arquivo

Arquivo: glossifier_refresh.py Projeto: NCIOCPL/cdr-scheduler

    def __init__(self, logger=None, recip=None):
        """
        Prepare the tier, logging, and database access for the job.

        Pass:
            logger - the scheduled job's logger; when omitted (for
                     example, when testing from the command line) a
                     debug-level "glossifier" logger is created
            recip - optional email address for testing without spamming
                    the users
        """

        self.tier = Tier()
        if logger is None:
            logger = cdr.Logging.get_logger("glossifier", level="debug")
        self.logger = logger
        self.recip = recip
        connection = db.connect()
        self.conn = connection
        self.cursor = connection.cursor()

Exemplo n.º 4

0

Exibir arquivo

Arquivo: PushDevData.py Projeto: NCIOCPL/cdr-tools

    def __init__(self):
        """
        Build the control object for restoring data on the CDR DEV server.

        The steps, in order:
          1. refuse to run anywhere but the DEV tier
          2. parse the command-line arguments
          3. set up logging, database access, and the data sets to compare
        """

        # 1. Safety check.
        tier_name = Tier().name
        if tier_name != "DEV":
            raise Exception("This script must only be run on the DEV tier.")

        # 2. Get what we need from the command line.
        parser = ArgumentParser()
        parser.add_argument(
            "--directory",
            required=True,
            help="directory to restore from",
        )
        parser.add_argument("--user", required=True, help="user ID")
        parser.add_argument("--session", required=True, help="user session")
        parser.add_argument(
            "--skip-content",
            action="store_true",
            help="exclude practice documents from being restored",
        )
        opts = parser.parse_args()

        # 3. Create objects used to do the job's work.
        self._logger = cdr.Logging.get_logger("PushDevData", console=True)
        connection = db.connect(user="******")
        self._conn = connection
        self._cursor = connection.cursor()
        self._dir = opts.directory
        self._skip_content = opts.skip_content or False
        # The preserved data is loaded from the directory; the current
        # data is loaded through the cursor, given the preserved set.
        preserved = cdr_dev_data.Data(self._dir)
        self._old = preserved
        self._new = cdr_dev_data.Data(self._cursor, preserved)
        self._uid = opts.user
        self._session = opts.session
        self._logger.info("session %s", self._session)
        self._logger.info("using data preserved in %s", self._dir)
        self._new_doc_types = []

Exemplo n.º 5

0

Exibir arquivo

    def tier(self):
        """Return the `Tier` object for the local tier (cached)."""

        if hasattr(self, "_tier"):
            return self._tier
        self._tier = Tier()
        return self._tier

Exemplo n.º 6

0

Exibir arquivo

import cdr
from cdrapi import db as cdrdb
from cdrapi.settings import Tier

# Command line: a required document type name, plus optional limits on
# how many documents to process and which tier to talk to.
parser = argparse.ArgumentParser()
parser.add_argument("doctype")
parser.add_argument("--max-docs", type=int)
parser.add_argument("--tier")
parser.add_argument("--skip", type=int)
opts = parser.parse_args()

# Collect the IDs of the documents of the requested type, in ID order.
connection = cdrdb.connect(user="******", tier=opts.tier)
cursor = connection.cursor()
query = cdrdb.Query("document d", "d.id")
query.join("doc_type t", "t.id = d.doc_type")
query.where(query.Condition("t.name", opts.doctype))
if opts.max_docs:
    query.limit(opts.max_docs)
query = query.order("d.id")
rows = query.execute(cursor).fetchall()
if opts.skip:
    # Drop the leading rows (applied after any --max-docs limit).
    rows = rows[opts.skip:]

# Report progress on stderr while reindexing each document.
total = len(rows)
where = opts.tier or Tier().name
stderr.write("reindexing {} documents on {}\n".format(total, where))
for count, (doc_id,) in enumerate(rows, start=1):
    progress = "\rreindexing CDR{:010d} {:d} of {:d}"
    stderr.write(progress.format(doc_id, count, total))
    resp = cdr.reindex("guest", doc_id, tier=opts.tier)
    if resp:
        # A non-empty response is echoed on its own line.
        stderr.write("\n{!r}\n".format(resp))
stderr.write("\n")

Exemplo n.º 7

0

Exibir arquivo

    return sets


def get_members(members):
    """Describe each member of a filter set as a display string.

    Pass:
        members - iterable of `FilterSet` objects (nested sets) and
                  other objects, which are labeled by their `title`

    Return:
        list of strings, one per member, in the original order
    """

    return [
        f"filter set: {member.name}" if isinstance(member, FilterSet)
        else f"filter: {member.title}"
        for member in members
    ]


# Compare the filter sets on two tiers. The "other" tier defaults to PROD
# and the "local" tier defaults to the tier this script is running on.
parser = ArgumentParser()
parser.add_argument("--other_tier", default="PROD")
parser.add_argument("--local_tier", default=Tier().name)
opts = parser.parse_args()
local = get_sets(opts.local_tier)
other = get_sets(opts.other_tier)
other_names = sorted(other)
position = 0
# The banner is printed at most once, just before the first difference.
banner = f"Comparing FilterSets between {opts.other_tier} and {opts.local_tier}"
for name in sorted(local):
    # Report names from the other tier which sort before the current
    # local name.
    # NOTE(review): nothing in the visible lines advances `position`;
    # the remainder of this loop appears to lie outside this excerpt.
    while position < len(other_names) and other_names[position] < name:
        if banner:
            print(banner)
            print()
            banner = None
        print(other_names[position])
        print(f"local name is {name!r}")
        print(f"other name is {other_names[position]!r}")

Exemplo n.º 8

0

Exibir arquivo

Arquivo: PrepareCreateLoginsScript.py Projeto: NCIOCPL/cdr-server

#----------------------------------------------------------------------
# Preprocess login creation/configuration script for this tier.
#----------------------------------------------------------------------
import time
import cdrpw
from cdrapi.settings import Tier

# Load the template as bytes so that everything except the placeholders
# is written back out exactly as it was read.
with open("CreateLogins.sql", "rb") as fp:
    script = fp.read()

# The hosting environment name comes from a local file when it can be
# read; otherwise fall back on the default (deliberately best-effort).
try:
    with open("d:/etc/cdrenv.rc") as fp:
        env = fp.read().strip()
except Exception:
    env = "CBIIT"

# Look up the passwords for this tier's database accounts.
tier = Tier().name
dboPw = cdrpw.password(env, tier, "cdr", "CdrSqlAccount")
guestPw = cdrpw.password(env, tier, "cdr", "CdrGuest")
pubPw = cdrpw.password(env, tier, "cdr", "CdrPublishing")

# Plug the passwords into the script. The script is a bytes object, so
# both the placeholder and its replacement must be bytes as well
# (bytes.replace() raises TypeError when handed str arguments).
# NOTE(review): assumes UTF-8 is acceptable for the passwords -- confirm.
substitutions = (
    (b"@@DBOPW@@", dboPw),
    (b"@@GUESTPW@@", guestPw),
    (b"@@PUBPW@@", pubPw),
)
for placeholder, password in substitutions:
    if not isinstance(password, bytes):
        password = password.encode("utf-8")
    script = script.replace(placeholder, password)

# Save the customized script under a timestamped name.
now = time.strftime("%Y%m%d%H%M%S")
name = "CreateLogins-%s.sql" % now
with open(name, "wb") as fp:
    fp.write(script)
print("wrote", name)

Exemplo n.º 9

0

Exibir arquivo

    def tier(self):
        """Return the `Tier` object with the run-time settings (cached)."""

        cached = hasattr(self, "_tier")
        if not cached:
            self._tier = Tier()
        return self._tier

Exemplo n.º 10

0

Exibir arquivo

Arquivo: export-docs.py Projeto: NCIOCPL/cdr-publishing

    args = parser.parse_args()
    opts["thread"] = args.thread
    if args.debug:
        opts["level"] = "DEBUG"
    if args.output:
        opts["output-dir"] = args.output
    control = Control(args.session, args.job, args.spec, *args.docs, **opts)
    try:
        control.run()
        control.logger.info("Thread %05d complete", args.thread)
        for thread in threading.enumerate():
            control.logger.info("%s is active", thread)
        sys.exit(0)
    except Exception:
        control.logger.exception("Thread %05d failure", args.thread)
        sys.exit(1)


if __name__ == "__main__":
    # Only run the job when executed as a script, not when this file is
    # loaded as a module.
    try:
        main()
    except Exception:
        # Record the failure in the "export-docs" log and signal the
        # failure to the caller through the exit code.
        Tier().get_logger("export-docs").exception(
            "Unable to construct CDR document export controller")
        sys.exit(1)

Exemplo n.º 11

0

Exibir arquivo

    def tier(self):
        """Return the `Tier` object for the CDR tier in use (cached)."""

        try:
            cached = self._tier
        except AttributeError:
            cached = self._tier = Tier()
        return cached

Exemplo n.º 12

0

Exibir arquivo

Arquivo: fetch-tier-settings.py Projeto: NCIOCPL/cdr-admin

class Settings:
    """Collect configuration details for this tier's Windows CDR server.

    The constructor gathers the operating system version, environment
    variables, SQL Server attributes, installed Python packages, IIS
    configuration files, checksums for the CDR file trees, and the
    document type values. `run()` writes everything out as JSON.
    """

    TIER = Tier()
    HOSTNAMES = TIER.hosts
    LOGFILE = f"{cdr.DEFAULT_LOGDIR}/fetch-tier-settings.log"
    WD = cdr.WORK_DRIVE
    WEBCONFIG_ROOT = f"{WD}:/Inetpub/wwwroot/web.config"
    WEBCONFIG_SECURE = f"{WD}:/Inetpub/wwwroot/cgi-bin/secure/web.config"
    WEBCONFIG_GLOSSIFIER = f"{WD}:/cdr/Glossifier/cgi-bin/web.config"

    def __init__(self, session):
        """Remember the session and collect all of the settings.

        Pass:
            session - caller's session, stored as `self.session`
        """

        self.session = session

        # The organization name is read from the tier's `cdrenv.rc` file
        # when that is possible; otherwise we fall back on the default.
        # The path must be interpolated: without the braces the literal
        # string "self.TIER.etc/cdrenv.rc" was opened, which always failed.
        try:
            with open(f"{self.TIER.etc}/cdrenv.rc") as fp:
                self.org = fp.read().strip()
        except Exception:
            self.org = "CBIIT"
        self.tier = self.TIER.name
        self.windows = self.get_windows_settings()

    def get_iis_settings(self):
        """Return a dictionary describing the web server configuration.

        Includes the account running this process (from `whoami`), the
        `SERVER_SOFTWARE` environment value, and the parsed contents of
        the three `web.config` files.
        """

        return {
            "account": cdr.run_command("whoami").stdout.strip(),
            "version": os.environ.get("SERVER_SOFTWARE"),
            "web.config": {
                "root": self.xmltojson(self.WEBCONFIG_ROOT),
                "secure": self.xmltojson(self.WEBCONFIG_SECURE),
                "glossifier": self.xmltojson(self.WEBCONFIG_GLOSSIFIER),
            }
        }

    def xmltojson(self, path):
        """Parse an XML file into nested dictionaries.

        Pass:
            path - location of the XML file

        Return:
            dictionary with a single key (the root element's tag) whose
            value is the structure produced by `extract_node()`
        """

        root = etree.parse(path).getroot()
        return {root.tag: self.extract_node(root)}

    def extract_node(self, node):
        """Recursively convert an XML element to a dictionary.

        Attributes and child elements share a single dictionary, keyed
        by attribute name or child tag. A key with exactly one value is
        mapped directly to that value; otherwise to the list of values.

        Pass:
            node - parsed XML element

        Return:
            dictionary of the node's attributes and children
        """

        children = {}
        for key in node.keys():
            children[key] = [node.get(key)]
        for child in node:
            children.setdefault(child.tag, []).append(self.extract_node(child))

        # Collapse the single-item lists.
        for name, values in children.items():
            if len(values) == 1:
                children[name] = values[0]
        return children

    def get_windows_settings(self):
        """Assemble the dictionary of all the settings we report."""

        winver = sys.getwindowsversion()
        settings = {"version": {}}
        for name in ("major", "minor", "build", "platform", "service_pack"):
            settings["version"][name] = getattr(winver, name, "")
        settings["environ"] = dict(os.environ)

        # Don't blow up if PATH is not set; skip empty segments.
        search_path = os.environ.get("PATH", "")
        settings["search_path"] = [p for p in search_path.split(";") if p]
        settings["mssql"] = self.get_mssql_settings()
        settings["python"] = self.get_python_settings()
        settings["iis"] = self.get_iis_settings()
        settings["files"] = self.get_files()
        settings["doctypes"] = self.get_doctypes()
        return settings

    def get_doctypes(self):
        """Extract valid values and linking elements for each doc type.

        Return:
            dictionary indexed by the `Type` attribute of each
            `CdrGetDocTypeResp` element in `CdrDocTypes.xml`
        """

        doctypes = {}
        path = f"{self.WD}:/cdr/ClientFiles/CdrDocTypes.xml"
        root = etree.parse(path).getroot()
        for node in root.findall("CdrGetDocTypeResp"):
            key = node.get("Type")
            doctypes[key] = {}
            for child in node:
                if child.tag == "EnumSet":
                    values = [vv.text for vv in child.findall("ValidValue")]
                    doctypes[key][child.get("Node")] = sorted(values)
                elif child.tag == "LinkingElements":
                    # NOTE(review): this looks for `LinkingElements`
                    # children of a `LinkingElements` node; the singular
                    # `LinkingElement` may have been intended -- confirm
                    # against the structure of CdrDocTypes.xml.
                    elems = [e.text for e in child.findall("LinkingElements")]
                    doctypes[key]["linking-elements"] = sorted(elems)
        return doctypes

    def get_files(self):
        """Return a nested dictionary of checksums for the CDR files.

        Each top-level CDR directory is walked once (the original list
        named `Licensee` twice, so that tree was hashed a second time
        for no benefit).
        """

        files = {}
        subdirectories = (
            "lib",
            "Bin",
            "Build",
            "ClientFiles",
            "Glossifier",
            "Licensee",
            "Mailers",
            "Publishing",
        )
        for name in subdirectories:
            self.walk(files, f"{self.WD}:/cdr/{name}")
        self.walk(files, f"{self.WD}:/Inetpub/wwwroot")
        return files

    def walk(self, files, path):
        """Add the checksums for the files in a directory tree.

        Pass:
            files - nested dictionary to be populated; directories are
                    represented by dictionaries keyed by name
            path - location of the top of the tree to be walked
        """

        for dirpath, _dirs, filenames in os.walk(path):
            if "__pycache__" in dirpath:
                continue
            dirpath = dirpath.replace("\\", "/")

            # Find (or create) the dictionary for this directory. The
            # first path segment (the drive) is not part of the index.
            directory = files
            for name in dirpath.split("/")[1:]:
                directory = directory.setdefault(name, {})
            for name in filenames:
                self.add_file(dirpath, name, directory)

    def add_file(self, path, name, files):
        """Record the MD5 checksum for a single file.

        Pass:
            path - location of the directory containing the file
            name - name of the file
            files - dictionary in which the checksum is stored, keyed
                    by the file's name; "unreadable" is stored instead
                    if the file cannot be read
        """

        try:
            # The context manager closes the file even if read() fails.
            with open(f"{path}/{name}", "rb") as fp:
                checksum = hashlib.md5(fp.read()).hexdigest().lower()
        except Exception:
            checksum = "unreadable"
        files[name] = checksum

    def get_python_settings(self):
        """Return the Python version and the installed package versions."""

        env = pkg_resources.Environment()
        settings = dict(python=sys.version)
        for name in env:
            for package in env[name]:
                settings[package.project_name] = package.version
        return settings

    def get_mssql_settings(self):
        """Return the attributes reported by `sp_server_info`, by name."""

        cursor = db.connect().cursor()
        cursor.execute("EXEC sp_server_info")
        return {name: value for _id, name, value in cursor.fetchall()}

    def serialize(self):
        """Return the collected settings as an indented JSON string."""

        return json.dumps({
            "windows": self.windows,
        }, indent=2)

    def run(self):
        """Write the settings to standard output as a JSON response."""

        print(f"Content-type: application/json\n\n{self.serialize()}")

Exemplo n.º 13

0

Exibir arquivo

 def setUp(self):
     """Open a CDR session for the test account before each test."""
     opts = {
         "comment": "filter testing",
         "password": Tier().password(self.USERNAME),
     }
     Tests.session = Session.create_session(self.USERNAME, **opts)

Exemplo n.º 14

0

Exibir arquivo

Arquivo: GlossaryTermAudioReview.py Projeto: NCIOCPL/cdr-admin

class Control(Controller):
    """Logic control center for the script."""

    SUBTITLE = "Glossary Term Audio Review"
    SAVE = "Save"
    BASEDIR = Tier().basedir
    LOGNAME = "GlossaryTermAudioReview"
    ZIPDIR = f"{BASEDIR}/Audio_from_CIPSFTP"
    REVDIR = f"{ZIPDIR}/GeneratedRevisionSheets"
    IGNORE = "__MACOSX"
    NOTEPAT = compile(r"[\r\n]+")
    # NOTE(review): the dot before "zip" is unescaped and the pattern is
    # not anchored at the end, so it is looser than the formats named in
    # FIXNAME_INSTRUCTIONS -- left unchanged pending confirmation.
    NAMEPAT = compile(r"(?i)(?P<base>Week_\d{4}_\d\d)(?P<rev>_Rev\d)*.zip")
    REVPAT = compile(r"(?i)_Rev(?P<num>\d+)")
    MAXNOTE = 2040
    MAXFILE = 250
    MAXTERM = 250
    MAXNAME = 250
    PERMISSION = "REVIEW TERM AUDIO"
    FIXNAME_INSTRUCTIONS = [
        "Please correct the name to reflect one of the following formats "
        "or contact programming support staff for assistance.",
        "Week_YYYY_WW.zip or Week_YYYY_WW_RevN.zip",
        "... where 'Y', 'W', and 'N' represent decimal digits.",
    ]

    def run(self):
        """Provide custom routing.

        Requests for a workbook, for an MP3 file, or to save the review
        results are handled here; everything else is passed on to the
        base class.
        """

        args = self.request, self.name, self.id
        self.logger.debug("request=%s name=%s, id=%s", *args)
        try:
            if self.book:
                return self.send_book()
            if self.mp3:
                return self.send_mp3()
            elif self.request == self.SAVE:
                return self.save()
        except Exception as e:
            self.logger.exception("Failure")
            self.bail(e)
        Controller.run(self)

    def populate_form(self, page):
        """Show the review form for a set, or the set list if none selected.

        The landing page for this script shows the list of audio file sets
        on the disk. If the user selects one of the sets, we draw the
        form for reviewing the audio files in that set.

        Pass:
            page - HTMLPage object on which we draw the form
        """

        # Set the table background to match the rest of the form page.
        rules = ["td, th { background:#e8e8e8; }"]

        # Show the review form for an audio file set if one has been picked.
        if self.audio_set:
            instructions = (
                "Click a hyperlinked mp3 filename to play the sound in "
                "your browser-configured mp3 player (files which have "
                "already been reviewed files are at the bottom of the "
                "list of files.)",
                "Use the radio buttons to approve or reject a file.",
                "When finished, click 'Save' to save any changes to "
                "the database. If all files in the set have been reviewed "
                "and any have been rejected, a spreadsheet containing "
                "rejected terms will be created and displayed on your "
                "workstation. Please save it for future use.",
            )
            page.form.append(page.hidden_field("id", self.audio_set.id))
            fieldset = page.fieldset("Instructions")
            for paragraph in instructions:
                fieldset.append(page.B.P(paragraph))
            page.form.append(fieldset)
            page.form.append(self.audio_set.table)

            # The last rule was malformed ("td:last-child: padding ... }"
            # had no opening brace), so it could not be applied as written.
            rules += [
                "td, th { border-color:#888; }",
                "fieldset{ width: 900px; }",
                ".status-buttons { width: 86px; white-space: nowrap; }",
                ".status-buttons input { padding-left: 10px; }",
                "td:last-child { padding: 0 2px; }",
            ]

        # Otherwise, show the list of all the sets on the disk.
        else:
            fieldset = page.fieldset("Instructions")
            instructions = (
                "Click a link to a zip file to review from the table below. "
                "Only those files that have not yet been completely reviewed "
                "are hyperlinked.")
            fieldset.append(page.B.P(instructions))
            page.form.append(fieldset)
            columns = "File name", "Review status", "Date modified"
            columns = [page.B.TH(column) for column in columns]
            table = page.B.TABLE(page.B.TR(*columns))
            for zipfile in self.zipfiles_on_disk:
                table.append(zipfile.row)
            fieldset = page.fieldset("Audio Zip Files")
            fieldset.append(table)
            page.form.append(fieldset)
            rules += [
                "td, th { border-color: #bbb; }",
                "table { width: 95%; }",
            ]
        page.add_css("\n".join(rules))

    def save(self):
        """Save review results and show another form.

        If the user has not completed the review of this set, redisplay
        its review form. Otherwise, go back to the display of all the
        sets on the disk.
        """

        if not self.session.can_do(self.PERMISSION):
            self.bail("User not authorized to review term audio files")
        updates = 0
        for mp3 in self.audio_set.audio_files:
            status = self.fields.getvalue(f"status-{mp3.id}") or "U"
            note = self.fields.getvalue(f"note-{mp3.id}") or ""

            # Collapse runs of line breaks and enforce the length limit.
            note = self.NOTEPAT.sub("\n", note.strip())[:self.MAXNOTE]
            if note != mp3.reviewer_note or status != mp3.review_status:
                mp3.update(status, note)
                updates += 1

        # If there have been any changes, commit them and refresh the set.
        if updates:
            self.logger.info("updated %d mp3 rows", updates)
            self.conn.commit()
        if self.audio_set.done:
            book_name = self.audio_set.close()
            legend = f"Audio Set {self.audio_set.name} Review Complete"
            fieldset = self.form_page.fieldset(legend)
            args = ["All of the audio files in this set have been reviewed. "]
            if book_name:
                url = self.make_url(self.script, book=book_name)
                label = "the workbook for these rejected audio files"
                link = self.form_page.B.A(label, href=url)
                args += [
                    "Some of the audio files were rejected. You can retrieve ",
                    link,
                    ", which can be used for the next round of audio files.",
                ]
            else:
                args.append(
                    "None of the files in the set were rejected, so there "
                    "is no new workbook for a subsequent round of files.")
            paragraph = self.form_page.B.P(*args)
            fieldset.append(paragraph)
            self.form_page.form.append(fieldset)

            # Clearing the set makes the form show the list of all sets.
            self.audio_set = None
        else:
            if updates:
                self.subtitle = f"Saved updates for {updates} recording(s)"
        self.show_form()

    def send_book(self):
        """Serve up the new workbook with rejected audio files.

        The workbook's name comes from the request's `book` parameter,
        so a name containing a path separator is refused instead of
        being used to read a file outside the workbook directory.
        """

        book = self.book
        if "/" in book or "\\" in book:
            return self.bail("Invalid workbook name")
        with open(f"{self.REVDIR}/{book}.xlsx", "rb") as fp:
            book_bytes = fp.read()
        mime_type = f"application/{Excel.MIME_SUBTYPE}"
        self.send_bytes(book_bytes, f"{book}.xlsx", mime_type)

    def send_bytes(self, payload, name, mime_type):
        """Return a binary file to the browser.

        Used by `send_book()` and `send_mp3()`.

        Pass:
            payload - the bytes to return
            name - string for the content disposition's filename
            mime_type - standard RFC6838 type/subtype string
        """

        headers = (
            f"Content-Type: {mime_type}",
            f"Content-disposition: inline; filename={name}",
            f"Content-Length: {len(payload):d}",
        )
        for header in headers:
            stdout.buffer.write(header.encode("utf-8"))
            stdout.buffer.write(b"\n")

        # A blank line separates the headers from the payload.
        stdout.buffer.write(b"\n")
        stdout.buffer.write(payload)

    def send_mp3(self):
        """Let the reviewer listen to the audio file."""

        query = self.Query("term_audio_mp3 m", "m.mp3_name", "z.filename")
        query.join("term_audio_zipfile z", "z.id = m.zipfile_id")
        query.where(query.Condition("m.id", self.mp3))
        mp3_name, filename = query.execute(self.cursor).fetchone()
        with ZipFile(f"{self.ZIPDIR}/{filename}") as zipfile:
            mp3_bytes = zipfile.read(mp3_name)
        self.send_bytes(mp3_bytes, mp3_name, "audio/mpeg")

    @property
    def audio_set(self):
        """Information about the set of MP3 files being reviewed.

        The set is identified by the `name` parameter if present, or
        else by the `id` parameter. None if neither has been provided.
        """

        if not hasattr(self, "_audio_set"):
            self._audio_set = None
            if self.name:
                self._audio_set = AudioSet(self, name=self.name)
            elif self.id:
                self._audio_set = AudioSet(self, id=self.id)
        return self._audio_set

    @audio_set.setter
    def audio_set(self, value):
        """Allow the audio_set to be reset after review is done.

        Pass:
            value - new value for the property (will be None in this case)
        """

        self._audio_set = value

    @property
    def book(self):
        """Name of new workbook with rejected audio files.

        Used by the callback to fetch the new Excel file.
        """
        return self.fields.getvalue("book")

    @property
    def buttons(self):
        """Customize the action list (this isn't a report)."""
        if not self.audio_set:
            return self.ADMINMENU, self.LOG_OUT
        else:
            return self.SAVE, self.ADMINMENU, self.LOG_OUT

    @property
    def id(self):
        """ID of the MP3 file set's row in the database table."""
        return self.fields.getvalue("id")

    @property
    def mp3(self):
        """ID of the MP3 file the reviewer wishes to hear."""
        return self.fields.getvalue("mp3")

    @property
    def name(self):
        """File name for the selected MP3 file set to be reviewed."""
        return self.fields.getvalue("name")

    @property
    def name_counts(self):
        """Index of integers for new MP3 names.

        This is used to prevent name collisions in the event there
        are multiple Spanish names for the same term.
        """

        if not hasattr(self, "_name_counts"):
            self._name_counts = dict()
        return self._name_counts

    @property
    def subtitle(self):
        """String to be displayed under the main banner."""

        if not hasattr(self, "_subtitle"):
            self._subtitle = self.SUBTITLE
        return self._subtitle

    @subtitle.setter
    def subtitle(self, value):
        """Allow the display to be overridden after saving reviews.

        Pass:
            value - new string to be displayed under the banner
        """

        self._subtitle = value

    @property
    def user_id(self):
        """Account ID for the current CDR user."""
        return self.session.user_id

    @property
    def zipfiles(self):
        """Load the complete set of term audio zipfiles from the database.

        Does not include zipfiles which are in the file system but have
        not yet been reviewed. See the `zipfiles_on_disk` property for
        the list of all files in the zipfile directory which match our
        conventional filename pattern for audio zipfiles.

        This set contains zipfiles which are no longer in the audio
        files directory in the file system (because they have been
        archived by the scheduler file sweeper). Only those zipfiles
        which are still in the file system are shown in the list of
        zipfiles on this script's initial page.

        The return value is a dictionary of the zipfile objects, indexed
        by the lowercase version of each file's name.
        """

        if not hasattr(self, "_zipfiles"):

            class ZipFiles:
                """ID and name indexes to the term audio zipfiles."""
                def __init__(self, control):
                    """Save the reference to the control object.

                    Pass:
                        control - access to the DB and the HTML builder class
                    """

                    self.__control = control

                @property
                def files(self):
                    """Sequence of `ZipFile` objects."""

                    if not hasattr(self, "_files"):
                        ctrl = self.__control
                        query = ctrl.Query("term_audio_zipfile", "*")
                        rows = query.execute(ctrl.cursor).fetchall()
                        self._files = [self.ZipFile(ctrl, row) for row in rows]
                    return self._files

                @property
                def ids(self):
                    """Dictionary of zipfiles by primary key."""

                    if not hasattr(self, "_ids"):
                        self._ids = {file.id: file for file in self.files}
                    return self._ids

                @property
                def names(self):
                    """Dictionary of zipfiles by file name."""

                    if not hasattr(self, "_names"):
                        self._names = {f.filename: f for f in self.files}
                    return self._names

                class ZipFile:
                    """Information about a single archive of audio files.

                    This is a simpler class than the global `AudioSet`
                    class. That class has information about the audio
                    files in the zip file. This class has just enough
                    information to meet the needs of the page which
                    displays all of the zipfiles.

                    Properties:
                        id - integer primary key for the zipfile record
                        filename - string for the zipfile's name
                        filedate - date/time stamp for the zipfile
                        complete - Boolean indicating whether reviews are done
                    """

                    PROPS = "id", "filename", "filedate", "complete"

                    def __init__(self, control, row):
                        """Capture the caller's information.

                        Pass:
                            control - access to the HTML builder class
                            row - result set row from the SQL query
                        """

                        self.__control = control
                        self.__row = row

                    def __getattr__(self, name):
                        """Return the other properties directly.

                        Only invoked for attributes not found by the
                        normal lookup; those are fetched from the
                        database row.
                        """
                        return getattr(self.__row, name)

                    def __str__(self):
                        """String for debugging/logging."""

                        if not hasattr(self, "_str"):
                            names = self.PROPS
                            props = [f"{n}={getattr(self, n)}" for n in names]
                            self._str = " ".join(props)
                        return self._str

                    @property
                    def complete(self):
                        """True if all the audio files have been reviewed."""
                        return self.__row.complete == "Y"

            self.__zipfiles = ZipFiles(self)
            self._zipfiles = {}
            for zipfile in self.__zipfiles.files:
                self._zipfiles[zipfile.filename.lower()] = zipfile
        return self._zipfiles

    @property
    def zipfile_names(self):
        """Index by name of all the audio set zipfiles on the disk."""

        if not hasattr(self, "_zipfile_names"):
            self._zipfile_names = {}
            for zipfile in self.zipfiles_on_disk:
                self._zipfile_names[zipfile.key] = zipfile
        return self._zipfile_names

    @property
    def zipfiles_on_disk(self):
        """Zipfiles in the file system.

        Sorted sequence of `DiskFile` objects for the entries in the
        audio zipfile directory whose names start with "week" and end
        with ".zip" (ignoring case). If any such name does not match
        `NAMEPAT` the problem is logged and reported to the user.
        """

        if not hasattr(self, "_zipfiles_on_disk"):

            class DiskFile:
                """One audio zipfile found in the zipfile directory."""

                STARTED = "Started"
                UNREVIEWED = "Unreviewed"
                COMPLETED = "Completed"
                STATUS_SORT = {STARTED: 1, UNREVIEWED: 2, COMPLETED: 3}

                def __init__(self, control, entry):
                    """Capture the caller's information.

                    Pass:
                        control - access to the database information
                                  and the HTML builder class
                        entry - directory entry from `scandir()`
                    """

                    self.__control = control
                    self.__entry = entry

                def __lt__(self, other):
                    """Sort by status then by filename."""
                    return self.sortkey < other.sortkey

                @property
                def control(self):
                    """Reference to the script's control object."""
                    return self.__control

                @property
                def datetime(self):
                    """When the file was last modified."""

                    if not hasattr(self, "_datetime"):
                        mtime = self.__entry.stat().st_mtime
                        self._datetime = datetime.fromtimestamp(mtime)
                    return self._datetime

                @property
                def db_info(self):
                    """Information about this file from the database."""

                    if not hasattr(self, "_db_info"):
                        self._db_info = self.control.zipfiles.get(self.key)
                    return self._db_info

                @property
                def name(self):
                    """String for the file's name, as found on the disk."""
                    return self.__entry.name

                @property
                def path(self):
                    """Location of the file, using forward slashes."""

                    if not hasattr(self, "_path"):
                        self._path = self.__entry.path.replace("\\", "/")
                    return self._path

                @property
                def key(self):
                    """Lowercase version of the file's name."""

                    if not hasattr(self, "_key"):
                        self._key = self.name.lower()
                    return self._key

                @property
                def sortkey(self):
                    """Major sort by status, subsort by filename."""

                    if not hasattr(self, "_sortkey"):
                        self._sortkey = self.STATUS_SORT[self.status], self.key
                    return self._sortkey

                @property
                def status(self):
                    """Review status: unreviewed, started, or completed.

                    A file with no database information is unreviewed.
                    """

                    if not hasattr(self, "_status"):
                        if not self.db_info:
                            self._status = self.UNREVIEWED
                        elif self.db_info.complete:
                            self._status = self.COMPLETED
                        else:
                            self._status = self.STARTED
                    return self._status

                @property
                def row(self):
                    """Table row for the file on the landing page.

                    The file's name is a link to its review form unless
                    the review of the set has been completed.
                    """

                    if not hasattr(self, "_row"):
                        B = self.control.HTMLPage.B
                        filename = self.name
                        if self.status != self.COMPLETED:
                            script = self.control.script

                            # Files without a database row can only be
                            # identified by name.
                            if self.status == self.UNREVIEWED:
                                params = dict(name=self.name)
                            else:
                                params = dict(id=self.db_info.id)
                            url = self.control.make_url(script, **params)
                            filename = B.A(filename, href=url)
                        filename = B.TD(filename)
                        status = B.TD(self.status, B.CLASS("center"))

                        # Trim the timestamp to whole seconds.
                        modified = str(self.datetime)[:19]
                        modified = B.TD(modified, B.CLASS("center"))
                        self._row = B.TR(filename, status, modified)
                    return self._row

            files = []
            for entry in scandir(self.ZIPDIR):
                key = entry.name.lower()
                if key.startswith("week") and key.endswith(".zip"):
                    if self.NAMEPAT.match(entry.name):
                        files.append(DiskFile(self, entry))
                    else:
                        message = f"Found file {entry.name!r}."
                        self.logger.warning(message)
                        self.bail(message, extra=self.FIXNAME_INSTRUCTIONS)
            self._zipfiles_on_disk = sorted(files)
        return self._zipfiles_on_disk

Exemplo n.º 15

0

Exibir arquivo

class Control(Controller):
    """Processing logic.

    Builds the request form and performs the retrieval of audio zip
    archives from the SFTP server, reporting the outcome in a table.
    """

    # Tier settings, used below to derive the default directory paths.
    TIER = Tier()
    SUBTITLE = "Retrieve Audio Files From CIPSFTP Server"
    LOGNAME = "FtpAudio"
    USER = "******"
    # Regular expression fragments: WEEK for the base name of a zip
    # archive, FILE for the base name of an MP3 member of an archive.
    WEEK = r"^Week_\d{4}_\d\d(_Rev\d)?"
    FILE = r"\d+_e[ns]\d*"
    # Private key file used to authenticate the SSH connection.
    SSH_KEY = r"\etc\cdroperator_rsa"
    # Default directories, offered as initial values on the form.
    CDRSTAGING = "/sftp/sftphome/cdrstaging"
    AUDIO_DIR = f"{CDRSTAGING}/ciat/{TIER.name.lower()}/Audio"
    SOURCE_DIR = f"{AUDIO_DIR}/Term_Audio"
    TARGET_DIR = f"{TIER.basedir}/Audio_from_CIPSFTP"
    TRANSFERRED_DIR = f"{AUDIO_DIR}/Audio_Transferred"
    # Explanatory text shown at the top of the form.
    INSTRUCTIONS = (
        "Files which match the pattern Week_YYYY_WW.zip or "
        "Week_YYYY_WW_RevN.zip will be retrieved from the source "
        "directory on the NCI SFTP server and placed in the destination "
        "directory on the Windows CDR server. Then they will be copied "
        "(if running in test mode) or moved to a backup location on the SFTP server "
        "(referred to below as the Transferred directory). By default, "
        "retrieval of a zip file will be skipped if the file already exists "
        "on the Windows CDR server (though this can be overridden). "
        "In test mode, the retrievals will be reported but not "
        "performed. ")
    # Buffer size passed to sftp.open() when reading an archive.
    BUFSIZE = 2**15

    def populate_form(self, page):
        """Assemble the instructions, directory fields, and option boxes.

        Pass:
            page - HTMLPage object to be populated
        """

        # Explanatory text comes first.
        instructions = page.fieldset("Instructions")
        instructions.append(page.B.P(self.INSTRUCTIONS))
        page.form.append(instructions)

        # One text field per directory, pre-filled with the class default.
        directories = page.fieldset("Directories")
        directories.set("id", "paths")
        defaults = (
            ("source", self.SOURCE_DIR),
            ("destination", self.TARGET_DIR),
            ("transferred", self.TRANSFERRED_DIR),
        )
        for field_name, default in defaults:
            directories.append(page.text_field(field_name, value=default))
        page.form.append(directories)

        # Checkboxes all share the "options" field name.
        choices = (
            ("keep", "Keep documents in 'Source' directory"),
            ("test", "Run in test mode"),
            ("overwrite",
             "Overwrite files in 'Destination' directory if they already exist"),
        )
        options = page.fieldset("Options")
        for value, label in choices:
            options.append(page.checkbox("options", value=value, label=label))
        page.form.append(options)
        page.add_css("fieldset {width:600px} #paths input {width:400px}")

    def build_tables(self):
        """Run the retrievals and return a one-column table of results.

        All archives are validated first; nothing is retrieved if any
        of them fails the MP3 path checks.
        """

        if not self.session.can_do("AUDIO DOWNLOAD"):
            self.bail("Not authorized")
        self.logger.info("Running in %s mode", self.mode)
        report = [
            f"Processing mode: {self.mode}",
            f"Source directory: {self.source_dir}",
            f"Destination directory: {self.destination_dir}",
            f"Transferred directory: {self.transferred_dir}",
        ]
        archives = self.zipfiles
        if archives:
            problems = []
            for archive in archives:
                problems.extend(self.check_mp3_paths(archive))
            if problems:
                report.extend(problems)
                report.append("Retrieval aborted by failed MP3 path checks")
            else:
                for archive in archives:
                    report.extend(self.retrieve(archive))
        else:
            report.append("No zip files found to be transferred")
        report.extend(f"Skipped {name}" for name in self.rejected)
        table_rows = [[entry] for entry in report]
        return self.Reporter.Table(table_rows, caption="Processing Results")

    def retrieve(self, name):
        """Transfer zipfile if appropriate and possible.

        The archive is fetched from the source directory on the SFTP
        server (unless we are running in test mode, or the file already
        exists at the destination and overwriting was not requested).
        Unless the retrieval failed, the source file is then copied or
        moved to the Transferred directory on the SFTP server.

        Pass:
            name - string for the name of the zipfile to transfer

        Return:
           array of strings for the processing results table
        """

        source = f"{self.source_dir}/{name}"
        target = f"{self.destination_dir}/{name}"
        retrieve = not self.test
        if name.lower() in self.already_transferred:
            if self.overwrite:
                line = f"Retrieved {name}, overwriting file at destination"
            else:
                line = f"Skipping {name}, which already exists at destination"
                retrieve = False
        else:
            line = f"Retrieved {name}"
        failed = False

        if retrieve:
            try:
                with self.connection.open_sftp() as sftp:
                    sftp.get(source, target)
            except Exception as e:
                self.logger.exception("Retrieving %s", source)
                line = f"Failed retrieval of {name}: {e}"
                failed = True
            else:
                # Only adjust permissions for a file we actually fetched;
                # after a failed retrieval the target may not even exist.
                process = run_command(f"fix-permissions {target}")
                if process.stderr:
                    self.bail(f"Unable to fix permissions for {target}",
                              extra=[process.stderr])

        lines = [line]
        if failed:
            return lines

        # Copy or move the source files to a backup location on the FTP server
        # There are several different scenarios:
        # a) The specific file to be copied already exists
        #    If file already exists in transfer directory first move the
        #    existing file to a backup location (adding time stamp to file name)
        # b) Running in Test or Live mode
        #    In test mode files are always copied
        #    In live mode files are moved unless option to keep source is specified
        # c) Setting option to keep files in source directory
        #    In test mode files are always kept in source directory
        #    In live mode files are moved unless option to keep source is specified
        #
        #    File exists   Test/Live   Keep Y/N   Action
        #    ---------------------------------------------
        #      N           Test           N        copy
        #      N           Test           Y        copy
        #      N           Live           N        move
        #      N           Live           Y        copy
        #
        #      Y           Test           N        move backup, then copy
        #      Y           Test           Y        move backup, then copy
        #      Y           Live           N        move backup, then move
        #      Y           Live           Y        move backup, then copy
        # -------------------------------------------------------------------------
        target = f"{self.transferred_dir}/{name}"

        # Check if target file already exists. Move to backup location
        ls_cmd = f"ls {target}"
        stdin, stdout, stderr = self.connection.exec_command(ls_cmd)
        ls_error = stderr.readlines()

        mode_flag = "T" if self.test else "L"

        # File already exists if ls command succeeds
        if not ls_error:
            self.logger.info(
                f"Found existing file {target.split('/')[-1]}")
            backup = f"{target}-{mode_flag}-{self.stamp}"
            self.logger.info(f"Create backup file {backup.split('/')[-1]}")
            cmd = f"mv {target} {backup}"
            stdin, stdout, stderr = self.connection.exec_command(cmd)
            errors = stderr.readlines()
            if errors:
                lines.append(f"Errors moving existing file {target}")
                self.logger.info(errors)

        # The source is only moved in live mode without the "keep" option;
        # the reported verb always follows the command actually used.
        moving = not self.test and not self.keep
        program = "mv" if moving else "cp"
        cmd = f"{program} {source} {target}"

        stdin, stdout, stderr = self.connection.exec_command(cmd)
        errors = stderr.readlines()
        if errors:
            verb = "moving" if moving else "copying"
            lines.append(f"Errors {verb} {name} to {target}")
            lines += errors
        else:
            action = "Moved" if moving else "Copied"
            lines.append(f"{action} {name} to Transferred directory")
            self.logger.info(f"{action} {name} to {target}")
        return lines

    def check_mp3_paths(self, filename):
        """Make sure the spreadsheet and zip file MP3 paths match.

        Also ensures that the paths follow the pattern convention
        established for the audio files.

        The archive is read from the source directory over SFTP. The
        MP3 paths named in the fifth column of each workbook in the
        archive (skipping the first row) are compared with the MP3
        members actually present in the archive.

        Pass:
           filename - string for the name of the zipfile to inspect

        Return:
           Possibly empty sequence of error strings
        """

        with self.connection.open_sftp() as sftp:
            zip_path = f"{self.source_dir}/{filename}"
            with sftp.open(zip_path, bufsize=self.BUFSIZE) as fp:
                archive = ZipFile(BytesIO(fp.read()))
        self.logger.info("Verifying MP3 paths in %s", zip_path)
        mp3_paths = set()
        col_paths = set()
        errors = []
        for name in archive.namelist():
            normalized = name.lower()
            if "macosx" in normalized:
                continue
            if normalized.endswith(".mp3"):
                mp3_paths.add(name)
            elif normalized.endswith(".xlsx"):
                opts = dict(read_only=True, data_only=True)
                book = load_workbook(BytesIO(archive.read(name)), **opts)
                rows = iter(book.active)
                next(rows, None)  # the first row holds the column headers
                for row in rows:
                    # A short row or an unusable cell counts as a missing
                    # path; anything else is allowed to propagate.
                    try:
                        value = row[4].value
                    except (IndexError, AttributeError):
                        value = None
                    if isinstance(value, str):
                        col_paths.add(value)
                    else:
                        errors.append("Missing MP3 path")

        # Sort so the report is the same from one run to the next.
        for path in sorted(mp3_paths | col_paths):
            if not self.member_pattern.match(path):
                errors.append(f"{filename} has invalid MP3 path format {path}")
        for path in sorted(col_paths - mp3_paths):
            errors.append(f"{filename} does not contain {path}")
        for path in sorted(mp3_paths - col_paths):
            errors.append(f"{filename} has unused MP3 file {path}")
        return errors

    @property
    def connection(self):
        """SSH client connected to the SFTP server (created on first use)."""

        if hasattr(self, "_connection"):
            return self._connection
        client = self._connection = paramiko.SSHClient()
        # NOTE(review): unknown host keys are accepted automatically.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        key = paramiko.RSAKey.from_private_key_file(self.SSH_KEY)
        host = self.server
        self.logger.info("Connecting to %s ...", self.server)
        client.connect(hostname=host, username=self.USER, pkey=key)
        self.logger.info("Connected")
        return self._connection

    @property
    def destination_dir(self):
        """Local directory receiving the zip archives (created if absent)."""

        if hasattr(self, "_destination_dir"):
            return self._destination_dir
        path = self.fields.getvalue("destination")
        if not os.path.exists(path):
            try:
                os.mkdir(path)
            except Exception as error:
                self.logger.exception("Creating %s", path)
                self.bail(error)
        self.logger.info("Destination directory: %s", path)
        self._destination_dir = path
        return self._destination_dir

    @property
    def keep(self) -> bool:
        """If True, leave the source files in place in live mode.

        The files are then copied (rather than moved) to the
        transferred directory.
        """
        return "keep" in self.options

    @property
    def mode(self) -> str:
        """One of 'test' or 'live' values, depending on the `test` option."""
        return "test" if self.test else "live"

    @property
    def names(self):
        """All the file names found in the source directory.

        The directory listed is the one submitted on the form
        (`source_dir`), so that the listing agrees with the paths
        used when the archives are checked and retrieved.
        """

        if not hasattr(self, "_names"):
            command = f"ls {self.source_dir}/*"
            self.logger.info("Running %s", command)
            stdin, stdout, stderr = self.connection.exec_command(command)
            names = []
            for line in stdout.readlines():
                name = line.split("/")[-1].strip()
                if name:
                    names.append(name)
            # Record anything the remote command complained about.
            for problem in stderr.readlines():
                self.logger.warning("%s: %s", command, problem.strip())
            self._names = names
        return self._names

    @property
    def already_transferred(self):
        """Lowercased names of the zip files present at the destination.

        Note that the working directory is changed to the destination
        directory as a side effect of collecting the names.
        """

        if hasattr(self, "_already_transferred"):
            return self._already_transferred
        os.chdir(self.destination_dir)
        found = glob("*.zip")
        self.logger.info("Destination dir has %s", found)
        self._already_transferred = {entry.lower() for entry in found}
        return self._already_transferred

    @property
    def options(self):
        """Values of the checked "options" boxes from the form."""

        if hasattr(self, "_options"):
            return self._options
        self._options = self.fields.getlist("options")
        return self._options

    @property
    def overwrite(self) -> bool:
        """Boolean indicating whether it is OK to overwrite destination files."""
        return "overwrite" in self.options

    @property
    def pattern(self):
        """Files we want will match this regular expression.

        The dot before the extension is escaped so that only a
        literal ".zip" suffix is accepted.
        """

        if not hasattr(self, "_pattern"):
            self._pattern = re.compile(rf"^{self.WEEK}\.zip$")
        return self._pattern

    @property
    def member_pattern(self):
        """Members of zip files must match this regular expression.

        The dot before the extension is escaped so that only a
        literal ".mp3" suffix is accepted.
        """

        if not hasattr(self, "_member_pattern"):
            expression = rf"^{self.WEEK}/{self.FILE}\.mp3$"
            self._member_pattern = re.compile(expression)
        return self._member_pattern

    @property
    def rejected(self):
        """Names of the files which fail our naming convention.

        Referencing the `zipfiles` property is all it takes: that
        property fills in this one's cached value as well as its own.
        """

        accepted = self.zipfiles
        if accepted and not hasattr(self, "_rejected"):
            self.bail("Internal error")
        return self._rejected

    @property
    def server(self):
        """First dotted component of the tier's SFTP host name."""

        if hasattr(self, "_server"):
            return self._server
        full_name = self.session.tier.hosts["SFTP"]
        self._server = full_name.split(".")[0]
        return self._server

    @property
    def source_dir(self):
        """Directory (from the form) holding the archives to be copied."""

        if hasattr(self, "_source_dir"):
            return self._source_dir
        self._source_dir = self.fields.getvalue("source")
        self.logger.info("Source directory: %s", self._source_dir)
        return self._source_dir

    @property
    def stamp(self):
        """Job start time as YYYYMMDDHHMMSS, used in backup file names."""

        if hasattr(self, "_stamp"):
            return self._stamp
        self._stamp = self.started.strftime("%Y%m%d%H%M%S")
        return self._stamp

    @property
    def test(self) -> bool:
        """Are we testing the waters instead of running in live mode?"""
        return "test" in self.options

    @property
    def transferred_dir(self):
        """Directory (from the form) to which source files are backed up."""

        if hasattr(self, "_transferred_dir"):
            return self._transferred_dir
        path = self.fields.getvalue("transferred")
        self.logger.info("Transferred directory: %s", path)
        self._transferred_dir = path
        return self._transferred_dir

    @property
    def zipfiles(self):
        """Names of the source files which match the archive pattern.

        The names which do not match are cached for the `rejected`
        property as a side effect.
        """

        if hasattr(self, "_zipfiles"):
            return self._zipfiles
        accepted, ignored = [], []
        for candidate in self.names:
            bucket = accepted if self.pattern.match(candidate) else ignored
            bucket.append(candidate)
        self._zipfiles = accepted
        if not hasattr(self, "_rejected"):
            self._rejected = ignored
        if accepted:
            self.logger.info("%d audio archive files found to transfer",
                             len(accepted))
        else:
            self.logger.warning("No audio archive files found to transfer")
        for candidate in accepted:
            self.logger.info(candidate)
        if ignored:
            self.logger.warning("Ignored files: %r", ignored)
        return self._zipfiles

Exemplo n.º 16

0

Exibir arquivo

Arquivo: glossifier_refresh.py Projeto: NCIOCPL/cdr-scheduler

class Terms:
    """Glossary term name information used by the glossifier.

    Collects the published name usages, stores them in the database,
    sends them to the Drupal CMS servers, and reports duplicates.
    """

    # Short (unqualified) name of the local host.
    SERVER = socket.gethostname().split(".")[0]
    # Sender address and subject line for the email notifications.
    SENDER = "cdr@{}.nci.nih.gov".format(SERVER.lower())
    SUBJECT = "DUPLICATE GLOSSARY TERM NAME MAPPINGS ON " + SERVER.upper()
    # Names whose duplicate mappings are deliberately left unreported.
    UNREPORTED = set() # see OCECDR-4795 set(["tpa", "cab", "ctx", "receptor"])
    # Control table group listing the servers which receive the glossary.
    GROUP = "glossary-servers"

    def __init__(self, logger=None, recip=None):
        """
        Prepare to collect the glossary term information.

        Pass:
            logger - the scheduled job's logger (unless testing from the
                     command line)
            recip - optional email address for testing without spamming
                    the users
        """

        self.tier = Tier()
        self.recip = recip
        # Fall back on our own debug-level logger when none is supplied.
        if logger is None:
            logger = cdr.Logging.get_logger("glossifier", level="debug")
        self.logger = logger
        self.conn = db.connect()
        self.cursor = self.conn.cursor()

    def save(self):
        """
        Store the serialized name information in the database.
        """

        names = repr(self.names)
        self.logger.info("saving glossifier names (%d bytes)", len(names))
        self.cursor.execute("""\
            UPDATE glossifier
               SET refreshed = GETDATE(),
                   terms = ?
             WHERE pk = 1""", names)
        self.conn.commit()

    def send(self):
        """
        Send the glossary information to registered Drupal CMS servers
        """

        failures = []
        success = "Sent glossary to server %r at %s"
        failure = "Failure sending glossary to server %r at %s: %s"
        for alias, base in self.servers.items():
            url = "{}/pdq/api/glossifier/refresh".format(base)
            try:
                response = requests.post(url, json=self.data, auth=self.auth)
                if response.ok:
                    self.logger.info(success, alias, base)
                else:
                    args = alias, base, response.reason
                    self.logger.error(failure, *args)
                    failures.append(args)
            except Exception as e:
                args = alias, base, e
                self.logger.exception(failure, *args)
                failures.append(args)
        if failures:
            group = "Developers Notification"
            if self.recip:
                recips = [self.recip]
            else:
                recips = Job.get_group_email_addresses(group)
            if not recips:
                raise Exception("no recips found for glossary failure message")
            tier = self.tier.name
            subject = "[{}] Failure sending glossary information".format(tier)
            lines = []
            for args in failures:
                lines.append("Server {!r} at {}: {}".format(*args))
            body = "\n".join(lines)
            opts = dict(subject=subject, body=body)
            message = cdr.EmailMessage(self.SENDER, recips, **opts)
            self.logger.error("send failure notice sent to %r", recips)

    @property
    def auth(self):
        """
        Basic authorization credentials pair for Drupal CMS servers
        """

        if not hasattr(self, "_auth"):
            password = self.tier.password("PDQ")
            if not password:
                raise Exception("Unable to find PDQ CMS credentials")
            self._auth = "PDQ", password
        return self._auth

    @property
    def concepts(self):
        """
        Dictionary information for the term concepts.
        """

        if not hasattr(self, "_concepts"):

            class Concept:
                """
                CDR GlossaryTermConcept document.

                Attributes:
                  - id: integer for the document's CDR ID
                  - dictionaries: English and Spanish dictionaries
                                  for which we have definitions
                """

                def __init__(self, doc_id):
                    self.id = doc_id
                    self.dictionaries = dict(en=set(), es=set())

            self._concepts = {}
            tags = dict(en="TermDefinition", es="TranslatedTermDefinition")
            for lang in tags:
                path = "/GlossaryTermConcept/{}/Dictionary".format(tags[lang])
                query = db.Query("query_term_pub", "doc_id", "value")
                query.where(query.Condition("path", path))
                rows = query.execute(self.cursor).fetchall()
                self.logger.debug("fetched %d %s dictionaries", len(rows), lang)
                for doc_id, dictionary in rows:
                    concept = self._concepts.get(doc_id)
                    if not concept:
                        concept = self._concepts[doc_id] = Concept(doc_id)
                    concept.dictionaries[lang].add(dictionary.strip())
        return self._concepts

    @property
    def data(self):
        """
        JSON-serializable glossary data for the Drupal CMS servers

        JSON can't deal with sets, so we transform the sets of
        dictionaries into plain lists.
        """

        if not hasattr(self, "_data"):
            names = dict()
            for name, docs in self.names.items():
                names[name] = dict()
                for doc_id, languages in docs.items():
                    names[name][doc_id] = dict()
                    for language, dictionaries in languages.items():
                        names[name][doc_id][language] = list(dictionaries)
            self._data = names
        return self._data

    @property
    def extra_names(self):
        """Fetch variant names from the external_map table."""

        if not hasattr(self, "_extra_names"):
            self._extra_names = {}
            for langcode in Term.USAGES:
                query = db.Query("external_map m", "m.value", "m.doc_id")
                query.join("external_map_usage u", "u.id = m.usage")
                query.where(query.Condition("u.name", Term.USAGES[langcode]))
                rows = query.execute(self.cursor).fetchall()
                args = len(rows), langcode
                self.logger.debug("fetched %d extra %s names", *args)
                names = {}
                for name, doc_id in rows:
                    if doc_id not in names:
                        names[doc_id] = [name]
                    else:
                        names[doc_id].append(name)
                self._extra_names[langcode] = names
        return self._extra_names

    @property
    def names(self):
        """
        Dictionary of name information used by the glossifier.

        Only unique usage information is included in the returned dictionary.
        Duplicate usage is stored in the `dups` attribute as a side effect
        of this method, so that they can be reported via email notification.
        There are a handful of unreported duplicates which CIAT has decided
        not to eliminate.

        Return:
            nested dictionary indexed by normalized name strings:
                names[normalized-name][doc_id][language] => set of dictionaries
        """

        if not hasattr(self, "_names"):
            self.dups = dict()
            names = dict()
            for key in self.usages:
                name, language, dictionary = key
                ids = list(self.usages[key])
                if len(ids) > 1:
                    if name not in self.UNREPORTED:
                        self.dups[key] = ids
                else:
                    doc_id = ids[0]
                    if name not in names:
                        names[name] = {}
                    if doc_id not in names[name]:
                        names[name][doc_id] = {}
                    if language not in names[name][doc_id]:
                        names[name][doc_id][language] = set()
                    if dictionary is not None:
                        names[name][doc_id][language].add(dictionary)
            self._names = names
        return self._names

    @property
    def servers(self):
        """
        Servers who receive scheduled updated glossary data

        This property is a dictionary of each server's base URL,
        indexed by a unique alias.

        The servers are stored in the CDR control table. Each server
        gets a row in the table, with `GROUP` as the value of the `grp`
        column, and a unique alias for the server stored in the `name`
        column. The URL for the server is stored in the `val` column.

        If no servers are found in the table, then fetch the
        DRUPAL CMS with which this tier is associated, and
        use the alias "Primary" for the server.
        """

        if not hasattr(self, "_servers"):
            self._servers = cdr.getControlGroup(self.GROUP)
            if not self._servers:
                server = self.tier.hosts.get("DRUPAL")
                self._servers = dict(Primary="https://{}".format(server))
        return self._servers

    @property
    def usages(self):
        """
        Published glossary term name documents.

        Property value is a dictionary indexed by a tuple containing:
          - normalized term name string
          - language ("en" or "es")
          - dictionary (e.g., "Cancer.gov"; None if no dictionaries
                        assigned for this language)
        The values of the dictionaries are sequence of glossary term
        name documents which are found for the tuple's values. In order
        to be usable by the glossifier, each value must be unique
        (that is, the sequence must have exactly one term name doc ID).
        """

        if not hasattr(self, "_usages"):

            # Start with an empty usages dictionary.
            self._usages = {}

            # Get the dictionary of Concept object with dictionary information.
            concepts = self.concepts
            self.logger.debug("fetched %d concepts", len(concepts))

            # Fetch all of the published CDR glossary term documents.
            columns = "v.id", "v.xml", "q.int_val"
            joins = (
                ("pub_proc_doc d", "d.doc_id = v.id", "d.doc_version = v.num"),
                ("pub_proc_cg c", "c.id = v.id", "c.pub_proc = d.pub_proc"),
                ("query_term_pub q", "q.doc_id = v.id"),
            )
            path = "/GlossaryTermName/GlossaryTermConcept/@cdr:ref"
            query = db.Query("doc_version v", *columns)
            for args in joins:
                query.join(*args)
            query.where(query.Condition("q.path", path))
            rows = query.execute(self.cursor).fetchall()
            self.logger.debug("processing %d glossary terms", len(rows))

            # Use the term information to populate the usages dictionary.
            for term_id, doc_xml, concept_id in rows:
                term = Term(self, term_id, doc_xml, concepts.get(concept_id))
                term.record_usages(self._usages)

        return self._usages

    def report_duplicates(self):
        """
        Send a report on duplicate name+language+dictionary mappings.
        """

        if not self.dups:
            self.logger.error("no duplicates to report")
            return
        if self.recip:
            recips = [self.recip]
        else:
            recips = Job.get_group_email_addresses("GlossaryDupGroup")
        if not recips:
            raise Exception("no recipients found for glossary dup message")
        body = ["The following {:d} sets of ".format(len(self.dups)),
                "duplicate glossary mappings were found in the CDR ",
                "on {}. ".format(self.SERVER.upper()),
                "Mappings for any phrase + language + dictionary must ",
                "be unique. ",
                "Please correct the data so that this requirement is met. ",
                "You may need to look at the External Map Table for ",
                "Glossary Terms to find some of the mappings.\n"]
        template = "\n{} (language={!r} dictionary={!r})\n"
        for key in sorted(self.dups):
            name, language, dictionary = key
            args = name.upper(), language, dictionary
            body.append(template.format(*args))
            for doc_id in self.dups[key]:
                body.append("\tCDR{:010d}\n".format(doc_id))
        body = "".join(body)
        opts = dict(subject=self.SUBJECT, body=body)
        message = cdr.EmailMessage(self.SENDER, recips, **opts)
        message.send()
        self.logger.info("duplicate mapping notification sent to %r", recips)

Exemplo n.º 17

0

Exibir arquivo

# Module-level logger; supplied later by the FileSweeper object.
FS_LOGGER = None

# Don't go wild creating output files
MAX_OUTPUT_FILES_WITH_ONE_NAME = 5

# Size for read/write
BLOCK_SIZE = 4096

# Date constants. YEARS_OLD is the maximum number of years we'll look
# back (a sanity check); LONG_TIME is that span expressed in seconds.
DAY_SECS  = 86400
YEAR_DAYS = 365.25
YEARS_OLD = 10
LONG_TIME = DAY_SECS * YEAR_DAYS * YEARS_OLD

# Where are we running? (name of the current CDR tier)
TIER = Tier().name


class FileSweeper(Job):
    """
    Adapter to allow the overall file clean up task to be driven
    from the CDR scheduler.

    Required jobParam fields:
        ConfigFile  Full or relative path to configuration file.

    Optional jobParam fields:
        TestMode    Boolean value. Create output files but delete nothing.
                    (default False)
        Email       Alternate email list for fatal error msgs.
                    If more than one address, use '+' as separator, no spaces.

Exemplo n.º 18

0

Exibir arquivo

Arquivo: dictionary_loader.py Projeto: NCIOCPL/cdr-lib

    def tier(self):
        """Tier object for the CDR server named by the "tier" option."""

        if hasattr(self, "_tier"):
            return self._tier
        requested = self.opts.get("tier")
        self._tier = Tier(requested)
        return self._tier