示例#1
0
    def run(self):
        """Execute the sub-command selected on the command line.

        ``run``
            Parses the experiment CSV given by ``self.args.infile``, combines
            it with the title, description, hypothesis and analysis-type
            arguments, registers the result as a "Runable" Milhouse project
            and prints the ID of the registered project.

        ``status``
            Fetches the project with ID ``self.args.id`` and prints its title,
            owner and the values reported by its ``getStatusDict()``.

        Any other sub-command name is silently ignored.
        """
        subName = self.args.subName

        if subName == "run":
            condDict = MU.parseExperimentCSV(self.args.infile)

            # Determine the submitting user.  The command is executed directly
            # (no intermediate shell) and communicate() both waits for the
            # child and closes the pipe, so neither a zombie process nor an
            # open file descriptor is left behind.
            whoami = subprocess.Popen(["whoami"], stdout=subprocess.PIPE)
            runBy = whoami.communicate()[0].strip()

            if self.args.atype == "Standard":
                analysis = STANDARD
            else:
                analysis = MINIMAL

            dataDict = {
                "Analysis": analysis,
                # Conditions are stored sorted by condition name
                "Conditions": OrderedDict((k, condDict[k]) for k in sorted(condDict.keys())),
                "Description": self.args.desc,
                "Hypothesis": self.args.hyp,
                "RunBy": runBy,
                "Tags": ["PrimaryRetraining"],
                "Title": self.args.title,
            }

            mProjectR = MProjectFactory.getMProject("Runable", self.mDB)
            mProjectR.setFromDict(dataDict)
            mProjectR.register()
            print("Registered Milhouse Project %s" % mProjectR.ID)

        elif subName == "status":
            mProject = self.mDB.getMProjectFromID(self.args.id)
            sDict = mProject.getStatusDict()
            analysisStatus = sDict["Analysis"]

            print("Milhouse Project %d: %s [by: %s]" % (self.args.id, mProject.title, mProject.runby))
            print("Status: %s" % sDict["Status"])
            print("Martin: %.1f%%" % sDict["Conditions"])
            print("Analysis-Plots: %.1f%%" % analysisStatus["Plots"])
            print("Analysis-Tables: %.1f%%" % analysisStatus["Tables"])
示例#2
0
    def _validateCSV(self, csvFN):
        """Validate an experiment CSV file; log and exit on the first problem.

        The checks run in this order:

        1. ``MartinType`` values are known keys of ``MU.MARTIN_ROOT``, and
           smrtportal conditions come with a ``MartinJobID`` column.  When the
           column is absent it is appended, filled with the first key of
           ``MU.MARTIN_ROOT``.
        2. No known column was left unpopulated (parsed with a bool dtype).
        3. All default column names are present.
        4. Condition names only use alphanumerics, ``-``, ``_`` and ``.``.
        5. Every non-default column carries the ``p_`` prefix.
        6. Workflow names exist in Martin's workflow list (skipped when any
           condition is of type smrtportal).
        7. Reference sequence names exist in the reference repository.
        8. ``MartinJobID`` values have a plausible length.
        9. Primary folders exist below their ``<experiment>-<run>`` run code.
        10. Within one condition, every column except ``RunCodes`` holds a
            single unique value.

        Args:
            csvFN: Path of the CSV file to validate.  When empty,
                ``self.args.infile`` is used instead.

        Raises:
            SystemExit: With exit status 1 as soon as a check fails, or when
                a ``ValueError`` is raised while reading or inspecting the
                file.  The reason is reported through ``logging.error``.
        """

        def abort(msg):
            """Log *msg* as an error and terminate with a failure status."""
            logging.error(msg)
            # Non-zero status so that calling scripts can detect the failure
            sys.exit(1)

        try:
            data = MU.getRecArrayFromCSV(csvFN or self.args.infile, caseSensitive=True)
            CSVF_DEFAULT = MU.getCSVF("MartinJobID" in data.dtype.names, withExtras=True)

            # Check for correct usage of MartinType
            if "MartinType" in data.dtype.names:
                if not set(n.unique(data["MartinType"])).issubset(MU.MARTIN_ROOT.keys()):
                    abort("Invalid MartinType value, allowed values are = [%s]" % ", ".join(MU.MARTIN_ROOT.keys()))
                elif "smrtportal" in data["MartinType"]:
                    if "MartinJobID" not in data.dtype.names:
                        abort(
                            "Smrportal conditions can not be run at this time and thus require a populated MartinJobID column"
                        )
            else:
                # No MartinType column given: add one holding the first known type
                defaultType = list(MU.MARTIN_ROOT.keys())[0]
                data = MU.addColumnToRecArray(data, [defaultType] * len(data), ("MartinType", "|S11"), tail=True)

            # Check for unpopulated default columns (these show up with a bool dtype)
            wrngclmns = [
                descr[0]
                for descr in data.dtype.descr
                if n.dtype(descr[1]) == n.dtype(bool) and descr[0] in MU.CSVF_ALL
            ]
            if wrngclmns:
                abort(
                    "Incorrectly formatted CSV file:\n Column(s) [%s] have not been populated" % ", ".join(wrngclmns)
                )

            # Check if the file contains the correct default column names
            if any(name not in data.dtype.names for name in CSVF_DEFAULT):
                # Wrapped in a 1-tuple so a tuple-valued CSVF_DEFAULT formats cleanly
                abort("Incorrectly formatted CSV file:\n Missing default column names from %s" % (CSVF_DEFAULT,))

            # Check for correct naming of conditions
            illegalChar = re.compile(r"[^A-Za-z0-9_\.\-]")
            if any(illegalChar.search(name) for name in data["Name"]):
                abort(
                    "Incorrectly formatted CSV file:\n Condition names can only contain: alphanumeric characters, dashes (-), underscores (_) and dots (.)"
                )

            # Check if the non-default columns have a p_ prefix
            extras = [name for name in data.dtype.names if name not in MU.CSVF_ALL]
            if any(not name.startswith("p_") for name in extras):
                abort('Incorrectly formatted CSV file:\n Extra parameters need to be named using a "p_" prefix')

            # Check if workflow provided exists in martin's list of workflows
            if "smrtportal" not in data["MartinType"]:
                mWkflowNames = self.mDBS.getMartinWkflowDict().keys()
                if any(wkflow not in mWkflowNames for wkflow in n.unique(data["MartinWorkflow"])):
                    abort("Unsupported Martin Workflow name provided.")

            # Check if reference sequence provided exists in the reference repository
            if "MartinRefSeq" in data.dtype.names:
                # Collect the offending *names* (not whole rows) so they can be
                # joined into the error message.
                wrongrefseqs = sorted(
                    set(
                        str(row["MartinRefSeq"])
                        for row in data
                        if not glob.glob("%s/%s" % (MU.MARTIN_REFREPOS[row["MartinType"]], row["MartinRefSeq"]))
                    )
                )
                if wrongrefseqs:
                    abort("The following reference sequence names are invalid: [%s]." % ",".join(wrongrefseqs))

            # Check for correctness MartinJobID values
            if "MartinJobID" in data.dtype.names:
                # Distinct ID lengths other than the expected 6
                wrnglens = set(len(str(jobID)) for jobID in data["MartinJobID"]) - set([6])
                onlyFives = wrnglens.issubset([5])
                if wrnglens and onlyFives and "smrtportal" not in data["MartinType"]:
                    # All deviating IDs have length 5 but no condition is marked smrtportal
                    abort(
                        "Invalid MartinJobID lengths supplied:\n If these are smrtportal jobs, you need to set MartinType to smrtportal"
                    )
                elif wrnglens and not onlyFives:
                    abort(
                        "Invalid MartinJobID lengths supplied:\n Martin expects length == 6 and smrtportal length => 5"
                    )

            # Check whether primary folder names are contained within the given run codes
            if set(["RunCodes", "PrimaryFolder"]).issubset(data.dtype.names):
                for row in data:
                    codeParts = row["RunCodes"].split("-")
                    if len(codeParts) != 2:
                        # Only run codes of the form <experiment>-<run> are checked
                        continue
                    exp, run = codeParts
                    if not glob.glob("/mnt/data*/vol*/%s/%s/%s" % (exp, run, row["PrimaryFolder"])):
                        abort(
                            "Run code [%s] does not contain primary folder [%s]."
                            % (row["RunCodes"], row["PrimaryFolder"])
                        )

            # Check for uniqueness of column values within conditions
            for cond in n.unique(data["Name"]):
                sl_data = data[data["Name"] == cond]
                if any(len(n.unique(sl_data[col])) != 1 for col in sl_data.dtype.names if col != "RunCodes"):
                    abort("For condition name=%s some of the attributes are NOT unique" % cond)

        except ValueError as err:
            abort("Incorrectly formatted CSV file:\n %s" % err)
示例#3
0
MINIMAL = ["tSummary", "readlength", "accuracy", "yield"]

# Point MILHOUSE_HOME at the Milhouse share of the selected server
os.environ["MILHOUSE_HOME"] = os.path.join("/mnt/secondary/Share/Milhouse", server)

# Locate and load milhouse.conf below MILHOUSE_HOME; give up (exit status 1)
# when either the variable or the file is missing.
# NOTE(review): the variable is assigned unconditionally just above, so the
# first guard looks unreachable in practice - confirm before relying on it.
if not os.environ.get("MILHOUSE_HOME"):
    print("Environment variable MILHOUSE_HOME is not set! Exiting...")
    sys.exit(1)

# CONFDICT first holds the path of the configuration file ...
CONFDICT = os.path.join(os.environ.get("MILHOUSE_HOME"), "config", "milhouse.conf")
if not os.path.isfile(CONFDICT):
    print("milhouse.conf stored at [%s] does not exist! Exiting..." % CONFDICT)
    sys.exit(1)

# ... and is then replaced by the parsed configuration
CONFDICT = MU.parseMilhouseConf(CONFDICT)
print("Submitting Milhouse analysis project to %s server" % CONFDICT["MDB_TYPE"])

# Tool for running Milhouse Jobs from command line
class ToolRunner(object):
    def __init__(self):
        """Obtain the MDB controller, then parse options and set up logging.

        The controller of type "data" is requested from ``MDBCFactory`` using
        the MDB_SERVER, MDB_PORT and ML_DATADIR entries of the module-level
        ``CONFDICT`` and stored as ``self.mDB``.  ``self.mDBS`` is the first
        element returned by the controller's ``getMDBExtra()``.
        """
        controllerSettings = dict(
            mdbServer=CONFDICT["MDB_SERVER"],
            mdbPort=CONFDICT["MDB_PORT"],
            mlDataDir=CONFDICT["ML_DATADIR"],
        )
        self.mDB = MDBCFactory.getMDBController("data", **controllerSettings)

        mdbExtras = self.mDB.getMDBExtra()
        self.mDBS = mdbExtras[0]

        # The database handles are in place before options are parsed
        self._parseOptions()
        self._setupLogging()

    def _parseOptions(self):
        desc = [
            "Tool for running and managing Milhouse projects from the command line.",