"json_field": str
}
GITHUB_SOURCE = {"provider": 'github', "properties": GITHUB_SOURCE_PROPS}

# S3 Source
S3_SOURCE_PROPS = {
    "account_id": And(Use(int), lambda n: len(str(n)) == 12),
    "bucket_name": str,
    "object_key": str
}
S3_SOURCE = {"provider": 's3', "properties": S3_SOURCE_PROPS}

# CodeBuild
CODEBUILD_PROPS = {
    Optional("image"): str,
    Optional("size"): Or('small', 'medium', 'large'),
    Optional("spec_filename"): str,
    Optional("environment_variables"): {
        Optional(str): Or(str, bool, int, object)
    },
    Optional("role"): str,
    Optional("timeout"): int,
    Optional("privileged"): bool,
    Optional("spec_inline"): str
}
DEFAULT_CODEBUILD_BUILD = {
    Optional("provider"): 'codebuild',
    Optional("enabled"): bool,
    Optional("properties"): CODEBUILD_PROPS
}
STAGE_CODEBUILD_BUILD = {
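STAGE_CODEBUILD_BUILD is cut off in this listing. As a hedged sketch of how these plain-dict schemas are typically used (the config values below are made up), the S3 source mapping can be wrapped in Schema and validated:

from schema import Schema, SchemaError

s3_schema = Schema(S3_SOURCE)
config = {
    "provider": "s3",
    "properties": {
        "account_id": "123456789012",  # Use(int) coerces, then the 12-digit check runs
        "bucket_name": "my-bucket",
        "object_key": "source.zip",
    },
}
try:
    validated = s3_schema.validate(config)
except SchemaError as exc:
    print(exc)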
Example #2
@classmethod
def spook_schema(cls) -> dict:
    return {"a": Or(None, int), "b": Or(None, str), "c": Or(None, bool)}
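A minimal check of the mapping returned above, assuming Or is imported from schema as in the other examples (the payload is made up):

from schema import Or, Schema

schema = Schema({"a": Or(None, int), "b": Or(None, str), "c": Or(None, bool)})
payload = {"a": 1, "b": None, "c": True}
assert schema.validate(payload) == payload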
Example #3
import logging

from concurrent.futures import ThreadPoolExecutor
from schema import Or
from tornado import httpclient
from tornado.web import RequestHandler
from tornado import gen

from error_code import ERR_UNKNOWN, ERR_NO_CONTENT, ERR_ARG, ERR_MULTIPLE_OBJ_RETURNED, ERR_DUPLICATE_ENTRY
from tools_lib.gtornado.http_code import (
    HTTP_200_OK, HTTP_204_NO_CONTENT, HTTP_201_CREATED, HTTP_400_BAD_REQUEST,
    HTTP_422_UNPROCESSABLE_ENTITY, HTTP_403_FORBIDDEN,
    HTTP_500_INTERNAL_SERVER_ERROR, HTTP_401_UNAUTHORIZED)
from . import async_requests

STR_OR_UNICODE = Or(str, unicode)
executor = ThreadPoolExecutor(8)


class RedirectedHTTPResponse(object):
    def __init__(self, response):
        self.request = response.request
        self.code = response.code
        self.reason = response.reason
        self.headers = response.headers
        self.effective_url = response.effective_url
        self.buffer = response.buffer
        self.body = response.body
        self.error = response.error
        self.request_time = response.request_time
        self.time_info = response.time_info
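A hedged illustration of the wrapper above: detach a response fetched with tornado's synchronous client so its data outlives the client (the URL is a placeholder):

client = httpclient.HTTPClient()
response = client.fetch("http://example.com/")
detached = RedirectedHTTPResponse(response)
client.close()
print(detached.code, detached.effective_url)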
Example #4
File: base.py Project: sandeep937/dvc
class OutputBase(object):
    IS_DEPENDENCY = False

    REMOTE = None

    PARAM_PATH = 'path'
    PARAM_CACHE = 'cache'
    PARAM_METRIC = 'metric'
    PARAM_METRIC_TYPE = 'type'
    PARAM_METRIC_XPATH = 'xpath'

    METRIC_SCHEMA = Or(
        None, bool, {
            Optional(PARAM_METRIC_TYPE): Or(str, None),
            Optional(PARAM_METRIC_XPATH): Or(str, None)
        })

    DoesNotExistError = OutputDoesNotExistError
    IsNotFileOrDirError = OutputIsNotFileOrDirError

    def __init__(self,
                 stage,
                 path,
                 info=None,
                 remote=None,
                 cache=True,
                 metric=False):
        self.stage = stage
        self.project = stage.project
        self.url = path
        self.info = info
        self.remote = remote or self.REMOTE(self.project, {})
        self.use_cache = False if self.IS_DEPENDENCY else cache
        self.metric = False if self.IS_DEPENDENCY else metric

        if self.use_cache and getattr(self.project.cache,
                                      self.REMOTE.scheme) is None:
            raise DvcException(
                "no cache location setup for '{}' outputs.".format(
                    self.REMOTE.scheme))

    def __repr__(self):
        return "{class_name}: '{url}'".format(class_name=type(self).__name__,
                                              url=(self.url or 'No url'))

    def __str__(self):
        return self.url

    @classmethod
    def match(cls, url):
        return re.match(cls.REMOTE.REGEX, url)

    def group(self, name):
        match = self.match(self.url)
        if not match:
            return None
        return match.group(name)

    @classmethod
    def supported(cls, url):
        return cls.match(url) is not None

    @property
    def scheme(self):
        return self.REMOTE.scheme

    @property
    def path(self):
        return self.path_info['path']

    @property
    def sep(self):
        return '/'

    @property
    def exists(self):
        return self.remote.exists(self.path_info)

    def changed(self):
        if not self.exists:
            return True

        if not self.use_cache:
            return self.info != self.remote.save_info(self.path_info)

        return getattr(self.project.cache,
                       self.scheme).changed(self.path_info, self.info)

    def status(self):
        if self.changed():
            # FIXME better msgs
            return {str(self): 'changed'}
        return {}

    def save(self):
        if not self.use_cache:
            self.info = self.remote.save_info(self.path_info)
        else:
            self.info = getattr(self.project.cache,
                                self.scheme).save(self.path_info)

    def dumpd(self):
        ret = self.info.copy()
        ret[self.PARAM_PATH] = self.url

        if self.IS_DEPENDENCY:
            return ret

        ret[self.PARAM_CACHE] = self.use_cache

        if isinstance(self.metric, dict):
            if self.PARAM_METRIC_XPATH in self.metric \
               and not self.metric[self.PARAM_METRIC_XPATH]:
                del self.metric[self.PARAM_METRIC_XPATH]

        ret[self.PARAM_METRIC] = self.metric

        return ret

    def download(self, to_info):
        self.remote.download([self.path_info], [to_info])

    def checkout(self, force=False):
        if not self.use_cache:
            return

        getattr(self.project.cache, self.scheme).checkout(self.path_info,
                                                          self.info,
                                                          force=force)

    def remove(self, ignore_remove=False):
        self.remote.remove(self.path_info)
        if self.scheme != 'local':
            return

        if ignore_remove and self.use_cache and self.is_local:
            self.project.scm.ignore_remove(self.path)

    def move(self, out):
        if self.scheme == 'local' and self.use_cache and self.is_local:
            self.project.scm.ignore_remove(self.path)

        self.remote.move(self.path_info, out.path_info)
        self.url = out.url
        self.path_info = out.path_info
        self.save()

        if self.scheme == 'local' and self.use_cache and self.is_local:
            self.project.scm.ignore(self.path)
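A quick, hedged look at METRIC_SCHEMA from the class above; the metric values are made up and nothing needs to be instantiated:

from schema import Schema

metric_schema = Schema(OutputBase.METRIC_SCHEMA)
metric_schema.validate(True)                              # plain on/off flag
metric_schema.validate({"type": "json", "xpath": "AUC"})  # typed metric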
Example #5
def nullable(schema):
    """
    Create new schema that allows the supported schema or None.
    """
    return Or(schema, None)
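A quick illustration, assuming Or comes from schema as in the function above:

from schema import Schema

maybe_int = Schema(nullable(int))
assert maybe_int.validate(3) == 3
assert maybe_int.validate(None) is None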
Example #6
from flask import Flask
from flask_cors import CORS
import pickle

from schema import And, Or, Schema

app = Flask(__name__)
CORS(app)
model = pickle.load(open('model.pkl', 'rb'))

PREDICT_SCHEMA = Schema({
    'CHARGE_COUNT': int,
    'CHARGE_DISPOSITION': And(str, len),
    'OFFENSE_CATEGORY': And(str, len),
    'PRIMARY_CHARGE_FLAG': bool,
    'DISPOSITION_CHARGED_OFFENSE_TITLE': And(str, len),
    'DISPOSITION_CHARGED_CLASS': And(str, len),
    'SENTENCE_JUDGE': And(str, len),
    'SENTENCE_PHASE': And(str, len),
    'COMMITMENT_TERM': And(str, len),
    'COMMITMENT_UNIT': And(str, len),
    'LENGTH_OF_CASE_in_Days': Or(float, int),
    'AGE_AT_INCIDENT': Or(float, int),
    'RACE': And(str, len),
    'GENDER': And(str, len),
    'INCIDENT_CITY': And(str, len),
    'LAW_ENFORCEMENT_AGENCY': And(str, len),
    'LAW_ENFORCEMENT_UNIT': And(str, len),
    'SENTENCE_TYPE': And(str, len)
})

PREDICT_KEYS = [
    'OFFENSE_CATEGORY', 'PRIMARY_CHARGE_FLAG',
    'DISPOSITION_CHARGED_OFFENSE_TITLE', 'CHARGE_COUNT',
    'DISPOSITION_CHARGED_CLASS', 'CHARGE_DISPOSITION', 'SENTENCE_JUDGE',
    'SENTENCE_PHASE', 'AGE_AT_INCIDENT', 'GENDER', 'LAW_ENFORCEMENT_AGENCY'
]
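The snippet stops before the route that ties these together; a hedged sketch of how PREDICT_SCHEMA and PREDICT_KEYS might be wired up (the route name, feature ordering, and the model call are assumptions):

from flask import jsonify, request
from schema import SchemaError

@app.route('/predict', methods=['POST'])
def predict():
    payload = request.get_json(force=True)
    try:
        data = PREDICT_SCHEMA.validate(payload)
    except SchemaError as exc:
        return jsonify({'error': str(exc)}), 400
    features = [[data[key] for key in PREDICT_KEYS]]
    return jsonify({'prediction': model.predict(features).tolist()})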
Example #7
from schema import Schema, And, Or, Optional, Const

CONFIGURATION_PATH = "configuration/config.yaml"

# All messages need to have a source address and a destination address.
# These addresses should resolve using cluster DNS.
# Messages may optionally include a list of headers as string key/value pairs,
# and an optional body. How these headers and bodies are included in new values
# depends on where the message is defined in the schema - `matchRequest` messages
# have different behaviour than `onFailure` messages.
HTTP_REQUEST_SCHEMA = Schema(
    {
        "method":
        lambda t: t in ["GET", "HEAD", "PUT", "PATCH", "DELETE", "POST"],
        "url": str,
        Optional("headers"): Schema(Or({str: str}, {})),
        Optional("body"): str,
    },
    ignore_extra_keys=True,
)

HTTP_RESPONSE_SCHEMA = Schema(
    {
        "status-code": int,
        Optional("headers"): Schema(Or({str: str}, {})),
        Optional("body"): str,
    },
    ignore_extra_keys=True,
)
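# A quick sanity check against the request schema; the message below is
# made up and the cluster-DNS URL is a placeholder.
sample_request = {
    "method": "POST",
    "url": "http://orders.default.svc.cluster.local/api/orders",
    "headers": {"content-type": "application/json"},
    "body": '{"id": 1}',
}
HTTP_REQUEST_SCHEMA.validate(sample_request)  # raises SchemaError if malformed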

# Some messages are part of a transaction. Such transactions need to specify a timeout,
Example #8
File: schemas.py Project: usydnlp/REXUP
tier_schema = Schema({
    "data": [vectorised_data_schema],
    "images": {
        str: {
            "imagesFilename": str,  # config.imagesFile(tier)
            "imgsInfoFilename": str,  # config.imgsInfoFile(tier)
            "imgsSceneGraph": str  # config.sceneGraphsFile(tier)
        }
    },
    "train": bool
})

dataset_schema = Schema({
    "evalTrain": tier_schema,
    "test": tier_schema,
    "train": Or(None, tier_schema),
    "val": tier_schema,
})

data_schema = Schema({
    "main": Or(None, dataset_schema),
    "extra": Or(None, dataset_schema)
})

separate_embeddings_schema = Schema({
    'a': Or(None, np.ndarray),
    'q': np.ndarray,
    'scene': Or(None, np.ndarray)
})
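# Hedged spot-check of the schema above (np is already in scope in this
# module; the arrays are made up):
separate_embeddings_schema.validate({'a': None, 'q': np.zeros(3), 'scene': None})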

shared_embeddings_schema = Schema({
Example #9

#!/var/www/lisb/venv/bin/python
import json
import os
import sys

import boto3
import tarfile
from datetime import datetime

from schema import Schema, Optional, And, Or

from common_functions import encrypt_file

# BACKUPS SCHEMA
command_schema = Schema({
    Optional("--to-backup"): And([Or("conf", "data", "logs")], lambda l: 0 < len(l) <= 3),
    Optional("--s3"): [And(str, lambda bucket_str: len(bucket_str.split("/", maxsplit=1)) == 2)],
    Optional("--encrypted"): And(list, lambda l: len(l) == 0)
})
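# A hedged example of the docopt-style options this schema accepts
# (the values are made up):
sample_options = command_schema.validate({
    "--to-backup": ["conf", "data"],
    "--s3": ["my-bucket/backups"],
    "--encrypted": [],
})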


def create_backup(options):
    # Create backups directory if necessary
    base_path = "/var/www/lisb/"
    backups_path = "/var/www/lisb/backups/"
    if not os.path.exists(backups_path):
        os.makedirs(backups_path)

    # Create GZ-compressed local TAR backup file of information specified by '--to-backup'
    to_backup = ['conf', 'data', 'logs'] if '--to-backup' not in options else options['--to-backup']
    backup_name = "backup" + datetime.utcnow().strftime("%Y%m%d%H%M%S") + ".tar.gz"
Example #10
 Optional('searchSpacePath'):
 os.path.exists,
 Optional('multiPhase'):
 bool,
 'useAnnotation':
 bool,
 'tuner':
 Or(
     {
         'builtinTunerName':
         Or('TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'BatchTuner'),
         'classArgs': {
             'optimize_mode': Or('maximize', 'minimize'),
             Optional('speed'): int
         },
         Optional('gpuNum'):
         And(int, lambda x: 0 <= x <= 99999),
     }, {
         'codeDir': os.path.exists,
         'classFileName': str,
         'className': str,
         Optional('classArgs'): dict,
         Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
     }),
 Optional('assessor'):
 Or(
     {
         'builtinAssessorName': lambda x: x in ['Medianstop'],
         'classArgs': {
             'optimize_mode': lambda x: x in ['maximize', 'minimize']
         },
Example #11
class Lucene(HoaxyCommand):
    """
usage:
  hoaxy lucene --index [--mode=<mode>]
  hoaxy lucene --search --query=<q> [--top=<n>]
  hoaxy lucene -h | --help

Use Apache Lucene to build an index from the parsed articles, and provide a
simple interface to query the indexed articles.
--index             Create, append and update index.
--search            Do lucene search

Options:
--mode=<mode>       Mode for creating the index; available choices are:
                    create_or_append, create, append
                    [default: create_or_append]
--query=<q>         String to query.
--top=<n>           Number of top results to show.
                    [default: 5]
-h --help           Show help.

Examples:

    1. Create an index of all non-indexed documents
        hoaxy lucene --index --mode=create_or_append

    2. If you want to replace the old indexes and create a new one:
        hoaxy lucene --index --mode=create

    3. Search the top 5 most relevant articles containing the keyword 'trump'
        hoaxy lucene --search --query=trump
    """
    name = 'lucene'
    short_description = 'Lucene Indexing and Searching'
    args_schema = Schema({
        '--query':
        Or(None, lambda s: len(s) > 0),
        '--mode':
        Or(
            None,
            And(Use(str.lower), lambda s: s in
                ('create_or_append', 'create', 'append'))),
        '--top':
        Or(None, And(Use(int), lambda x: x > 0)),
        object:
        object
    })

    @classmethod
    def prepare_article(cls, article_data):
        article_id, group_id, canonical_url, title, meta, content,\
            date_published, domain, site_type = article_data
        article = dict(article_id=article_id,
                       group_id=group_id,
                       canonical_url=canonical_url,
                       title=title,
                       content=content,
                       date_published=date_published,
                       domain=domain,
                       site_type=site_type)
        article['meta'] = unicode(meta)
        article['uq_id_str'] = unicode(group_id) + title
        if article['content'] is None:
            article['content'] = u'NULL'
        return article

    @classmethod
    def index(cls, session, mode, articles_iter, mgid):
        lucene.initVM()
        index_dir = cls.conf['lucene']['index_dir']
        indexer = Indexer(index_dir,
                          mode,
                          date_format=cls.conf['lucene']['date_format'])
        article = None
        for i, data in enumerate(articles_iter):
            article = cls.prepare_article(data)
            indexer.index_one(article)
            if i % cls.conf['window_size'] == 1:
                logger.info('Indexed %s articles', i)
        indexer.close()
        if article is not None:
            mgid.value = str(article['group_id'])
            session.commit()
            logger.info('Indexed article pointer updated!')
        else:
            logger.warning('No new articles are found!')
        logger.info('Done!')

    @classmethod
    def search(cls, query, n):
        lucene.initVM()
        index_dir = cls.conf['lucene']['index_dir']
        searcher = Searcher(index_dir)
        rs = searcher.search(query, n)
        pprint.pprint(rs)

    @classmethod
    def run(cls, args):
        try:
            # print(args)
            args = cls.args_schema.validate(args)
        except SchemaError as e:
            sys.exit(e)
        session = Session()
        # make sure lucene be inited
        lucene.initVM()
        lucene.getVMEnv().attachCurrentThread()
        if args['--index'] is True:
            configure_logging('lucene.index', console_level='INFO')
            mgid = get_or_create_m(
                session,
                MetaInfo,
                data=dict(
                    name='article_group_id_lucene_index',
                    value='0',
                    value_type='int',
                    description='article.group_id used for lucene index'),
                fb_uk='name')
            if args['--mode'] == 'create':
                mgid.set_value(0)
                session.commit()
            q = """
            SELECT DISTINCT ON (a.group_id) a.id, a.group_id,
                a.canonical_url,
                a.title, a.meta, a.content,
                coalesce(a.date_published, a.date_captured) AS pd,
                s.domain, s.site_type
            FROM article AS a
                JOIN site AS s ON s.id=a.site_id
            WHERE a.site_id IS NOT NULL AND s.is_enabled IS TRUE
                AND a.group_id>:gid
            ORDER BY group_id, pd ASC
            """
            articles_iter = session.execute(
                sqlalchemy.text(q).bindparams(gid=mgid.get_value()))
            cls.index(session, args['--mode'], articles_iter, mgid)
        elif args['--search'] is True:
            configure_logging('lucene.search', console_level='INFO')
            cls.search(args['--query'], args['--top'])
        else:
            print("Unrecognized command!")
            sys.exit(2)
Example #12
def bayesitc_mcmc_parser(argv=sys.argv[1:]):
    __usage__ = """Analyze ITC data using Markov chain Monte Carlo (MCMC). Uses MicroCal .itc files, or custom format .yml files for modeling experiments.
    When running the program you can select one of two options:

    competitive
      A competitive binding model. Requires multiple experiments to be specified.

    twocomponent
      A twocomponent binding model. Analyzes only a single experiment

    Usage:
      bayesitc_mcmc.py twocomponent <datafile> <heatsfile> [-v | -vv | -vvv] [--cc=<c_cell>] [--cs=<c_syringe> ] [--dc=<dc_cell>] [--ds=<dc_syringe>] [options]
      bayesitc_mcmc.py competitive (<datafile> <heatsfile>)... (-r <receptor> | --receptor <receptor>) [-v | -vv | -vvv] [options]
      bayesitc_mcmc.py (-h | --help)
      bayesitc_mcmc.py --license
      bayesitc_mcmc.py --version

    Options:
      -h, --help                             Show this screen
      --version                              Show version
      --license                              Show license
      -l <logfile>, --log=<logfile>          File to write logs to. Will be placed in workdir.
      --cc <c_cell>                          Concentration of component in cell in mM. Defaults to value in input file
      --cs <c_syringe>                       Concentration of component in syringe in mM. Defaults to value in input file
      --dc <dc_cell>                         Relative uncertainty in cell concentration      [default: 0.1]
      --ds <dc_syringe>                      Relative uncertainty in syringe concentration   [default: 0.1]
      -v,                                    Verbose output level. Multiple flags increase verbosity.
      -w <workdir>, --workdir <workdir>      Directory for output files                      [default: ./]
      -r <receptor> | --receptor <receptor>  The name of the receptor for a competitive binding model.
      -n <name>, --name <name>               Name for the experiment. Will be used for output files. Defaults to inputfile name.
      -i <ins>, --instrument <ins>           The name of the instrument used for the experiment. Overrides .itc file instrument.
      --nfit=<n>                             No. of iterations for maximum a posteriori fit  [default: 20000]
      --niters=<n>                           No. of iterations for mcmc sampling             [default: 2000000]
      --nburn=<n>                            No. of Burn-in iterations for mcmc sampling     [default: 500000]
      --nthin=<n>                            Thinning period for mcmc sampling               [default: 500]
"""
    arguments = docopt(__usage__,
                       argv=argv,
                       version='bayesitc_mcmc.py, pre-alpha')
    schema = Schema({
        '--help':
        bool,  # True or False are accepted
        '--license':
        bool,  # True or False are accepted
        # integer between 0 and 3
        '-v':
        And(int, lambda n: 0 <= n <= 3),
        # str and found in this dict
        'twocomponent':
        bool,
        'competitive':
        bool,
        '--nfit':
        And(Use(int), lambda n: n > 0),
        # Convert str to int, make sure that it is larger than 0
        '--nburn':
        And(Use(int), lambda n: n > 0),
        # Convert str to int, make sure that it is larger than 0
        '--niters':
        And(Use(int), lambda n: n > 0),
        # Convert str to int, make sure that it is larger than 0
        '--nthin':
        And(Use(int), lambda n: n > 0),
        # Convert str to int, make sure that it is larger than 0
        '--name':
        Or(None, And(str, len)),  # Not an empty string
        '--instrument':
        Or(None, And(str, lambda m: m in known_instruments)),
        # None, or str and found in this dict
        '--version':
        bool,  # True or False are accepted
        '--receptor':
        Or(None, str),  # str or None
        '--workdir':
        str,  # str
        # str and ensure file exists
        # list and ensure it contains existing files
        '<datafile>':
        And(
            list,
            lambda inpfiles: all(os.path.isfile(inpfile) for inpfile in inpfiles),
            Use(lambda inpfiles:
                [os.path.abspath(inpfile) for inpfile in inpfiles])),
        # list and ensure it contains existing files
        '<heatsfile>':
        And(
            list,
            lambda inpfiles: all(os.path.isfile(inpfile) for inpfile in inpfiles),
            Use(lambda inpfiles:
                [os.path.abspath(inpfile) for inpfile in inpfiles])),
        # Don't use, or open file with writing permissions
        '--log':
        Or(None, str),  # Don't use, or str
        '--cc':
        Or(None,
           And(Use(float),
               lambda n: n > 0.0)),  # Not specified, or a float greater than 0
        '--cs':
        Or(None, And(Use(float),
                     lambda n: n > 0.0)),  # Not specified, or a float
        '--dc':
        And(Use(float), lambda n: n > 0.0),  # a float greater than 0
        '--ds':
        And(Use(float), lambda n: n > 0.0),  # a float greater than 0
    })

    return schema.validate(arguments)
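A hedged invocation sketch: the file paths are placeholders and must exist for the isfile checks to pass, and docopt collects the repeated positionals into lists:

args = bayesitc_mcmc_parser(['competitive',
                             'host_guest1.itc', 'heats1.dat',
                             'host_guest2.itc', 'heats2.dat',
                             '--receptor', 'host'])
print(args['<datafile>'])  # absolute paths, produced by the Use(...) step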
Example #13
def bayesitc_util_parser(argv=sys.argv[1:]):
    __usage__ = """
Bayesian analysis of ITC data. Uses MicroCal .itc files, or custom format .yml files for analysing experiments.

Usage:
  ITC.py <datafiles>... [-w <workdir> | --workdir=<workdir>] [-n <name> | --name=<name>] [-q <file> | --heats=<file>] [-i <ins> | --instrument=<ins> ] [-v | -vv | -vvv] [-r <file> | --report=<file>] [ -l <logfile> | --log=<logfile>]
  ITC.py mcmc <datafiles>...  (-m <model> | --model=<model>) [-w <workdir> | --workdir=<workdir>] [ -r <receptor> | --receptor=<receptor>] [-n <name> | --name=<name>] [-q <file> | --heats=<file>] [-i <ins> | --instrument=<ins> ] [ -l <logfile> | --log=<logfile>] [-v | -vv | -vvv] [--report=<file>] [options]
  ITC.py (-h | --help)
  ITC.py --license
  ITC.py --version

Options:
  -h, --help                            Show this screen
  --version                              Show version
  --license                              Show license
  -l <logfile>, --log=<logfile>          File to write logs to. Will be placed in workdir.
  -v,                                    Verbose output level. Multiple flags increase verbosity.
  <datafiles>                            Datafile(s) to perform the analysis on, .itc, .yml
  -w <workdir>, --workdir=<workdir>      Directory for output files                      [default: ./]
  -r <receptor> | --receptor=<receptor>  The name of the receptor for a Competitive Binding model.
  -n <name>, --name=<name>               Name for the experiment. Will be used for output files. Defaults to inputfile name.
  -i <ins>, --instrument=<ins>           The name of the instrument used for the experiment. Overrides .itc file instrument.
  -q <file>, --heats=<file>              Origin format integrated heats file. (From NITPIC use .dat file)
  -m <model>, --model=<model>            Model to use for mcmc sampling                  [default: TwoComponent]
  --nfit=<n>                             No. of iterations for maximum a posteriori fit  [default: 20000]
  --niters=<n>                           No. of iterations for mcmc sampling             [default: 6000]
  --nburn=<n>                            No. of Burn-in iterations for mcmc sampling     [default: 1000]
  --nthin=<n>                            Thinning period for mcmc sampling               [default: 5]
  --report=<file>                        Output file with summary in markdown
"""
    arguments = docopt(__usage__, argv=argv, version='ITC.py, pre-alpha')
    schema = Schema({
        '--heats':
        Or(None, And(str, os.path.isfile,
                     Use(os.path.abspath))),  # str, verify that it exists
        '--help':
        bool,  # True or False are accepted
        '--license':
        bool,  # True or False are accepted
        # integer between 0 and 3
        '-v':
        And(int, lambda n: 0 <= n <= 3),
        # str and found in this dict
        '--model':
        And(str, lambda m: m in known_models),
        '--nfit':
        And(Use(int), lambda n: n > 0),
        # Convert str to int, make sure that it is larger than 0
        '--nburn':
        And(Use(int), lambda n: n > 0),
        # Convert str to int, make sure that it is larger than 0
        '--niters':
        And(Use(int), lambda n: n > 0),
        # Convert str to int, make sure that it is larger than 0
        '--nthin':
        And(Use(int), lambda n: n > 0),
        # Convert str to int, make sure that it is larger than 0
        '--name':
        Or(None, And(str, len)),  # Not an empty string
        '--instrument':
        Or(None, And(str, lambda m: m in known_instruments)),
        # None, or str and found in this dict
        '--version':
        bool,  # True or False are accepted
        '--receptor':
        Or(None, str),  # str or None
        '--workdir':
        str,  # str
        # list and ensure it contains existing files
        '<datafiles>':
        And(
            list,
            lambda inpfiles: all(os.path.isfile(inpfile) for inpfile in inpfiles),
            Use(lambda inpfiles:
                [os.path.abspath(inpfile) for inpfile in inpfiles])),
        'mcmc':
        bool,  # True or False are accepted
        '--report':
        Or(None, Use(lambda f: open(f, 'w'))),
        # Don't use, or open file with writing permissions
        '--log':
        Or(None, str),  # Don't use, or str
    })

    return schema.validate(arguments)
Example #14
def integrate_parser(argv=sys.argv[1:]):
    __usage__ = """
Integrate ITC data using Gaussian process regression. Uses MicroCal .itc files, or custom format .yml files for analysing experiments.

Usage:
  bayesitc_integrate.py <datafiles>... [-w <workdir> | --workdir=<workdir>] [-v | -vv | -vvv] [options]
  bayesitc_integrate.py (-h | --help)
  bayesitc_integrate.py --license
  bayesitc_integrate.py --version

Options:
  -h, --help                             Show this screen
  --version                              Show version
  --license                              Show license
  -l <logfile>, --log=<logfile>          File to write logs to. Will be placed in workdir.
  -v,                                    Verbose output level. Multiple flags increase verbosity.
  <datafiles>                            Datafile(s) to perform the analysis on, .itc, .yml
  -w <workdir>, --workdir=<workdir>      Directory for output files                      [default: ./]
  -n <name>, --name=<name>               Name for the experiment. Will be used for output files. Defaults to input file name.
  -i <ins>, --instrument=<ins>           The name of the instrument used for the experiment. Overrides .itc file instrument.
  -f <frac>, --fraction=<frac>           The fraction of the injection to fit, measured from the end [default: 0.2]
  --theta0=<theta0>                      The parameters in the autocorrelation model. [default: 5.0]
  --nugget=<nugget>                      Size of nugget effect to allow smooth predictions from noisy data. [default: 1.0]
  --plot                                 Generate plots of the baseline fit
"""

    arguments = docopt(__usage__,
                       argv=argv,
                       version='bayesitc_integrate.py, pre-alpha')
    schema = Schema({
        '--help':
        bool,  # True or False are accepted
        '--license':
        bool,  # True or False are accepted
        # integer between 0 and 3
        '-v':
        And(int, lambda n: 0 <= n <= 3),
        # Float greater than 0
        '--fraction':
        And(Use(float), lambda n: 0 < n <= 1.0),
        '--nugget':
        And(Use(float), lambda n: n > 0),
        '--theta0':
        And(Use(float), lambda n: n > 0),
        # Convert str to int, make sure that it is larger than 0
        '--name':
        Or(None, And(str, len)),  # Not an empty string
        '--instrument':
        Or(None, And(str, lambda m: m in known_instruments)),
        # None, or str and found in this dict
        '--version':
        bool,  # True or False are accepted
        '--plot':
        bool,  # True or False are accepted
        '--workdir':
        str,  # str
        # list and ensure it contains existing files
        '<datafiles>':
        And(
            list,
            lambda inpfiles: all(os.path.isfile(inpfile) for inpfile in inpfiles),
            Use(lambda inpfiles:
                [os.path.abspath(inpfile) for inpfile in inpfiles])),
        # Don't use, or open file with writing permissions
        '--log':
        Or(None, str),  # Don't use, or str
    })

    return schema.validate(arguments)
Example #15
File: main.py Project: PurdueMINDS/SAGA
def parse_args():
    from schema import Schema, Use, And, Or

    args = docopt.docopt(__doc__, version='SAGA Project Ver 4.2')

    requirements = {
        '--phone'     : And(Use(str), lambda x: x in ('galaxy', 'pixel', 'stratux'),
                            error='Phone type only supports \'galaxy\', \'pixel\' and \'stratux\''),
        '--threshold' : And(Use(int), lambda x: x > 0,
                            error='Roll threshold should be integer > 0'),
        '--freq'      : Or(None,
                           And(Use(str), lambda x: x in ('haar',),
                                error='Wavelet only supports \'haar\'')),
        '--batch-size': And(Use(int), lambda x: x > 0,
                            error='Batch size should be integer > 0'),
        '--win'       : Or(None, And(Use(int), lambda x: x > 0),
                            error='Feature window length should be integer > 0'),
        '--offset'    : Or(None, And(Use(int), lambda x: x > 0),
                           error='Feature window offset should be integer > 0'),
        '--rate'      : Or(None, And(Use(float), lambda x: (0 < x) & (x <= 1)),
                           error='Feature window offset rate should be float in (0, 1]'),
        '--limit'     : Or(None, And(Use(int), lambda x: x > 0),
                           error='Limit should be an integer > 0'),
        '--model'     : And(Use(str), lambda x: x in nnet.model_list,
                            error='Model not available'),
        '--epochs'    : And(Use(int), lambda x: x >= 0,
                            error='Number of epochs should be integer >= 0'),
        '--lr'        : And(Use(float), lambda x: x > 0,
                            error='Learning rate should be float > 0'),
        '--print-freq': And(Use(int), lambda x: x > 0,
                            error='Print frequency should be integer > 0'),
        '--device'    : And(Use(int), lambda x: x >= 0,
                            error='CUDA device ID should be integer >= 0'),
        '--keyword'   : And(Use(str), lambda x: x in ('pitch', 'roll', 'heading', 'hazard'),
                            error='Only predict \'pitch\' or \'roll\' or \'heading\' or \'hazard\''),
        '--stratux'   : Or(None,
                           And(Use(int), lambda x: x >= 0,
                               error='Stratux input level should be integer >= 0')),
        '--try'       : And(Use(int), lambda x: x > 0,
                            error='Number of trials should be integer > 0'),
        object        : object,
    }
    args = Schema(requirements).validate(args)

    # midterm prediction must be fixed
    assert not (args['--trig'] and args['--diff'] and args['--direct'])
    assert args['--keyword'] != 'hazard' or (args['--direct'] and args['--no-normal'])
    assert args['--phone'] != 'stratux' or args['--stratux'] is not None

    global WINDOW_CONFIG
    if args['--win'] is not None:
        WINDOW_CONFIG = {
            'input': {
                'length': args['--win'],
                'offset_length': args['--offset'], 'offset_rate': args['--rate'],
                'padding': 'repeat_base',
            },
            'target': {
                'length': args['--win'],
                'offset_length': args['--offset'], 'offset_rate': args['--rate'],
                'padding': 'repeat_base',
            },
        }
    else:
        WINDOW_CONFIG = constant.WINDOW_CONFIG

    return args
Example #16
def query_latest_articles():
    """Handle API request '/latest-articles'.

    API Request Parameters
    ----------------------
    past_hours : int
        Set the hours from now to past to be defined as latest hours.
    domains : object
        If None, return all articles in the latest hours;
        If str, should be one of {'fact_checking', 'claim', 'fake'}:
            if 'fact_checking', return fact checking articles,
            if 'claim', return claim articles,
            if 'fake', return selected fake articles, a subset of 'claim'
               that we selected.
        If an array of domains, return articles belonging to those domains.
    domains_file : str
        When `domains` is 'fake', the actual used domains are loaded from
        file `domains_file`. If this file doesn't exist, then `claim` type
        domains would be used.

    API Response Keys
    -----------------
        status : string
        num_of_entries : int
        articles : dict
            keys are:
                canonical_url : string
                date_published : string formatted datetime
                domain : string
                id : int
                site_type : {'claim', 'fact_checking'}
                title : string
    """
    lucene.getVMEnv().attachCurrentThread()
    # Validate input of request
    q_articles_schema = Schema({
        'past_hours':
        And(Use(int), lambda x: x > 0, error='Invalid value of `past_hours`'),
        Optional('domains', default=None):
        Or(lambda s: s in ('fact_checking', 'claim', 'fake'),
           Use(flask.json.loads,
               error='Not valid values nor JSON string of `domains`'))
    })
    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_articles_schema.validate(q_kwargs)
        domains_file = CONF['api'].get('selected_fake_domains_path')
        df = db_query_latest_articles(
            engine, domains_file=domains_file, **q_kwargs)
        if len(df) == 0:
            raise APINoResultError('No articles found!')
        response = dict(
            status='OK',
            num_of_entries=len(df),
            articles=flask.json.loads(df.to_json(**TO_JSON_KWARGS)))
    except SchemaError as e:
        response = dict(status='Parameter error', error=str(e))
    except APIParseError as e:
        response = dict(status='Invalid query', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed!')
    return flask.jsonify(response)
Example #17
File: stage.py Project: databill86/dvc
class Stage(object):
    STAGE_FILE = "Dvcfile"
    STAGE_FILE_SUFFIX = ".dvc"

    PARAM_MD5 = "md5"
    PARAM_CMD = "cmd"
    PARAM_WDIR = "wdir"
    PARAM_DEPS = "deps"
    PARAM_OUTS = "outs"
    PARAM_LOCKED = "locked"

    SCHEMA = {
        Optional(PARAM_MD5): Or(str, None),
        Optional(PARAM_CMD): Or(str, None),
        Optional(PARAM_WDIR): Or(str, None),
        Optional(PARAM_DEPS): Or(And(list, Schema([dependency.SCHEMA])), None),
        Optional(PARAM_OUTS): Or(And(list, Schema([output.SCHEMA])), None),
        Optional(PARAM_LOCKED): bool,
    }

    def __init__(
        self,
        repo,
        path=None,
        cmd=None,
        wdir=os.curdir,
        deps=None,
        outs=None,
        md5=None,
        locked=False,
    ):
        if deps is None:
            deps = []
        if outs is None:
            outs = []

        self.repo = repo
        self.path = path
        self.cmd = cmd
        self.wdir = wdir
        self.outs = outs
        self.deps = deps
        self.md5 = md5
        self.locked = locked

    def __repr__(self):
        return "Stage: '{path}'".format(
            path=self.relpath if self.path else "No path")

    @property
    def relpath(self):
        return os.path.relpath(self.path)

    @property
    def is_data_source(self):
        """Whether the stage file was created with `dvc add` or `dvc import`"""
        return self.cmd is None

    @staticmethod
    def is_valid_filename(path):
        return (path.endswith(Stage.STAGE_FILE_SUFFIX)
                or os.path.basename(path) == Stage.STAGE_FILE)

    @staticmethod
    def is_stage_file(path):
        return os.path.isfile(path) and Stage.is_valid_filename(path)

    def changed_md5(self):
        return self.md5 != self._compute_md5()

    @property
    def is_callback(self):
        """
        A callback stage is always considered as changed,
        so it runs on every `dvc repro` call.
        """
        return not self.is_data_source and len(self.deps) == 0

    @property
    def is_import(self):
        """Whether the stage file was created with `dvc import`."""
        return not self.cmd and len(self.deps) == 1 and len(self.outs) == 1

    def _changed_deps(self):
        if self.locked:
            return False

        if self.is_callback:
            logger.warning(
                "Dvc file '{fname}' is a 'callback' stage "
                "(has a command and no dependencies) and thus always "
                "considered as changed.".format(fname=self.relpath))
            return True

        for dep in self.deps:
            if dep.changed():
                logger.warning(
                    "Dependency '{dep}' of '{stage}' changed.".format(
                        dep=dep, stage=self.relpath))
                return True

        return False

    def _changed_outs(self):
        for out in self.outs:
            if out.changed():
                logger.warning("Output '{out}' of '{stage}' changed.".format(
                    out=out, stage=self.relpath))
                return True

        return False

    def _changed_md5(self):
        if self.changed_md5():
            logger.warning("Dvc file '{}' changed.".format(self.relpath))
            return True
        return False

    def changed(self):
        ret = any(
            [self._changed_deps(),
             self._changed_outs(),
             self._changed_md5()])

        if ret:
            msg = "Stage '{}' changed.".format(self.relpath)
            color = "yellow"
        else:
            msg = "Stage '{}' didn't change.".format(self.relpath)
            color = "green"

        logger.info(logger.colorize(msg, color))

        return ret

    def remove_outs(self, ignore_remove=False):
        """
        Used mainly for `dvc remove --outs`
        """
        for out in self.outs:
            out.remove(ignore_remove=ignore_remove)

    def unprotect_outs(self):
        for out in self.outs:
            if out.scheme != "local" or not out.exists:
                continue
            self.repo.unprotect(out.path)

    def remove(self):
        self.remove_outs(ignore_remove=True)
        os.unlink(self.path)

    def reproduce(self,
                  force=False,
                  dry=False,
                  interactive=False,
                  no_commit=False):
        if not self.changed() and not force:
            return None

        if (self.cmd or self.is_import) and not self.locked and not dry:
            # Removing outputs only if we actually have command to reproduce
            self.remove_outs(ignore_remove=False)

        msg = ("Going to reproduce '{stage}'. "
               "Are you sure you want to continue?".format(stage=self.relpath))

        if interactive and not prompt.confirm(msg):
            raise DvcException("reproduction aborted by the user")

        logger.info("Reproducing '{stage}'".format(stage=self.relpath))

        self.run(dry=dry, no_commit=no_commit, force=force)

        logger.debug("'{stage}' was reproduced".format(stage=self.relpath))

        return self

    @staticmethod
    def validate(d, fname=None):
        from dvc.utils import convert_to_unicode

        try:
            Schema(Stage.SCHEMA).validate(convert_to_unicode(d))
        except SchemaError as exc:
            raise StageFileFormatError(fname, exc)

    @classmethod
    def _stage_fname(cls, fname, outs, add):
        if fname:
            return fname

        if not outs:
            return cls.STAGE_FILE

        out = outs[0]
        path_handler = out.remote.ospath

        fname = path_handler.basename(out.path) + cls.STAGE_FILE_SUFFIX

        fname = Stage._expand_to_path_on_add_local(add, fname, out,
                                                   path_handler)

        return fname

    @staticmethod
    def _expand_to_path_on_add_local(add, fname, out, path_handler):
        if (add and out.is_local
                and not contains_symlink_up_to(out.path, out.repo.root_dir)):
            fname = path_handler.join(path_handler.dirname(out.path), fname)
        return fname

    @staticmethod
    def _check_stage_path(repo, path):
        assert repo is not None

        real_path = os.path.realpath(path)
        if not os.path.exists(real_path):
            raise StagePathNotFoundError(path)

        if not os.path.isdir(real_path):
            raise StagePathNotDirectoryError(path)

        proj_dir = os.path.realpath(repo.root_dir) + os.path.sep
        if not (real_path + os.path.sep).startswith(proj_dir):
            raise StagePathOutsideError(path)

    @property
    def is_cached(self):
        """
        Checks if this stage has already been run and stored
        """
        from dvc.remote.local import RemoteLOCAL
        from dvc.remote.s3 import RemoteS3

        old = Stage.load(self.repo, self.path)
        if old._changed_outs():
            return False

        # NOTE: need to save checksums for deps in order to compare them
        # with what is written in the old stage.
        for dep in self.deps:
            dep.save()

        old_d = old.dumpd()
        new_d = self.dumpd()

        # NOTE: need to remove checksums from old dict in order to compare
        # it to the new one, since the new one doesn't have checksums yet.
        old_d.pop(self.PARAM_MD5, None)
        new_d.pop(self.PARAM_MD5, None)
        outs = old_d.get(self.PARAM_OUTS, [])
        for out in outs:
            out.pop(RemoteLOCAL.PARAM_CHECKSUM, None)
            out.pop(RemoteS3.PARAM_CHECKSUM, None)

        return old_d == new_d

    @staticmethod
    def create(
        repo=None,
        cmd=None,
        deps=None,
        outs=None,
        outs_no_cache=None,
        metrics=None,
        metrics_no_cache=None,
        fname=None,
        cwd=None,
        wdir=None,
        locked=False,
        add=False,
        overwrite=True,
        ignore_build_cache=False,
        remove_outs=False,
        validate_state=True,
    ):
        if outs is None:
            outs = []
        if deps is None:
            deps = []
        if outs_no_cache is None:
            outs_no_cache = []
        if metrics is None:
            metrics = []
        if metrics_no_cache is None:
            metrics_no_cache = []

        # Backward compatibility for `cwd` option
        if wdir is None and cwd is not None:
            if fname is not None and os.path.basename(fname) != fname:
                raise StageFileBadNameError(
                    "stage file name '{fname}' may not contain subdirectories"
                    " if '-c|--cwd' (deprecated) is specified. Use '-w|--wdir'"
                    " along with '-f' to specify stage file path and working"
                    " directory.".format(fname=fname))
            wdir = cwd
        else:
            wdir = os.curdir if wdir is None else wdir

        stage = Stage(repo=repo, wdir=wdir, cmd=cmd, locked=locked)

        stage.outs = output.loads_from(stage, outs, use_cache=True)
        stage.outs += output.loads_from(stage,
                                        metrics,
                                        use_cache=True,
                                        metric=True)
        stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False)
        stage.outs += output.loads_from(stage,
                                        metrics_no_cache,
                                        use_cache=False,
                                        metric=True)
        stage.deps = dependency.loads_from(stage, deps)

        stage._check_circular_dependency()
        stage._check_duplicated_arguments()

        fname = Stage._stage_fname(fname, stage.outs, add=add)
        wdir = os.path.abspath(wdir)

        if cwd is not None:
            path = os.path.join(wdir, fname)
        else:
            path = os.path.abspath(fname)

        Stage._check_stage_path(repo, wdir)
        Stage._check_stage_path(repo, os.path.dirname(path))

        stage.wdir = wdir
        stage.path = path

        # NOTE: remove outs before we check build cache
        if remove_outs:
            stage.remove_outs(ignore_remove=False)
            logger.warning("Build cache is ignored when using --remove-outs.")
            ignore_build_cache = True
        else:
            stage.unprotect_outs()

        if validate_state:
            if os.path.exists(path):
                if not ignore_build_cache and stage.is_cached:
                    logger.info("Stage is cached, skipping.")
                    return None

                msg = (
                    "'{}' already exists. Do you wish to run the command and "
                    "overwrite it?".format(stage.relpath))

                if not overwrite and not prompt.confirm(msg):
                    raise StageFileAlreadyExistsError(stage.relpath)

                os.unlink(path)

        return stage

    @staticmethod
    def _check_dvc_filename(fname):
        if not Stage.is_valid_filename(fname):
            raise StageFileBadNameError(
                "bad stage filename '{}'. Stage files should be named"
                " 'Dvcfile' or have a '.dvc' suffix (e.g. '{}.dvc').".format(
                    os.path.relpath(fname), os.path.basename(fname)))

    @staticmethod
    def _check_file_exists(fname):
        if not os.path.exists(fname):
            raise StageFileDoesNotExistError(fname)

    @staticmethod
    def load(repo, fname):
        Stage._check_file_exists(fname)
        Stage._check_dvc_filename(fname)

        if not Stage.is_stage_file(fname):
            raise StageFileIsNotDvcFileError(fname)

        d = load_stage_file(fname)

        Stage.validate(d, fname=os.path.relpath(fname))
        path = os.path.abspath(fname)

        stage = Stage(
            repo=repo,
            path=path,
            wdir=os.path.abspath(
                os.path.join(os.path.dirname(path),
                             d.get(Stage.PARAM_WDIR, "."))),
            cmd=d.get(Stage.PARAM_CMD),
            md5=d.get(Stage.PARAM_MD5),
            locked=d.get(Stage.PARAM_LOCKED, False),
        )

        stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS, []))
        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS, []))

        return stage

    def dumpd(self):
        from dvc.remote.local import RemoteLOCAL

        return {
            key: value
            for key, value in {
                Stage.PARAM_MD5:
                self.md5,
                Stage.PARAM_CMD:
                self.cmd,
                Stage.PARAM_WDIR:
                RemoteLOCAL.unixpath(
                    os.path.relpath(self.wdir, os.path.dirname(self.path))),
                Stage.PARAM_LOCKED:
                self.locked,
                Stage.PARAM_DEPS: [d.dumpd() for d in self.deps],
                Stage.PARAM_OUTS: [o.dumpd() for o in self.outs],
            }.items() if value
        }

    def dump(self):
        fname = self.path

        self._check_dvc_filename(fname)

        logger.info("Saving information to '{file}'.".format(
            file=os.path.relpath(fname)))
        d = self.dumpd()

        with open(fname, "w") as fd:
            yaml.safe_dump(d, fd, default_flow_style=False)

        self.repo.scm.track_file(os.path.relpath(fname))

    def _compute_md5(self):
        from dvc.output.local import OutputLOCAL

        d = self.dumpd()

        # NOTE: removing md5 manually in order to not affect md5s in deps/outs
        if self.PARAM_MD5 in d.keys():
            del d[self.PARAM_MD5]

        # Ignore the wdir default value. In this case stage file w/o
        # wdir has the same md5 as a file with the default value specified.
        # It's important for backward compatibility with pipelines that
        # didn't have WDIR in their stage files.
        if d.get(self.PARAM_WDIR) == ".":
            del d[self.PARAM_WDIR]

        # NOTE: excluding parameters that don't affect the state of the
        # pipeline. Not excluding `OutputLOCAL.PARAM_CACHE`, because if
        # it has changed, we might not have that output in our cache.
        m = dict_md5(d, exclude=[self.PARAM_LOCKED, OutputLOCAL.PARAM_METRIC])
        logger.debug("Computed stage '{}' md5: '{}'".format(self.relpath, m))
        return m

    def save(self):
        for dep in self.deps:
            dep.save()

        for out in self.outs:
            out.save()

        self.md5 = self._compute_md5()

    @staticmethod
    def _changed_entries(entries):
        ret = []
        for entry in entries:
            if entry.checksum and entry.changed_checksum():
                ret.append(entry.rel_path)
        return ret

    def check_can_commit(self, force):
        changed_deps = self._changed_entries(self.deps)
        changed_outs = self._changed_entries(self.outs)

        if changed_deps or changed_outs or self.changed_md5():
            msg = ("dependencies {}".format(changed_deps)
                   if changed_deps else "")
            msg += " and " if (changed_deps and changed_outs) else ""
            msg += "outputs {}".format(changed_outs) if changed_outs else ""
            msg += "md5" if not (changed_deps or changed_outs) else ""
            msg += " of '{}' changed. Are you sure you commit it?".format(
                self.relpath)
            if not force and not prompt.confirm(msg):
                raise StageCommitError(
                    "unable to commit changed '{}'. Use `-f|--force` to "
                    "force.`".format(self.relpath))
            self.save()

    def commit(self):
        for out in self.outs:
            out.commit()

    def _check_missing_deps(self):
        missing = [dep for dep in self.deps if not dep.exists]

        if any(missing):
            raise MissingDep(missing)

    @staticmethod
    def _warn_if_fish(executable):  # pragma: no cover
        if (executable is None
                or os.path.basename(os.path.realpath(executable)) != "fish"):
            return

        logger.warning(
            "DVC detected that you are using fish as your default "
            "shell. Be aware that it might cause problems by overwriting "
            "your current environment variables with values defined "
            "in '.fishrc', which might affect your command. See "
            "https://github.com/iterative/dvc/issues/1307. ")

    def _check_circular_dependency(self):
        from dvc.exceptions import CircularDependencyError

        circular_dependencies = set(d.path for d in self.deps) & set(
            o.path for o in self.outs)

        if circular_dependencies:
            raise CircularDependencyError(circular_dependencies.pop())

    def _check_duplicated_arguments(self):
        from dvc.exceptions import ArgumentDuplicationError
        from collections import Counter

        path_counts = Counter(edge.path for edge in self.deps + self.outs)

        for path, occurrence in path_counts.items():
            if occurrence > 1:
                raise ArgumentDuplicationError(path)

    def _run(self):
        self._check_missing_deps()
        executable = os.getenv("SHELL") if os.name != "nt" else None
        self._warn_if_fish(executable)

        p = subprocess.Popen(
            self.cmd,
            cwd=self.wdir,
            shell=True,
            env=fix_env(os.environ),
            executable=executable,
        )
        p.communicate()

        if p.returncode != 0:
            raise StageCmdFailedError(self)

    def run(self, dry=False, resume=False, no_commit=False, force=False):
        if self.locked:
            logger.info("Verifying outputs in locked stage '{stage}'".format(
                stage=self.relpath))
            if not dry:
                self.check_missing_outputs()

        elif self.is_import:
            logger.info("Importing '{dep}' -> '{out}'".format(
                dep=self.deps[0].path, out=self.outs[0].path))
            if not dry:
                if self._already_cached() and not force:
                    self.outs[0].checkout()
                else:
                    self.deps[0].download(self.outs[0].path_info,
                                          resume=resume)

        elif self.is_data_source:
            msg = "Verifying data sources in '{}'".format(self.relpath)
            logger.info(msg)
            if not dry:
                self.check_missing_outputs()

        else:
            logger.info("Running command:\n\t{}".format(self.cmd))
            if not dry:
                if (not force and not self.is_callback
                        and self._already_cached()):
                    self.checkout()
                else:
                    self._run()

        if not dry:
            self.save()
            if not no_commit:
                self.commit()

    def check_missing_outputs(self):
        paths = [
            out.path if out.scheme != "local" else out.rel_path
            for out in self.outs if not out.exists
        ]

        if paths:
            raise MissingDataSource(paths)

    def checkout(self, force=False, progress_callback=None):
        for out in self.outs:
            out.checkout(force=force, progress_callback=progress_callback)

    @staticmethod
    def _status(entries):
        ret = {}

        for entry in entries:
            ret.update(entry.status())

        return ret

    def status(self):
        ret = []

        if not self.locked:
            deps_status = self._status(self.deps)
            if deps_status:
                ret.append({"changed deps": deps_status})

        outs_status = self._status(self.outs)
        if outs_status:
            ret.append({"changed outs": outs_status})

        if self.changed_md5():
            ret.append("changed checksum")

        if self.is_callback:
            ret.append("always changed")

        if ret:
            return {self.relpath: ret}

        return {}

    def _already_cached(self):
        return (not self.changed_md5()
                and all(not dep.changed() for dep in self.deps)
                and all(not out.changed_cache() if out.
                        use_cache else not out.changed() for out in self.outs))

    def get_all_files_number(self):
        return sum(out.get_files_number() for out in self.outs)
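A hedged sketch of Stage.SCHEMA in action; the stage dict is made up, and dependency.SCHEMA / output.SCHEMA must be importable exactly as in the module above:

d = {"md5": "d41d8cd98f00b204e9800998ecf8427e", "cmd": "python train.py"}
Schema(Stage.SCHEMA).validate(d)  # raises SchemaError on a malformed stage file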
Example #18
class ScriptSchema(Schema):
    """Extends `Schema` adapting it to PA scripts validation strategies.

    Adds predefined schemata as class variables to be used in scripts'
    validation schemas as well as `validate_user_input` method which acts
    as `Schema.validate` but returns a dictionary with converted keys
    ready to be used as function keyword arguments, e.g. validated
    arguments {"--foo": bar, "<baz>": qux} will be be converted to
    {"foo": bar, "baz": qux}. Additional conversion rules may be added as
    dictionary passed to `validate_user_input` :method: as `conversions`
    :param:.

    Use :method:`ScriptSchema.validate_user_input` to obtain kwarg
    dictionary."""

    # class variables are used in task scripts schemata:
    boolean = Or(None, bool)
    hour = Or(None,
              And(Use(int), lambda h: 0 <= h <= 23),
              error="--hour has to be in 0..23")
    id_multi = Or([],
                  And(lambda y: all(x.isdigit() for x in y),
                      error="<id> has to be an integer"))
    id_required = And(Use(int), error="<id> has to be an integer")
    minute_required = And(Use(int),
                          lambda m: 0 <= m <= 59,
                          error="--minute has to be in 0..59")
    minute = Or(None, minute_required)
    string = Or(None, str)
    tabulate_format = Or(
        None,
        And(str, lambda f: f in tabulate_formats),
        error=f"--format should match one of: {', '.join(tabulate_formats)}",
    )

    replacements = {"--": "", "<": "", ">": ""}

    def convert(self, string):
        """Removes cli argument notation characters ('--', '<', '>' etc.).

        :param string: cli argument key to be converted to fit Python
        argument syntax."""

        for key, value in self.replacements.items():
            string = string.replace(key, value)
        return string

    def validate_user_input(self, arguments, *, conversions=None):
        """Calls `Schema.validate` on provided `arguments`.

        Returns dictionary with keys converted by
        `ScriptSchema.convert` :method: to be later used as kwarg
        arguments. Universal rules for conversion are stored in
        `replacements` class variable and may be updated using
        `conversions` kwarg. Use optional `conversions` :param: to add
        custom replacement rules.

    :param arguments: dictionary of cli arguments provided by
        (e.g.) `docopt`
        :param conversions: dictionary of additional rules to
        `self.replacements`"""

        if conversions:
            self.replacements.update(conversions)

        try:
            self.validate(arguments)
            return {self.convert(key): val for key, val in arguments.items()}
        except SchemaError as e:
            logger.warning(snakesay(str(e)))
            sys.exit(1)
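
A hedged usage sketch (the schema keys and the argument dict below are illustrative, not taken from a real PA script): a docopt-style argument dict is validated, then its keys are stripped of CLI notation so the result can be splatted into a task function.

# Illustrative only -- assumes ScriptSchema as defined above.
schema = ScriptSchema({
    "--hour": ScriptSchema.hour,
    "--minute": ScriptSchema.minute,
    "<id>": ScriptSchema.id_required,
})

cli_args = {"--hour": "8", "--minute": None, "<id>": "42"}
kwargs = schema.validate_user_input(cli_args)
# kwargs == {"hour": "8", "minute": None, "id": "42"}
# Note: values keep their original types -- the converted result of
# Schema.validate is discarded; only the keys are rewritten for **kwargs use.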
Example #19
import yaml
from schema import Schema, And, Or, Optional
from hyperlink import parse


def check_url(url_str):
    url = parse(str(url_str))
    assert url.scheme in ('http', 'https')
    return url


IN_SCHEMA = Schema(
    {'projects': [{
        'name': str,
        Or('url', 'gh_url'): check_url
    }]},
    ignore_extra_keys=True)
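
A quick validation sketch (the project entry is illustrative): `Or('url', 'gh_url')` used as a dict key means either key name is accepted, and its value is passed through `check_url`.

# Illustrative document; extra keys are ignored per ignore_extra_keys=True.
doc = {'projects': [
    {'name': 'dvc', 'gh_url': 'https://github.com/iterative/dvc'},
]}
validated = IN_SCHEMA.validate(doc)
# check_url() has replaced the URL string with a parsed hyperlink URL object.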


def redundant(src, key=None, distinct=False, sort=True):
    """The complement of unique(), returns non-unique values. Pass
    distinct=True to get a list of the *first* redundant value for
    each key. Results are sorted by default.

    >>> redundant(range(5))
    []
    >>> redundant([1, 2, 3, 2, 3, 3])
    [[2, 2], [3, 3, 3]]
    >>> redundant([1, 2, 3, 2, 3, 3], distinct=True)
    [2, 3]
    """
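
The body of `redundant` was lost in extraction; below is a minimal sketch consistent with the doctests above (the name `redundant_sketch` marks it as a reconstruction, not the original implementation).

def redundant_sketch(src, key=None, distinct=False, sort=True):
    # Group values (by key(value) when a key function is given), then
    # keep only the groups that have more than one member.
    groups = {}
    for value in src:
        k = key(value) if key else value
        groups.setdefault(k, []).append(value)
    dupes = [group for group in groups.values() if len(group) > 1]
    if distinct:
        # First redundant value per key, per the doctest above.
        dupes = [group[0] for group in dupes]
    if sort:
        dupes.sort()
    return dupes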

Example #20

def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        six.text_type: bool,
        '<identifier>': list,
        '--modify': list,
        '--append': list,
        '--spreadsheet': Or(
            None,
            And(lambda f: os.path.exists(f),
                error='<file> should be a readable file or directory.')),
        '--target': Or(None, str),
        '--priority': Or(None, Use(int, error='<priority> should be an integer.')),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)),
              file=sys.stderr)
        sys.exit(1)

    formats = set()
    responses = []

    for i, identifier in enumerate(args['<identifier>']):
        item = session.get_item(identifier)

        # Check existence of item.
        if args['--exists']:
            if item.exists:
                responses.append(True)
                print('{0} exists'.format(identifier))
            else:
                responses.append(False)
                print('{0} does not exist'.format(identifier), file=sys.stderr)
            if (i + 1) == len(args['<identifier>']):
                if all(r is True for r in responses):
                    sys.exit(0)
                else:
                    sys.exit(1)

        # Modify metadata.
        elif args['--modify'] or args['--append']:
            metadata_args = args['--modify'] or args['--append']
            metadata = get_args_dict(metadata_args)
            responses.append(modify_metadata(item, metadata, args))
            if (i + 1) == len(args['<identifier>']):
                if all(r.status_code == 200 for r in responses):
                    sys.exit(0)
                else:
                    for r in responses:
                        if r.status_code == 200:
                            continue
                        # We still want to exit 0 if the non-200 is a
                        # "no changes to xml" error.
                        elif 'no changes' in r.content.decode('utf-8'):
                            continue
                        else:
                            sys.exit(1)

        # Collect and print file formats.
        elif args['--formats']:
            for f in item.get_files():
                formats.add(f.format)
            if (i + 1) == len(args['<identifier>']):
                print('\n'.join(formats))

        # Dump JSON to stdout.
        else:
            metadata = json.dumps(item.item_metadata)
            print(metadata)

    # Edit metadata for items in bulk, using a spreadsheet as input.
    if args['--spreadsheet']:
        if not args['--priority']:
            args['--priority'] = -5
        with io.open(args['--spreadsheet'], 'rU', newline='',
                     encoding='utf-8') as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            responses = []
            for row in spreadsheet:
                if not row['identifier']:
                    continue
                item = session.get_item(row['identifier'])
                if row.get('file'):
                    del row['file']
                metadata = dict((k.lower(), v) for (k, v) in row.items() if v)
                responses.append(modify_metadata(item, metadata, args))

            if all(r.status_code == 200 for r in responses):
                sys.exit(0)
            else:
                for r in responses:
                    if r.status_code == 200:
                        continue
                    # We still want to exit 0 if the non-200 is a
                    # "no changes to xml" error.
                    elif 'no changes' in r.content.decode('utf-8'):
                        continue
                    else:
                        sys.exit(1)
Example #21
def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Validation error messages.
    destdir_msg = '--destdir must be a valid path to a directory.'
    itemlist_msg = '--itemlist must be a valid path to an existing file.'

    # Validate args.
    s = Schema({
        str: Use(bool),
        '--destdir': Or([], And(Use(lambda d: d[0]), dir_exists),
                        error=destdir_msg),
        '--format': list,
        '--glob': Use(lambda l: l[0] if l else None),
        '<file>': list,
        '--search': Or(str, None),
        '--itemlist': Or(None, And(lambda f: os.path.isfile(f)),
                         error=itemlist_msg),
        '<identifier>': Or(str, None),
        '--retries': Use(lambda x: x[0]),
    })

    # Filenames should be unicode literals. Support PY2 and PY3.
    if six.PY2:
        args['<file>'] = [f.decode('utf-8') for f in args['<file>']]

    try:
        args = s.validate(args)
    except SchemaError as exc:
        sys.stderr.write('{0}\n{1}\n'.format(str(exc),
                                             printable_usage(__doc__)))
        sys.exit(1)

    retries = int(args['--retries'])

    if args['--itemlist']:
        ids = [x.strip() for x in open(args['--itemlist'])]
        total_ids = len(ids)
    elif args['--search']:
        _search = search_items(args['--search'])
        total_ids = _search.num_found
        ids = search_ids(args['--search'])

    # Download specific files.
    if args['<identifier>'] and args['<identifier>'] != '-':
        if '/' in args['<identifier>']:
            identifier = args['<identifier>'].split('/')[0]
            files = ['/'.join(args['<identifier>'].split('/')[1:])]
        else:
            identifier = args['<identifier>']
            files = args['<file>']
        total_ids = 1
        ids = [identifier]
    elif args['<identifier>'] == '-':
        total_ids = 1
        ids = sys.stdin
        files = None
    else:
        files = None

    errors = list()
    for i, identifier in enumerate(ids):
        identifier = identifier.strip()
        if total_ids > 1:
            item_index = '{0}/{1}'.format((i + 1), total_ids)
        else:
            item_index = None

        try:
            item = session.get_item(identifier)
        except Exception as exc:
            print('{0}: failed to retrieve item metadata - {1}'.format(
                identifier, exc))
            continue

        # Download the item (all files, or the subset selected above).
        _errors = item.download(files=files,
                                formats=args['--format'],
                                glob_pattern=args['--glob'],
                                dry_run=args['--dry-run'],
                                verbose=args['--verbose'],
                                silent=args['--silent'],
                                ignore_existing=args['--ignore-existing'],
                                checksum=args['--checksum'],
                                destdir=args['--destdir'],
                                no_directory=args['--no-directories'],
                                retries=retries,
                                item_index=item_index,
                                ignore_errors=True)
        if _errors:
            errors.append(_errors)
    if errors:
        # TODO: add option for a summary/report.
        sys.exit(1)
    else:
        sys.exit(0)
Example #22
File: remote.py  Project: fukaij/testplan
 def get_options(cls):
     """
     Schema for options validation and assignment of default values.
     """
     return {
         'hosts':
         dict,
         ConfigOption('abort_signals',
                      default=[signal.SIGINT, signal.SIGTERM]): [int],
         ConfigOption('worker_type', default=RemoteWorker):
         object,
         ConfigOption('pool_type', default='thread'):
         str,
         ConfigOption('host', default=cls.default_hostname):
         str,
         ConfigOption('port', default=0):
         int,
         ConfigOption('copy_cmd', default=copy_cmd):
         lambda x: callable(x),
         ConfigOption('link_cmd', default=link_cmd):
         lambda x: callable(x),
         ConfigOption('ssh_cmd', default=ssh_cmd):
         lambda x: callable(x),
         ConfigOption('workspace', default=cls.default_workspace_root):
         str,
         ConfigOption('workspace_exclude', default=[]):
         Or(list, None),
         ConfigOption('remote_workspace', default=None):
         Or(str, None),
         ConfigOption('copy_workspace_check',
                      default=remote_filepath_exists):
         Or(lambda x: callable(x), None),
         ConfigOption('env', default=None):
         Or(dict, None),
         ConfigOption('setup_script', default=None):
         Or(list, None),
         ConfigOption('push', default=[]):
         Or(list, None),
         ConfigOption('push_exclude', default=[]):
         Or(list, None),
         ConfigOption('push_relative_dir', default=None):
         Or(str, None),
         ConfigOption('delete_pushed', default=False):
         bool,
         ConfigOption('pull', default=[]):
         Or(list, None),
         ConfigOption('pull_exclude', default=[]):
         Or(list, None),
         ConfigOption('remote_mkdir', default=['/bin/mkdir', '-p']):
         list,
         ConfigOption('testplan_path', default=None):
         Or(str, None),
         ConfigOption('worker_heartbeat', default=30):
         Or(int, float, None)
     }
Example #23
    """
    Create new schema that allows the supported schema or None.
    """
    return Or(schema, None)
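
For instance (an illustrative port validator, not from the source, assuming `And` and `Use` are imported from `schema`), wrapping an existing validator makes `None` pass through unchanged:

# nullable() usage sketch: accepts a port number or None.
port_schema = nullable(And(Use(int), lambda p: 0 < p < 65536))
assert port_schema.validate(None) is None
assert port_schema.validate('8080') == 8080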


# #126f9a == $m-blue-d3 in variables.scss. It's rgb(18,111,154)
main_color = '#126f9a'
# Same as main_color. Almost like openedx's #0075b4 == rgb(0, 117, 180)
link_color = '#126f9a'
# openedx also uses white by default
header_bg_color = '#ffffff'
# openedx also uses white by default
footer_bg_color = '#ffffff'

text_color_options = Or('light', 'dark', color)
main_color_options = Or('accent', 'main', color)

button_color_schema = {
    Optional('main'): main_color_options,
    Optional('text'): text_color_options,
    Optional('line'): nullable(main_color_options),
    Optional('hover-main'): main_color_options,
    Optional('hover-text'): text_color_options,
    Optional('hover-line'): nullable(main_color_options),
}

theme_schema_v0 = Schema({
    'version':
    0,
    # This is used as the primary color in your theme palette. It is used as filler for buttons.
Example #24
from schema import Schema, Use, Optional, Regex, Or

# Packed data schema; packed data is stored in FmsFormat object attribute '_packeddata'
packed_fms_structure = {
    'schema': Schema({
        'config': {
            'num_channels': Use(int),
            'num_frames': Use(int),
            'speed': Use(float),
            Optional('meta'): Use(dict),
        },
        'props': [Use(str)],
        'data': {  # channels
            Regex('^channel[0-9]+$'): [  # frames
                [  # position-indexed properties
                    Or([Use(float)], dict)
                ]
            ],
        },
    }),
    'default': {
        'config': {
            'num_channels': 1,
            'num_frames': 0,
            'speed': 0.0,
        },
        'props': [],
        'data': {
            'channel0': [],
        },
    },
}
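
A hedged validation sketch against the schema above (the payload contents are illustrative): each channel holds a list of frames, and each frame is a list of position-indexed property values.

# Illustrative payload -- one channel, two frames, one position each.
payload = {
    'config': {'num_channels': 1, 'num_frames': 2, 'speed': 24.0},
    'props': ['x', 'y'],
    'data': {
        'channel0': [
            [[0.0, 0.0]],  # frame 0: float properties for one position
            [[1.0, 0.5]],  # frame 1
        ],
    },
}
validated = packed_fms_structure['schema'].validate(payload)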
Example #25
File: remote.py  Project: cpages/testplan
 def get_options(cls):
     """
     Schema for options validation and assignment of default values.
     """
     return {
         "hosts":
         dict,
         ConfigOption("abort_signals",
                      default=[signal.SIGINT, signal.SIGTERM]): [int],
         ConfigOption("worker_type", default=RemoteWorker):
         object,
         ConfigOption("pool_type", default="thread"):
         str,
         ConfigOption("host", default=cls.default_hostname):
         str,
         ConfigOption("port", default=0):
         int,
         ConfigOption("copy_cmd", default=copy_cmd):
         lambda x: callable(x),
         ConfigOption("link_cmd", default=link_cmd):
         lambda x: callable(x),
         ConfigOption("ssh_cmd", default=ssh_cmd):
         lambda x: callable(x),
         ConfigOption("workspace", default=cls.default_workspace_root):
         str,
         ConfigOption("workspace_exclude", default=[]):
         Or(list, None),
         ConfigOption("remote_workspace", default=None):
         Or(str, None),
         ConfigOption("copy_workspace_check",
                      default=remote_filepath_exists):
         Or(lambda x: callable(x), None),
         ConfigOption("env", default=None):
         Or(dict, None),
         ConfigOption("setup_script", default=None):
         Or(list, None),
         ConfigOption("push", default=[]):
         Or(list, None),
         ConfigOption("push_exclude", default=[]):
         Or(list, None),
         ConfigOption("push_relative_dir", default=None):
         Or(str, None),
         ConfigOption("delete_pushed", default=False):
         bool,
         ConfigOption("pull", default=[]):
         Or(list, None),
         ConfigOption("pull_exclude", default=[]):
         Or(list, None),
         ConfigOption("remote_mkdir", default=["/bin/mkdir", "-p"]):
         list,
         ConfigOption("testplan_path", default=None):
         Or(str, None),
         ConfigOption("worker_heartbeat", default=30):
         Or(int, float, None),
     }
Example #26
    setChoice('logLevel', 'trace', 'debug', 'info', 'warning', 'error',
              'fatal'),
    Optional('logCollection'):
    setChoice('logCollection', 'http', 'none'),
    'useAnnotation':
    setType('useAnnotation', bool),
    Optional('tuner'):
    AlgoSchema('tuner'),
    Optional('advisor'):
    AlgoSchema('advisor'),
    Optional('assessor'):
    AlgoSchema('assessor'),
    Optional('localConfig'): {
        Optional('gpuIndices'):
        Or(int,
           And(str, lambda x: len([int(i) for i in x.split(',')]) > 0),
           error='gpuIndex format error!'),
        Optional('maxTrialNumPerGpu'):
        setType('maxTrialNumPerGpu', int),
        Optional('useActiveGpu'):
        setType('useActiveGpu', bool)
    }
}

common_trial_schema = {
    'trial': {
        'command':
        setType('command', str),
        'codeDir':
        setPathCheck('codeDir'),
        Optional('gpuNum'):
Example #27
def main(argv, session):
    args = docopt(__doc__, argv=argv)

    # Validation error messages.
    invalid_id_msg = (
        '<identifier> should be between 3 and 80 characters in length, and '
        'can only contain alphanumeric characters, underscores ( _ ), or '
        'dashes ( - )')

    # Validate args.
    s = Schema({
        six.text_type: Use(lambda x: bool(x)),
        '<file>': And(list,
                      Use(lambda x: convert_str_list_to_unicode(x)
                          if six.PY2 else x)),
        '--format': list,
        '--glob': list,
        'delete': bool,
        '<identifier>': Or(None, And(str, validate_ia_identifier,
                                     error=invalid_id_msg)),
        '--retries': Use(lambda i: int(i[0])),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)),
              file=sys.stderr)
        sys.exit(1)

    verbose = not args['--quiet']
    item = session.get_item(args['<identifier>'])
    if not item.exists:
        print('{0}: skipping, item doesn\'t exist.'.format(item.identifier))

    # Files that cannot be deleted via S3.
    no_delete = ['_meta.xml', '_files.xml', '_meta.sqlite']

    if verbose:
        sys.stdout.write('Deleting files from {0}\n'.format(item.identifier))

    if args['--all']:
        files = list(item.get_files())
        args['--cascade'] = True
    elif args['--glob']:
        files = item.get_files(glob_pattern=args['--glob'])
    elif args['--format']:
        files = item.get_files(formats=args['--format'])
    else:
        fnames = []
        if args['<file>'] == ['-']:
            if six.PY2:
                fnames = convert_str_list_to_unicode(
                    [f.strip() for f in sys.stdin])
            else:
                fnames = [f.strip() for f in sys.stdin]
        else:
            fnames = [f.strip() for f in args['<file>']]

        files = list(item.get_files(fnames))

    if not files:
        sys.stderr.write(' warning: no files found, nothing deleted.\n')
        sys.exit(1)

    errors = False
    for f in files:
        if not f:
            if verbose:
                sys.stderr.write(' error: a requested file does not exist\n')
            errors = True
            continue
        if any(f.name.endswith(s) for s in no_delete):
            continue
        if args['--dry-run']:
            sys.stdout.write(' will delete: {0}/{1}\n'.format(
                item.identifier, f.name.encode('utf-8')))
            continue
        try:
            resp = f.delete(verbose=verbose,
                            cascade_delete=args['--cascade'],
                            retries=args['--retries'])
        except requests.exceptions.RetryError as e:
            print(' error: max retries exceeded for {0}'.format(f.name),
                  file=sys.stderr)
            errors = True
            continue

        if resp.status_code != 204:
            errors = True
            msg = get_s3_xml_text(resp.content)
            print(' error: {0} ({1})'.format(msg, resp.status_code),
                  file=sys.stderr)
            continue

    if errors is True:
        sys.exit(1)
Example #28
from bentoml.configuration import config
from bentoml.exceptions import BentoMLConfigException
from bentoml.utils.ruamel_yaml import YAML

LOGGER = logging.getLogger(__name__)

SCHEMA = Schema({
    "api_server": {
        "port": And(int, lambda port: port > 0),
        "enable_microbatch": bool,
        "run_with_ngrok": bool,
        "enable_swagger": bool,
        "enable_metrics": bool,
        "enable_feedback": bool,
        "max_request_size": And(int, lambda size: size > 0),
        "workers": Or(And(int, lambda workers: workers > 0), None),
        "timeout": And(int, lambda timeout: timeout > 0),
    },
    "marshal_server": {
        "max_batch_size": Or(And(int, lambda size: size > 0), None),
        "max_latency": Or(And(int, lambda latency: latency > 0), None),
        "workers": Or(And(int, lambda workers: workers > 0), None),
        "request_header_flag": str,
    },
    "yatai": {
        "url": Or(str, None)
    },
    "tracing": {
        "type":
        Or(And(str, Use(str.lower), lambda s: s in ('zipkin', 'jaeger')),
           None),
    "query_required_fields": [str],
    "stable_past_days": int,
    "stable_min_threshold": float,
    "stable_max_threshold": float,
    "event_raise_max_thershold": float,
    "event_raise_min_thershold": float,
})

feature_selection_params_schema = Schema({
    "feature_names": [str],
})

single_estimator_grid_search_params_schema = Schema({
    "estimator_name": str,
    "param_grid": {
        str: Or(float, int, list)
    },
    "scoring_name": str
})
grid_search_params_schema = Schema({
    "estimators": [single_estimator_grid_search_params_schema],
    "cv":
    int,
    "verbose":
    int
})

label_producing_params_schema = Schema({
    "method":
    Or("binary_min_threshold_classification",
       "binary_max_threshold_classification", "regression",
Example #30
    def oformat(self):
        return self._format

    @property
    def precision(self):
        return self._precision

    @property
    def filedata(self):
        return self._filedata


argvalidators = {
    '--amodetag':
    Or(None,
       And(str, lambda s: s.upper() in params._amodetagChoices),
       error='--amodetag must be in ' + str(params._amodetagChoices)),
    '--beamenergy':
    Or(None,
       And(Use(int), lambda n: n > 0),
       error='--beamenergy should be a positive number'),
    '--xingMin':
    Or(None,
       And(Use(float), lambda n: n > 0),
       error='--xingMin should be a positive number'),
    '--xingTr':
    Or(None,
       And(Use(float), lambda n: (n > 0 and n <= 1)),
       error='--xingTr should be a number in (0,1]'),
    '--xingId':
    Or(None,