class Script(base.Structure, base.RestrictionMixin):
    """A script, i.e., some executable item within a resource descriptor.

    The content of scripts is given by their type -- usually, they are
    either python scripts or SQL with special rules for breaking the
    script into individual statements (which are basically like python's).

    The special language AC_SQL is like SQL, but execution errors are
    ignored. This is not what you want for most data RDs (it's intended
    for housekeeping scripts).

    See `Scripting`_.
    """
    name_ = "script"
    typeDesc_ = "Embedded executable code with a type definition"

    _lang = base.EnumeratedUnicodeAttribute(
        "lang",
        default=base.Undefined,
        validValues=["SQL", "python", "AC_SQL"],
        description="Language of the script.",
        copyable=True)

    _type = base.EnumeratedUnicodeAttribute(
        "type",
        default=base.Undefined,
        validValues=[
            "preImport",
            "newSource",
            "preIndex",
            "postCreation",
            "beforeDrop",
            "sourceDone",
        ],
        description="Point of time at which script is to run.",
        copyable=True)

    _name = base.UnicodeAttribute(
        "name",
        default="anonymous",
        description="A human-consumable designation of the script.",
        copyable=True)

    _notify = base.BooleanAttribute(
        "notify",
        default=True,
        description="Send out a notification when running this"
            " script.",
        copyable=True)

    _content = base.DataContent(
        description="The script body.",
        copyable=True)

    _original = base.OriginalAttribute()

    def getSource(self):
        """returns the script body after macro expansion.

        The expansion is delegated to the expander handed out by the
        script's parent.
        """
        expander = self.parent.getExpander()
        return expander.expand(self.content_)
class BinaryGrammar(Grammar):
    """A grammar that builds rowdicts from binary data.

    The grammar expects the input to be in fixed-length records.
    The actual specification of the fields is done via a binaryRecordDef
    element.
    """
    name_ = "binaryGrammar"
    rowIterator = BinaryRowIterator

    _til = base.IntAttribute(
        "skipBytes",
        default=0,
        description="Number of bytes to skip before parsing records.")

    _fdefs = base.StructAttribute(
        "fieldDefs",
        childFactory=BinaryRecordDef,
        description="Definition of the record.")

    _armoring = base.EnumeratedUnicodeAttribute(
        "armor",
        default=None,
        validValues=["fortran"],
        description="Record armoring; by default it's None meaning the"
            " data was dumped to the file sequentially. Set it to fortran"
            " for fortran unformatted files (4 byte length before and after"
            " the payload).")
class BinaryRecordDef(base.Structure):
    """A definition of a binary record.

    A binary record consists of a number of binary fields, each of which
    is defined by a name and a format code. The format codes supported
    here are a subset of what python's struct module supports. The
    widths given below are for big, little, and packed binfmts.
    For native (which is the default), it depends on your platform.

    * <number>s -- <number> characters making up a string
    * b,B -- signed and unsigned byte (8 bit)
    * h,H -- signed and unsigned short (16 bit)
    * i,I -- signed and unsigned int (32 bit)
    * q,Q -- signed and unsigned long (64 bit)
    * f,d -- float and double.

    The content of this element gives the record structure in the format
    <name>(<code>){<whitespace><name>(<code>)} where <name> is a c-style
    identifier.
    """
    name_ = "binaryRecordDef"

    # the pyparsing grammar for the element content is built once per class
    _fieldsGrammar = _getFieldsGrammar()

    _binfmt = base.EnumeratedUnicodeAttribute("binfmt",
        default="native",
        validValues=["big", "little", "native", "packed"],
        description="Binary format of the input data; big and little stand"
            " for msb first and lsb first, and"
            " packed is like native except no alignment takes place.")

    _fields = base.DataContent(description="The enumeration of"
        " the record fields.")

    # maps binfmt values to the byte order/alignment prefix of a struct
    # format string
    _binfmtToStructCode = {
        "native": "",
        "packed": "=",
        "big": ">",
        "little": "<"}

    def completeElement(self, ctx):
        """parses the element content and derives the record layout.

        After this has run, the instance has the attributes structFormat
        (a struct module format string including the binfmt prefix),
        recordLength (what struct.calcsize returns for structFormat), and
        fieldNames (a tuple of the field identifiers in record order).

        Parse failures are turned into a base.LiteralParseError (passed
        through base.ui.logOldExc) carrying the whitespace-normalised
        content, the parser's error location, and its message as a hint.
        """
        try:
            parsedFields = utils.pyparseString(self._fieldsGrammar,
                self.content_)
        except pyparsing.ParseBaseException as ex:
            raise base.ui.logOldExc(base.LiteralParseError("binaryRecordDef",
                re.sub(r"\s+", " ", self.content_),
                pos=str(ex.loc), hint="The parser said: '%s'"%str(ex)))
        # XXX TODO: Position should probably be position during XML parse.
        # Fix when we have source positions on parsed elements.
        self.structFormat = (self._binfmtToStructCode[self.binfmt]+
            str("".join(f["formatCode"] for f in parsedFields)))
        self.recordLength = struct.calcsize(self.structFormat)
        self.fieldNames = tuple(f["identifier"] for f in parsedFields)
        self._completeElementNext(BinaryRecordDef, ctx)
class DirectGrammar(base.Structure, base.RestrictionMixin):
    """A user-defined external grammar.

    See the separate document on user-defined code on more on direct
    grammars.

    Also note the program gavomkboost that can help you generate code for
    the C boosters used by direct grammars.
    """
    name_ = "directGrammar"

    _cbooster = rscdef.ResdirRelativeAttribute(
        "cBooster",
        default=base.Undefined,
        copyable=True,
        description="resdir-relative path to the booster C source.")

    _gzippedInput = base.BooleanAttribute(
        "gzippedInput",
        default=False,
        copyable=True,
        description="Pipe gzip before booster? (will not work for FITS)")

    _autoNull = base.UnicodeAttribute(
        "autoNull",
        default=None,
        copyable=True,
        description="Use this string as general NULL value (when reading"
            " from plain text).")

    _ignoreBadRecords = base.BooleanAttribute(
        "ignoreBadRecords",
        default=False,
        copyable=True,
        description="Let booster ignore invalid records?")

    _recordSize = base.IntAttribute(
        "recordSize",
        default=4000,
        copyable=True,
        description="For bin boosters, read this many bytes to make"
            " up a record; for line-based boosters, this is the maximum"
            " length of an input line.")

    _preFilter = base.UnicodeAttribute(
        "preFilter",
        default=None,
        copyable=True,
        description="Pipe input through this program before handing it to"
            " the booster; this string is shell-expanded (will not work for FITS).")

    _customFlags = base.UnicodeAttribute(
        "customFlags",
        default="",
        copyable=True,
        description="Pass these flags to the C compiler when building the"
            " booster.")

    _type = base.EnumeratedUnicodeAttribute(
        "type",
        default="col",
        validValues=["col", "bin", "fits", "split"],
        copyable=True,
        description="Make code for a booster parsing by column indices (col),"
            " by splitting along separators (split), by reading fixed-length"
            " binary records (bin), for from FITS binary tables (fits).")

    _splitChar = base.UnicodeAttribute(
        "splitChar",
        default="|",
        copyable=True,
        description="For split boosters, use this as the separator.")

    _ext = base.IntAttribute(
        "extension",
        default=1,
        copyable=True,
        description=
            "For FITS table boosters, get the table from this extension.")

    _mapKeys = base.StructAttribute(
        "mapKeys",
        childFactory=common.MapKeys,
        default=None,
        copyable=True,
        description="For a FITS booster, map DB table column names"
            " to FITS column names (e.g., if the FITS table name flx is to"
            " end up in the DB column flux, say flux:flx).")

    _rd = rscdef.RDAttribute()

    isDispatching = False

    def validate(self):
        """checks the consistency of type, recordSize, and mapKeys.

        A StructureError is raised for bin boosters without a (non-zero)
        recordSize and for mapKeys given with a type other than fits.
        """
        self._validateNext(DirectGrammar)

        if self.type == 'bin' and not self.recordSize:
            raise base.StructureError(
                "DirectGrammars reading from binary need"
                " a recordSize attribute")

        if self.mapKeys is not None and self.type != "fits":
            raise base.StructureError("mapKeys is only allowed for FITS"
                " boosters.")

    def onElementComplete(self):
        """sets keyMap for FITS boosters.

        keyMap is the maps attribute of mapKeys if that is given, an
        empty dictionary otherwise. For other booster types, nothing is
        done.
        """
        if self.type != "fits":
            return

        self.keyMap = self.mapKeys.maps if self.mapKeys else {}

    def getBooster(self):
        """returns a CBooster configured from this grammar's attributes.
        """
        boosterOptions = {
            "gzippedInput": self.gzippedInput,
            "preFilter": self.preFilter,
            "autoNull": self.autoNull,
            "ignoreBadRecords": self.ignoreBadRecords,
            "customFlags": self.customFlags,
        }
        return CBooster(self.cBooster, self.parent, **boosterOptions)

    def parse(self, sourceToken, targetData=None):
        """returns a function feeding the booster output into a data item.

        The returned function takes the data item; it copies what the
        booster produces for sourceToken into the first of the data's
        tables and raises a SourceParseError if the booster reports a
        non-zero status afterwards.

        A StructureError is raised right away unless the parent has exactly
        one make.
        """
        # the booster is created before the makes are inspected
        booster = self.getBooster()

        makes = self.parent.makes
        makeCount = len(makes)
        if makeCount != 1:
            raise base.StructureError(
                "Directgrammar only works for data having"
                " exactly one table, but data '%s' has %d" % (
                    self.parent.id, makeCount))

        def copyIn(data):
            destTable = data.tables.values()[0]
            destTable.copyIn(booster.getOutput(sourceToken))
            if booster.getStatus():
                raise base.SourceParseError(
                    "Booster returned error signature",
                    source=sourceToken)

        return copyIn
class Make(base.Structure, scripting.ScriptingMixin):
    """A build recipe for tables belonging to a data descriptor.

    All makes belonging to a DD will be processed in the order in which they
    appear in the file.
    """
    name_ = "make"

    _table = base.ReferenceAttribute(
        "table",
        default=base.Undefined,
        forceType=tabledef.TableDef,
        description="Reference to the table to be embedded",
        copyable=True)

    _rowmaker = base.ReferenceAttribute(
        "rowmaker",
        default=base.NotGiven,
        forceType=rmkdef.RowmakerDef,
        description="The rowmaker (i.e., mapping rules from grammar keys to"
            " table columns) for the table being made.",
        copyable=True)

    _parmaker = base.ReferenceAttribute(
        "parmaker",
        default=base.NotGiven,
        forceType=rmkdef.ParmakerDef,
        description="The parmaker (i.e., mapping rules from grammar parameters"
            " to table parameters) for the table being made. You will usually"
            " not give a parmaker.",
        copyable=True)

    _role = base.UnicodeAttribute(
        "role",
        default=None,
        description="The role of the embedded table within the data set",
        copyable=True)

    _rowSource = base.EnumeratedUnicodeAttribute(
        "rowSource",
        default="rows",
        validValues=["rows", "parameters"],
        description="Source for the raw rows processed by this rowmaker.",
        copyable=True,
        strip=True)

    def __repr__(self):
        # table or rowmaker may be falsy here; in that case, the falsy
        # value itself rather than an id is shown.
        tableLabel = self.table and self.table.id
        rowmakerLabel = self.rowmaker and self.rowmaker.id
        return "Make(table=%r, rowmaker=%r)"%(tableLabel, rowmakerLabel)

    def onParentComplete(self):
        """fills in an identity rowmaker built from the table if no
        rowmaker was given.
        """
        if self.rowmaker is not base.NotGiven:
            return
        self.rowmaker = rmkdef.RowmakerDef.makeIdentityFromTable(self.table)

    def getExpander(self):
        """returns the macro expander scripts use for expanding their source.

        This always is the expander of the table being made.
        """
        return self.table.getExpander()

    def create(self, connection, parseOptions, tableFactory, **kwargs):
        """returns a new empty instance of the table this is making.

        The table is built by tableFactory. For onDisk tables, its
        _runScripts attribute is set to this make's script runner unless
        parseOptions.updateMode is true or the parent has a true updating
        attribute.
        """
        factoryArgs = dict(kwargs)
        factoryArgs.update(
            parseOptions=parseOptions,
            connection=connection,
            role=self.role,
            create=True)
        newTable = tableFactory(self.table, **factoryArgs)

        if self.table.onDisk:
            if not (parseOptions.updateMode
                    or getattr(self.parent, "updating", False)):
                newTable._runScripts = self.getRunner()

        return newTable

    def runParmakerFor(self, grammarParameters, destTable):
        """feeds grammarParameters to destTable.

        This does nothing if no parmaker was given. Otherwise, the parmaker
        is compiled for destTable's table definition, and its result is
        passed to destTable.setParams with raiseOnBadKeys=False.
        """
        if self.parmaker is not base.NotGiven:
            parmakerFunc = self.parmaker.compileForTableDef(
                destTable.tableDef)
            newParams = parmakerFunc(grammarParameters, destTable)
            destTable.setParams(newParams, raiseOnBadKeys=False)
class DataURL(base.Structure):
    """A source document for a regression test.

    As string URLs, they specify where to get data from, but they
    additionally let you specify uploads, authentication, headers and
    http methods, while at the same time saving you manual escaping of
    parameters.

    The body is the path to run the test against. This is interpreted as
    relative to the RD if there's no leading slash, relative to the server
    if there's a leading slash, and absolute if there's a scheme.

    The attributes are translated to parameters, except for a few
    pre-defined names. If you actually need those as URL parameters,
    shout at us and we'll provide some way of escaping these.

    We don't actually parse the URLs coming in here. GET parameters
    are appended with a & if there's a ? in the existing URL, with a ? if
    not. Again, shout if this is too dumb for you (but urlparse really
    isn't all that robust either...)
    """
    name_ = "url"

    # httpURL will be set to the URL actually used in retrieveResource
    # Only use this to report the source of the data for, e.g., failing
    # tests.
    httpURL = "(not retrieved)"

    _base = base.DataContent(description="Base for URL generation; embedded"
        " whitespace will be removed, so you're free to break those whereever"
        " you like.",
        copyable=True)

    _httpMethod = base.UnicodeAttribute("httpMethod",
        description="Request method; usually one of GET or POST",
        default="GET")

    _httpPost = common.ResdirRelativeAttribute("postPayload",
        default=base.NotGiven,
        description="Path to a file containing material that should go"
        " with a POST request (conflicts with additional parameters).",
        copyable=True)

    _parset = base.EnumeratedUnicodeAttribute("parSet",
        description="Preselect a default parameter set; form gives what"
        " our framework adds to form queries.",
        default=base.NotGiven,
        validValues=["form"],
        copyable=True)

    _httpHeaders = base.DictAttribute("httpHeader",
        description="Additional HTTP headers to pass.",
        copyable=True)

    _httpAuthKey = base.UnicodeAttribute("httpAuthKey",
        description="A key into ~/.gavo/test.creds to find a user/password"
        " pair for this request.",
        default=base.NotGiven,
        copyable=True)

    _httpUploads = base.StructListAttribute("uploads",
        childFactory=Upload,
        description='HTTP uploads to add to request (must have httpMethod="POST")',
        copyable=True)

    # copyable used to be the string "True" here; all other attributes
    # pass a real boolean.
    _httpHonorRedirects = base.BooleanAttribute("httpHonorRedirects",
        default=False,
        description="Follow 30x redirects instead of just using"
        " status, headers, and payload of the initial request.",
        copyable=True)

    _rd = common.RDAttribute()

    _open = DynamicOpenVocAttribute("open")

    def getValue(self, serverURL):
        """returns the full request URL for this test.

        Whitespace is removed from the element content. The result is
        used as-is if it contains "://", prefixed with serverURL if it
        starts with a slash, and prefixed with serverURL and the sourceId
        of the parent's RD otherwise.

        For httpMethod POST, the URL is returned without parameters; for
        all other methods, the urlencoded parameters are appended.
        """
        urlBase = re.sub(r"\s+", "", self.content_)
        if "://" in urlBase:
            # we believe there's a scheme in there
            pass
        elif urlBase.startswith("/"):
            urlBase = serverURL+urlBase
        else:
            urlBase = serverURL+"/"+self.parent.rd.sourceId+"/"+urlBase

        if self.httpMethod=="POST":
            return urlBase
        else:
            return self._addParams(urlBase,
                urllib.urlencode(self.getParams()))

    def getParams(self):
        """returns the URL parameters as a sequence of kw, value pairs.

        The result is a new list on every call; in particular, the
        parameters added for parSet="form" are not written back to
        freeAttrs (which would make them pile up over repeated calls).
        """
        params = list(getattr(self, "freeAttrs", []))
        if self.parSet=="form":
            params.extend([("__nevow_form__", "genForm"), ("submit", "Go"),
                ("_charset_", "UTF-8")])
        return params

    def retrieveResource(self, serverURL, timeout):
        """returns a triple of status, headers, and content for retrieving
        this URL.

        As a side effect, httpURL is set to the URL requested first.

        For POST requests, the payload is the content of postPayload if
        given, a multipart form if there are uploads, and the urlencoded
        parameters otherwise. With httpHonorRedirects, 301, 302, and 303
        responses are followed by GET requests to what their location
        header gives.
        """
        self.httpURL, payload = self.getValue(serverURL), None
        headers = {
            "user-agent": "DaCHS regression tester"}
        headers.update(self.httpHeader)

        if self.httpMethod=="POST":
            if self.postPayload:
                with open(self.postPayload) as f:
                    payload = f.read()

            elif self.uploads:
                form = _FormData()
                for key, value in self.getParams():
                    form.addParam(key, value)
                for upload in self.uploads:
                    upload.addToForm(form)
                boundary = "========== roughtest deadbeef"
                form.set_param("boundary", boundary)
                headers["Content-Type"] = form.get_content_type(
                    )+'; boundary="%s"'%boundary
                payload = form.as_string()

            else:
                payload = urllib.urlencode(self.getParams())
                headers["Content-Type"] = "application/x-www-form-urlencoded"

        scheme, host, path, _, query, _ = urlparse.urlparse(str(self.httpURL))
        assert scheme=="http"

        if self.httpAuthKey is not base.NotGiven:
            headers.update(getAuthFor(self.httpAuthKey))
        status, respHeaders, content = doHTTPRequest(str(self.httpMethod),
            host, path, query, payload, headers, timeout)

        # redirects are followed without the original headers and payload
        while self.httpHonorRedirects and status in [301, 302, 303]:
            scheme, host, path, _, query, _ = urlparse.urlparse(
                getHeaderValue(respHeaders, "location"))
            status, respHeaders, content = doHTTPRequest("GET",
                host, path, query, None, {}, timeout)

        return status, respHeaders, content

    def _addParams(self, urlBase, params):
        """a brief hack to add query parameters to GET-style URLs.

        This is a workaround for not trusting urlparse and is fairly easy to
        fool.

        Params must already be fully encoded.
        """
        if not params:
            return urlBase

        if "?" in urlBase:
            return urlBase+"&"+params
        else:
            return urlBase+"?"+params

    def validate(self):
        """raises a StructureError for attribute combinations that cannot
        work.

        These are postPayload together with parameters or parSets,
        and postPayload or uploads with an httpMethod other than POST.
        """
        if self.postPayload is not base.NotGiven:
            if self.getParams():
                raise base.StructureError("No parameters (or parSets) are"
                    " possible with postPayload")
            if self.httpMethod!="POST":
                raise base.StructureError("Only POST is allowed as httpMethod"
                    " together with postPayload")

        if self.uploads:
            if self.httpMethod!="POST":
                raise base.StructureError("Only POST is allowed as httpMethod"
                    " together with upload")

        self._validateNext(DataURL)
class ProcDef(base.Structure, base.RestrictionMixin):
    """An embedded procedure.

    Embedded procedures are python code fragments with some interface defined
    by their type. They can occur at various places (which is called procedure
    application generically), e.g., as row generators in grammars, as applys in
    rowmakers, or as SQL phrase makers in condDescs.

    They consist of the actual code and, optionally, definitions like
    the namespace setup, configuration parameters, or a documentation.

    The procedure applications compile into python functions with special
    global namespaces. The signatures of the functions are determined by
    the type attribute.

    ProcDefs are referred to by procedure applications using their id.
    """
    name_ = "procDef"

    _code = base.UnicodeAttribute(
        "code",
        default=base.NotGiven,
        description="A python function body.",
        copyable=True)

    _setup = base.StructListAttribute(
        "setups",
        ProcSetup,
        description="Setup of the namespace the function will run in",
        copyable=True)

    _doc = base.UnicodeAttribute(
        "doc",
        default="",
        description=
            "Human-readable docs for this proc (may be interpreted as restructured"
            " text).",
        copyable=True)

    _type = base.EnumeratedUnicodeAttribute(
        "type",
        default=None,
        validValues=[
            "t_t", "apply", "rowfilter", "sourceFields", "mixinProc",
            "phraseMaker", "descriptorGenerator", "dataFunction",
            "dataFormatter", "metaMaker", "regTest", "iterator",
            "pargetter"],
        description=
            "The type of the procedure definition. The procedure applications"
            " will in general require certain types of definitions.",
        copyable=True,
        strip=True)

    _deprecated = base.UnicodeAttribute(
        "deprecated",
        default=None,
        description="A deprecation message. This will"
            " be shown if this procDef is being compiled.",
        copyable=True)

    _original = base.OriginalAttribute()

    def getCode(self):
        """returns the body code re-indented by utils.fixIndentation.

        If no code was given, an empty string is returned.
        """
        if self.code is base.NotGiven:
            return ""
        # NOTE(review): an earlier docstring promised an indentation of two
        # spaces, but the indent string passed here is one character long --
        # confirm which one is intended.
        return utils.fixIndentation(self.code, " ", governingLine=1)

    @utils.memoized
    def getSetupPars(self):
        """returns all parameters used by setup items, where lexically
        later items override earlier items of the same name.
        """
        parsPerSetup = [setup.pars for setup in self.setups]
        return unionByKey(*parsPerSetup)

    def getLateSetupCode(self, boundNames):
        """returns the late code of all setups, joined by newlines.
        """
        fragments = (setup.getLateCode(boundNames) for setup in self.setups)
        return "\n".join(fragments)

    def getParSetupCode(self, boundNames):
        """returns the parameter code of all setups, joined by newlines.
        """
        fragments = (setup.getParCode(boundNames) for setup in self.setups)
        return "\n".join(fragments)

    def getBodySetupCode(self, boundNames):
        """returns the body code of all setups, joined by newlines.

        boundNames is accepted for symmetry with the other get*SetupCode
        methods but is not passed on to the setups.
        """
        fragments = (setup.getBodyCode() for setup in self.setups)
        return "\n".join(fragments)
class TableDef(base.Structure, base.ComputedMetaMixin, common.PrivilegesMixin,
        common.IVOMetaMixin, base.StandardMacroMixin, PublishableDataMixin):
    """A definition of a table, both on-disk and internal.

    Some attributes are ignored for in-memory tables, e.g., roles or adql.

    Properties for tables:

    * supportsModel -- a short name of a data model supported through this
      table (for TAPRegExt dataModel); you can give multiple names separated
      by commas.
    * supportsModelURI -- a URI of a data model supported through this table.
      You can give multiple URIs separated by blanks.

    If you give multiple data model names or URIs, the sequences of names and
    URIs must be identical (in particular, each name needs a URI).
    """
    name_ = "table"

    resType = "table"

    # We don't want to force people to come up with an id for all their
    # internal tables but want to avoid writing default-named tables to
    # the db. Thus, the default is not a valid sql identifier.
    _id = base.IdAttribute(
        "id",
        default=base.NotGiven,
        description="Name of the table (must be SQL-legal for onDisk tables)")

    _cols = common.ColumnListAttribute(
        "columns",
        childFactory=column.Column,
        description="Columns making up this table.",
        copyable=True)

    _params = common.ColumnListAttribute(
        "params",
        childFactory=column.Param,
        description='Param ("global columns") for this table.',
        copyable=True)

    _viewStatement = base.UnicodeAttribute(
        "viewStatement",
        default=None,
        description="A single SQL statement to create a view. Setting this"
        " makes this table a view. The statement will typically be something"
        " like CREATE VIEW \\\\curtable AS (SELECT \\\\colNames FROM...).",
        copyable=True)

    # onDisk must not be copyable since queries might copy the tds and havoc
    # would result if the queries were to end up on disk.
    _onDisk = base.BooleanAttribute(
        "onDisk",
        default=False,
        description="Table in the database rather than in memory?")

    _temporary = base.BooleanAttribute(
        "temporary",
        default=False,
        description="If this is an onDisk table, make it temporary?"
            " This is mostly useful for custom cores and such.",
        copyable=True)

    _adql = ADQLVisibilityAttribute(
        "adql",
        default=False,
        description="Should this table be available for ADQL queries? In"
        " addition to True/False, this can also be 'hidden' for tables"
        " readable from the TAP machinery but not published in the"
        " metadata; this is useful for, e.g., tables contributing to a"
        " published view. Warning: adql=hidden is incompatible with setting"
        " readProfiles manually.")

    _system = base.BooleanAttribute(
        "system",
        default=False,
        description="Is this a system table? If it is, it will not be"
        " dropped on normal imports, and accesses to it will not be logged.")

    _forceUnique = base.BooleanAttribute(
        "forceUnique",
        default=False,
        description="Enforce dupe policy for primary key (see dupePolicy)?")

    _dupePolicy = base.EnumeratedUnicodeAttribute(
        "dupePolicy",
        default="check",
        validValues=["check", "drop", "overwrite", "dropOld"],
        description="Handle duplicate rows with identical primary keys manually"
        " by raising an error if existing and new rows are not identical (check),"
        " dropping the new one (drop), updating the old one (overwrite), or"
        " dropping the old one and inserting the new one (dropOld)?")

    _primary = ColumnTupleAttribute(
        "primary",
        default=(),
        description=
            "Comma separated names of columns making up the primary key.",
        copyable=True)

    _indices = base.StructListAttribute(
        "indices",
        childFactory=DBIndex,
        description="Indices defined on this table",
        copyable=True)

    _foreignKeys = base.StructListAttribute(
        "foreignKeys",
        childFactory=ForeignKey,
        description="Foreign keys used in this table",
        copyable=False)

    _groups = base.StructListAttribute(
        "groups",
        childFactory=group.Group,
        description="Groups for columns and params of this table",
        copyable=True)

    # this actually induces an attribute annotations with the DM
    # annotation instances
    _annotations = base.StructListAttribute(
        "dm",
        childFactory=dm.DataModelRoles,
        description="Annotations for data models.",
        copyable=True)

    _properties = base.PropertyAttribute()

    # don't copy stc -- columns just keep the reference to the original
    # stc on copy, and nothing should rely on column stc actually being
    # defined in the parent tableDefs.
    _stcs = base.StructListAttribute("stc", description="STC-S definitions"
        " of coordinate systems.", childFactory=STCDef)

    _rd = common.RDAttribute()
    _mixins = mixins.MixinAttribute()
    _original = base.OriginalAttribute()
    _namePath = common.NamePathAttribute()

    fixupFunction = None

    metaModel = ("title(1), creationDate(1), description(1),"
        "subject, referenceURL(1)")

    @classmethod
    def fromColumns(cls, columns, **kwargs):
        """returns a TableDef from a sequence of columns.

        You can give additional constructor arguments. makeStruct is used
        to build the instance, the mixin hack is applied.

        Columns with identical names will be disambiguated.
        """
        res = MS(cls,
            columns=common.ColumnList(cls.disambiguateColumns(columns)),
            **kwargs)
        return res

    def __iter__(self):
        # iterating over a table definition yields its columns
        return iter(self.columns)

    def __contains__(self, name):
        # membership is by column name; params are not consulted
        try:
            self.columns.getColumnByName(name)
        except base.NotFoundError:
            return False
        return True

    def __repr__(self):
        try:
            return "<Table definition of %s>" % self.getQName()
        except base.Error:
            return "<Non-RD table %s>" % self.id

    def completeElement(self, ctx):
        """finishes construction of the table definition.

        This sets the meta parent, resets annotations, rejects view
        statements in restricted mode, makes sure there is an id
        (published tables must have an explicit one, others get a
        generated one), and wraps columns into a ColumnList if
        necessary.
        """
        # we want a meta parent as soon as possible, and we always let it
        # be our struct parent
        if (not self.getMetaParent()
                and self.parent
                and hasattr(self.parent, "_getMeta")):
            self.setMetaParent(self.parent)

        # Make room for DM annotations (these are currently filled by
        # gavo.dm.dmrd.DataModelRoles, but we might reconsider this)
        self.annotations = []

        if self.viewStatement and getattr(ctx, "restricted", False):
            raise base.RestrictedElement(
                "table",
                hint="tables with"
                " view creation statements are not allowed in restricted mode")

        if self.registration and self.id is base.NotGiven:
            raise base.StructureError("Published tables need an assigned id.")
        if not self.id:
            self._id.feed(ctx, self, utils.intToFunnyWord(id(self)))

        # allow iterables to be passed in for columns and convert them
        # to a ColumnList here
        if not isinstance(self.columns, common.ColumnList):
            self.columns = common.ColumnList(self.columns)
        self._resolveSTC()
        self._completeElementNext(TableDef, ctx)
        self.columns.withinId = self.params.tableName = "table " + self.id

    def validate(self):
        """raises a StructureError if the table's id is a reserved word
        according to adql.allReservedWords.
        """
        if self.id.upper() in adql.allReservedWords:
            raise base.StructureError(
                "Reserved word %s is not allowed as a table"
                " name" % self.id)
        self._validateNext(TableDef)

    def onElementComplete(self):
        """computes derived attributes once the element is complete.

        This adds the configured adqlProfiles to readProfiles for tables
        visible to ADQL, sets dictKeys (the keys of the columns) and
        indexedColumns (the names of columns in indices or the primary
        key), and registers the table if it has a registration.
        """
        if self.adql:
            self.readProfiles = (self.readProfiles
                | base.getConfig("db", "adqlProfiles"))
        self.dictKeys = [c.key for c in self]

        self.indexedColumns = set()
        for index in self.indices:
            for col in index.columns:
                if "\\" in col:
                    # a backslash is taken to mean there is a macro in the
                    # column name, so expansion is attempted
                    try:
                        self.indexedColumns.add(self.expand(col))
                    except (base.Error, ValueError):
                        # cannot expand yet, ignore
                        pass
                else:
                    self.indexedColumns.add(col)
        if self.primary:
            self.indexedColumns |= set(self.primary)

        self._defineFixupFunction()

        self._onElementCompleteNext(TableDef)

        if self.registration:
            self.registration.register()

    def getElementForName(self, name):
        """returns the first of column and param having name name.

        The function raises a NotFoundError if neither column nor param with
        name exists.
        """
        try:
            try:
                return self.columns.getColumnByName(name)
            except base.NotFoundError:
                return self.params.getColumnByName(name)
        except base.NotFoundError as ex:
            # say where the lookup failed before passing the error on
            ex.within = "table %s" % self.id
            raise