def setUpApplication(self, app):
    """Collect the batch reports written for ``app`` and hand them to the mail sender.

    Every sub-directory of the previous write directory that matches the
    application is parsed (in sorted name order). The bodies found are merged
    into one mail, and the versions seen are compared with the compulsory
    versions from the ``batch_collect_compulsory_version`` config entry.

    :param app: application object providing ``getPreviousWriteDirInfo``
        and ``getBatchConfigValue``.
    :returns: None. Nothing is sent when the root directory does not exist
        or when no report bodies were found.
    """
    fileBodies = []
    totalValues = OrderedDict()
    rootDir = app.getPreviousWriteDirInfo(self.tmpInfo)
    if not os.path.isdir(rootDir):
        sys.stderr.write("No temporary directory found at " + rootDir + " - not collecting batch reports.\n")
        return

    dirNames = sorted(os.listdir(rootDir))
    compulsoryVersions = set(app.getBatchConfigValue("batch_collect_compulsory_version"))
    versionsFound = set()
    for dirName in dirNames:
        fullDir = os.path.join(rootDir, dirName)
        if os.path.isdir(fullDir) and self.matchesApp(dirName, app):
            # parseDirectory also accumulates its category counts into totalValues
            currBodies, currVersions = self.parseDirectory(fullDir, app, totalValues)
            fileBodies += currBodies
            versionsFound.update(currVersions)

    if not fileBodies:
        self.diag.info("No information found in " + rootDir)
        return

    missingVersions = compulsoryVersions.difference(versionsFound)
    mailTitle = self.getTitle(app, totalValues)
    mailContents = self.mailSender.createMailHeaderForSend(self.runId, mailTitle, app)
    mailContents += self.getBody(fileBodies, missingVersions)
    # "All success" means the only category recorded is "succeeded".
    # Comparing lists works with both Python 2 lists and Python 3 key views.
    allSuccess = list(totalValues.keys()) == ["succeeded"]
    self.mailSender.sendOrStoreMail(app, mailContents, isAllSuccess=allSuccess)
def printClusters(tf_idf_values, dom_values, ind, centroids):
    """Draw one cluster and its centroid on the module-level axes and save the figure.

    The (tf-idf, dom) pairs are plotted as a scatter plus a connecting line,
    ordered by tf-idf value; the centroid is marked at y=1 with a vertical
    line down to the x axis. The figure is then written to ``arun_plot.pdf``.

    :param tf_idf_values: x values of the cluster members.
    :param dom_values: y values, paired positionally with ``tf_idf_values``.
    :param ind: cluster index; selects the colour and the centroid.
    :param centroids: sequence of centroid x positions, indexed by ``ind``.
    """
    # Going through a dict keeps one y per distinct x (the last one wins).
    xy_dict = dict(zip(tf_idf_values, dom_values))
    points = sorted(xy_dict.items(), key=lambda item: item[0])
    # Plain lists: matplotlib does not accept Python 3 dict views as data.
    xs = [x for x, _ in points]
    ys = [y for _, y in points]

    colors = ['r', 'y', 'm', 'c', 'b', 'g', 'r', 'y', 'm', 'c']
    # Cycle through the palette so more than len(colors) clusters can be drawn.
    color = colors[ind % len(colors)]

    # Axes.hold was removed in matplotlib 3.0 (holding is always on there).
    if hasattr(ax, 'hold'):
        ax.hold(True)
    ax.set_ylim(0, 1.1)
    ax.set_xlim(0, 1)

    ax.scatter(xs, ys, s=30, c=color, marker='o', alpha=0.75, linewidths=.1)
    ax.plot(xs, ys, linestyle='-', c=color, alpha=.40)

    # centroids: white disc with a black cross on top, plus a vertical guide
    ax.scatter(centroids[ind], 1, marker='o', s=300, linewidths=1, c='w', alpha=0.60)
    ax.scatter(centroids[ind], 1, marker='x', s=300, linewidths=1, c='k', alpha=0.60)
    ax.vlines(x=centroids[ind], ymin=0, ymax=1, color='k', linestyles='solid', alpha=0.40)

    canvas.print_figure('arun_plot.pdf', dpi=700)
def test_to_dict(self):
    """
    Check ``pe.utils.to_dict`` against both ``rows()`` and ``enumerate()``.

    Note: data file with column headers are tested in test_filters.py
    """
    reader = pe.Reader(self.testfile)

    # Row iteration: one numbered series per row, in reading order.
    expected_rows = OrderedDict([
        ("Series_1", [1, 2, 3, 4]),
        ("Series_2", [5, 6, 7, 8]),
        ("Series_3", [9, 10, 11, 12]),
    ])
    rows_as_dict = pe.utils.to_dict(reader.rows())
    assert rows_as_dict.keys() == expected_rows.keys()
    assert expected_rows == rows_as_dict

    # Cell iteration: one numbered series per cell.
    expected_cells = {
        "Series_1": 1,
        "Series_2": 2,
        "Series_3": 3,
        "Series_4": 4,
        "Series_5": 5,
        "Series_6": 6,
        "Series_7": 7,
        "Series_8": 8,
        "Series_9": 9,
        "Series_10": 10,
        "Series_11": 11,
        "Series_12": 12,
    }
    cells_as_dict = pe.utils.to_dict(reader.enumerate())
    assert expected_cells == cells_as_dict
def split_sets(split, response_sets, survey_cache, split_entities=False):
    """Flatten response sets, optionally per data series and per entity.

    :param split: iterable of data series to split on; when empty or None all
        response sets are flattened together under the series ``None``.
    :param response_sets: object supporting ``filter(data_series=...)``.
    :param survey_cache: passed through to ``flatten_response_queryset``.
    :param split_entities: when true, regroup the result per owning entity.
    :returns: a list of ``(data_series, flattened)`` tuples or, when
        ``split_entities`` is set, an ``OrderedDict`` mapping each entity to a
        list of ``(data_series, [response sets])`` pairs. Every series appears
        for every entity, with an empty list when the entity has no data.
    """
    if split:
        result = [
            (ds, flatten_response_queryset(response_sets.filter(data_series=ds), survey_cache))
            for ds in split
        ]
    else:
        result = [(None, flatten_response_queryset(response_sets, survey_cache))]

    if not split_entities:
        return result

    series = [ds for ds, _ in result]
    result_dict = OrderedDict()
    for ds, flattened in result:
        # Assign all response sets to the entity that owns them
        by_entity = OrderedDict()
        for rs in flattened:
            by_entity.setdefault(rs.entity, []).append(rs)
        for entity, data in by_entity.items():
            if entity not in result_dict:
                # Ensure all data series are present in the entity output
                result_dict[entity] = OrderedDict((s, []) for s in series)
            result_dict[entity][ds] = data

    # Hand back real lists (not Python 3 views) so callers can index them.
    for entity in list(result_dict):
        result_dict[entity] = list(result_dict[entity].items())
    return result_dict
class SpecDataBase:
    """A Base class for parsing Spec tables.

    Subclasses must override ``htmlTables`` (find the raw tables in the
    parsed document) and ``parseTable`` (turn one raw table into an object
    exposing ``name()`` and ``toString()``). Parsed tables are kept in
    document order and must have unique names.
    """

    def __init__(self, soup, elem=SpecDataElem):
        """Parse every table found in ``soup``.

        :param soup: parsed document handed to ``htmlTables``.
        :param elem: element class made available through ``getElem``.
        :raises Exception: when two tables share the same name.
        """
        self.__elem = elem
        self.__tables = OrderedDict()
        for tab in self.htmlTables(soup):
            table = self.parseTable(tab)
            name = table.name()
            if name in self.__tables:
                raise Exception("Table " + name + " duplicated!")
            self.__tables[name] = table

    def getElem(self):
        """Return the element class given to the constructor."""
        return self.__elem

    def htmlTables(self, soup):
        """Return the raw tables contained in ``soup`` (abstract)."""
        # NotImplementedError is still an Exception, so existing handlers keep working.
        raise NotImplementedError("htmlTables() must be redefined!")

    def getNames(self):
        """Return the table names as a list, in document order."""
        return list(self.__tables.keys())

    def getTable(self, name):
        """Return the parsed table called ``name`` (KeyError if unknown)."""
        return self.__tables[name]

    def parseTable(self, tab):
        """Turn one raw table into a table object (abstract)."""
        raise NotImplementedError("parseTable() must be redefined!")

    def numTables(self):
        """Return the number of parsed tables."""
        return len(self.__tables)

    def toString(self, name):
        """Return the string form of the table called ``name``."""
        return self.__tables[name].toString()
def test_iterators(self):
    """Every iteration flavour of an OrderedDict follows insertion order."""
    pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)]
    shuffle(pairs)
    od = OrderedDict(pairs)

    expected_keys = [key for key, _ in pairs]
    expected_values = [value for _, value in pairs]

    self.assertEqual(list(od), expected_keys)
    self.assertEqual(list(od.keys()), expected_keys)
    self.assertEqual(list(od.values()), expected_values)
    self.assertEqual(list(od.items()), pairs)
    self.assertEqual(list(reversed(od)), expected_keys[::-1])
def aminoacids(a):
    """Read the amino-acid table and map each one-letter code to one column.

    Each line of the resource file is expected to look like
    ``<letter> <3-letter abbreviation> <full name>``. Reading stops at the
    first empty line or at end of file.

    :param a: 0 to get the three-letter abbreviation, 1 to get the full name.
    :returns: an ``OrderedDict`` keyed by one-letter code, in file order.
    :raises IndexError: when ``a`` is not 0 or 1 and the file has entries.
    """
    # Same as the code in aa_handle. TODO: call it from the other file (how?)
    from os.path import expanduser
    try:
        from collections import OrderedDict
    except ImportError:  # Python 2.6: fall back to the backport package
        from ordereddict import OrderedDict

    aa_file = expanduser("~") + '/QB9-git/SuperMarioQB9/resources/aminoacidos'
    res = OrderedDict()
    with open(aa_file) as aminoa:
        for raw_line in aminoa:
            line = raw_line.replace("\n", '')
            if line == '':
                # An empty line ends the table.
                break
            letter = line[0]
            abbreviation = line[2:5]
            name = line[6:]
            res[letter] = (abbreviation, name)[a]
    return res
def process(self):
    """Split the input HTML into separate pages plus an index.

    Everything before the first ``<!-- split "name" -->`` marker is the
    header and everything after ``<!-- endsplit -->`` is the footer. Each
    named section becomes its own document (header + section + footer) added
    through ``self.add_doc``; a section named ``index`` is instead kept as
    the intro of the index page. The output is the header, the optional
    index intro, a ``<ul>`` linking to every page (sorted by section name)
    and the footer. The list class comes from the ``split-ul-class`` argument
    and defaults to ``split``.

    When no end marker is found the input is passed through unchanged.
    """
    input_text = self.artifact.input_data.data()
    # NOTE: "> 0" means a marker at the very start of the input is treated
    # as "not found"; kept as-is because there would be no body to split.
    if input_text.find("<!-- endsplit -->") > 0:
        body, footer = re.split("<!-- endsplit -->", input_text, maxsplit=1)
        # With one capture group the split alternates: text, name, text, ...
        sections = re.split("<!-- split \"(.+)\" -->\n", body)
        header = sections[0]
        pages = OrderedDict()
        index_content = None
        for section_name, section_body in zip(sections[1::2], sections[2::2]):
            if section_name == 'index':
                index_content = section_body
                continue
            # TODO proper url/filename escaping
            section_url = section_name.replace(" ", "-")
            filename = "%s.html" % section_url
            filepath = os.path.join(self.artifact.parent_dir(), filename)
            pages[section_name] = filename
            new_page = self.add_doc(filepath, header + section_body + footer)
            self.artifact.log.debug(
                "added key %s to artifact %s ; links to file %s" %
                (filepath, self.artifact.key, new_page.name))

        index_items = [
            """<li><a href="%s">%s</a></li>""" % (pages[k], k)
            for k in sorted(pages.keys())
        ]

        output = [header]
        if index_content:
            output.append(index_content)
        if "split-ul-class" in self.artifact.args:
            ul = "<ul class=\"%s\">" % self.artifact.args['split-ul-class']
        else:
            ul = "<ul class=\"split\">"
        output.append("%s\n%s\n</ul>" % (ul, "\n".join(index_items)))
        output.append(footer)
        output_text = "\n".join(output)
    else:
        # No endsplit found, do nothing. (Joining the raw string, as the old
        # code did, put a newline between every single character.)
        output_text = input_text
    self.artifact.output_data.set_data(output_text)
def printClusters(tf_idf_values, dom_values, ind, centroids):
    """Draw one cluster and its centroid on the module-level axes and save the figure.

    The (tf-idf, dom) pairs are plotted as a scatter plus a connecting line,
    ordered by tf-idf value; the centroid is marked at y=1 with a vertical
    line down to the x axis. The figure is then written to ``arun_plot.pdf``.

    :param tf_idf_values: x values of the cluster members.
    :param dom_values: y values, paired positionally with ``tf_idf_values``.
    :param ind: cluster index; selects the colour and the centroid.
    :param centroids: sequence of centroid x positions, indexed by ``ind``.
    """
    # Going through a dict keeps one y per distinct x (the last one wins).
    xy_dict = dict(zip(tf_idf_values, dom_values))
    points = sorted(xy_dict.items(), key=lambda item: item[0])
    # Plain lists: matplotlib does not accept Python 3 dict views as data.
    xs = [x for x, _ in points]
    ys = [y for _, y in points]

    colors = ['r', 'y', 'm', 'c', 'b', 'g', 'r', 'y', 'm', 'c']
    # Cycle through the palette so more than len(colors) clusters can be drawn.
    color = colors[ind % len(colors)]

    # Axes.hold was removed in matplotlib 3.0 (holding is always on there).
    if hasattr(ax, 'hold'):
        ax.hold(True)
    ax.set_ylim(0, 1.1)
    ax.set_xlim(0, 1)

    ax.scatter(xs, ys, s=30, c=color, marker='o', alpha=0.75, linewidths=.1)
    ax.plot(xs, ys, linestyle='-', c=color, alpha=.40)

    # centroids: white disc with a black cross on top, plus a vertical guide
    ax.scatter(centroids[ind], 1, marker='o', s=300, linewidths=1, c='w', alpha=0.60)
    ax.scatter(centroids[ind], 1, marker='x', s=300, linewidths=1, c='k', alpha=0.60)
    ax.vlines(x=centroids[ind], ymin=0, ymax=1, color='k', linestyles='solid', alpha=0.40)

    canvas.print_figure('arun_plot.pdf', dpi=700)
def rotate_nested_dict(d, key):
    """Group the child dictionaries of ``d`` by the value they hold under ``key``.

    Given a dictionary of dictionaries, build a new one whose keys are taken
    from the children's values. Children that lack ``key`` are left out.

    :param d: mapping whose values are dictionaries.
    :param key: key looked up in every child dictionary.
    :returns: an ``OrderedDict`` mapping each distinct ``child[key]`` value
        to the list of children holding it, in iteration order of ``d``.
    """
    output = OrderedDict()
    for item in d:
        child = d[item]
        if key in child:
            # setdefault replaces the former linear scan over output.keys()
            output.setdefault(child[key], []).append(child)
    return output
def rotate_nested_dict(d, key):
    """Group the child dictionaries of ``d`` by the value they hold under ``key``.

    Given a dictionary of dictionaries, build a new one whose keys are taken
    from the children's values. Children that lack ``key`` are left out.

    :param d: mapping whose values are dictionaries.
    :param key: key looked up in every child dictionary.
    :returns: an ``OrderedDict`` mapping each distinct ``child[key]`` value
        to the list of children holding it, in iteration order of ``d``.
    """
    output = OrderedDict()
    for item in d:
        child = d[item]
        if key in child:
            # setdefault replaces the former linear scan over output.keys()
            output.setdefault(child[key], []).append(child)
    return output
def process(self):
    """Split the input HTML into one artifact per named section.

    Text before the first ``<!-- split "name" -->`` marker is the header,
    text after ``<!-- endsplit -->`` the footer. Each named section is saved
    as an additional ``.html`` artifact (header + section + footer) next to
    the canonical file and registered as an input of this artifact; the
    section called ``index`` is kept as the intro of the index instead.
    The resulting data dict holds the header, the optional index intro, a
    ``<ul>`` of links sorted by section name, and the footer.

    Without an end marker the input data dict is passed through untouched.
    """
    target_dir = os.path.dirname(self.artifact.canonical_filename())
    text = self.artifact.input_text()

    if text.find("<!-- endsplit -->") <= 0:
        # No endsplit found, do nothing.
        self.artifact.data_dict = self.artifact.input_data_dict
        return

    body, footer = re.split("<!-- endsplit -->", text, maxsplit=1)
    # The capture group makes the split alternate: text, name, text, ...
    sections = re.split("<!-- split \"(.+)\" -->\n", body)
    header = sections[0]
    pages = OrderedDict()
    index_content = None

    for section_name, section_body in zip(sections[1::2], sections[2::2]):
        if section_name == 'index':
            index_content = section_body
            continue

        # TODO proper url/filename escaping
        filename = "%s.html" % section_name.replace(" ", "-")
        filepath = os.path.join(target_dir, filename)
        pages[section_name] = filename

        page = self.artifact.__class__(filepath)
        page.ext = '.html'
        page.binary = False
        page.final = True
        page.additional = True
        page.artifacts_dir = self.artifact.artifacts_dir
        page.set_data(header + section_body + footer)
        page.hashstring = str(uuid.uuid4())
        page.save()

        self.artifact.inputs()[filepath] = page
        self.log.debug("added key %s to artifact %s ; links to file %s" %
                       (filepath, self.artifact.key, page.filename()))

    links = [
        """<li><a href="%s">%s</a></li>""" % (pages[name], name)
        for name in sorted(pages.keys())
    ]

    result = OrderedDict()
    result['header'] = header
    if index_content:
        result['index-page-content'] = index_content
    result['index'] = "<ul>\n%s\n</ul>" % "\n".join(links)
    result['footer'] = footer
    self.artifact.data_dict = result
def run_pattern_response(self, cmd_args, out_stream=sys.stdout, verbose=True, prefix=None, postfix=None,
                         pattern_response=None):
    """
    Run the external command and interact with it using the pattern_response dictionary.

    Whenever the command's output matches one of the patterns, the value
    stored for that pattern is sent back to the process followed by a newline.

    :param cmd_args: command line arguments
    :param out_stream: stream verbose messages are written to
    :param verbose: output messages if asserted
    :param prefix: command line arguments prepended to the given cmd_args
    :param postfix: command line arguments appended to the given cmd_args
    :param pattern_response: dictionary whose key is a regular expression pattern that when matched
        results in the value being sent to the running process.  If the value is None, then no
        response is sent.  When omitted, a default table is built: default prompts are accepted,
        the sudo password is supplied if one is configured, MOVEMENT is ignored and a timeout
        sends a newline.
    :returns: the output of the command
    :rtype: str
    """
    def show(text):
        # Every message goes through display with the caller's stream/verbosity.
        self.display(text, out_stream=out_stream, verbose=verbose)

    show("run_pattern_response(%s)\n\n" % cmd_args)

    if pattern_response is None:
        pattern_response = OrderedDict()
        # accept default prompts, don't match "[sudo] "
        pattern_response[r'\[\S+\](?<!\[sudo\]) '] = CR
        if self.password is not None:
            pattern_response[r'\[sudo\] password for \S+\:'] = self.password + CR
        pattern_response[MOVEMENT] = None
        pattern_response[pexpect.TIMEOUT] = CR

    # expect() reports the index of the matching pattern, so freeze the order.
    patterns = list(pattern_response.keys())
    command_line = ' '.join(self.expand_args(cmd_args, prefix=prefix, postfix=postfix))

    captured = []
    try:
        child = pexpect.spawn(command_line)
        while True:
            try:
                matched = child.expect(patterns, timeout=120)

                before_text = str(child.before)
                show(before_text)
                captured.append(before_text)

                if child.after:
                    after_text = str(child.after)
                    show(after_text)
                    captured.append(after_text)

                reply = pattern_response[patterns[matched]]
                if reply:
                    child.sendline(reply)
            except pexpect.EOF:
                # The command finished.
                break
    except pexpect.ExceptionPexpect as ex:
        show(str(ex) + '\n')
    return ''.join(captured)
def process(self):
    """Split the input HTML into separate pages plus an index.

    Everything before the first ``<!-- split "name" -->`` marker is the
    header and everything after ``<!-- endsplit -->`` is the footer. Each
    named section becomes its own document (header + section + footer) added
    through ``self.add_doc``; a section named ``index`` is instead kept as
    the intro of the index page. The output is the header, the optional
    index intro, a ``<ul>`` linking to every page (sorted by section name)
    and the footer. The list class comes from the ``split-ul-class`` argument
    and defaults to ``split``.

    When no end marker is found the input is passed through unchanged.
    """
    input_text = self.artifact.input_data.data()
    # NOTE: "> 0" means a marker at the very start of the input is treated
    # as "not found"; kept as-is because there would be no body to split.
    if input_text.find("<!-- endsplit -->") > 0:
        body, footer = re.split("<!-- endsplit -->", input_text, maxsplit=1)
        # With one capture group the split alternates: text, name, text, ...
        sections = re.split("<!-- split \"(.+)\" -->\n", body)
        header = sections[0]
        pages = OrderedDict()
        index_content = None
        for section_name, section_body in zip(sections[1::2], sections[2::2]):
            if section_name == 'index':
                index_content = section_body
                continue
            # TODO proper url/filename escaping
            section_url = section_name.replace(" ", "-")
            filename = "%s.html" % section_url
            filepath = os.path.join(self.artifact.parent_dir(), filename)
            pages[section_name] = filename
            new_page = self.add_doc(filepath, header + section_body + footer)
            self.artifact.log.debug(
                "added key %s to artifact %s ; links to file %s" %
                (filepath, self.artifact.key, new_page.name))

        index_items = [
            """<li><a href="%s">%s</a></li>""" % (pages[k], k)
            for k in sorted(pages.keys())
        ]

        output = [header]
        if index_content:
            output.append(index_content)
        if "split-ul-class" in self.artifact.args:
            ul = "<ul class=\"%s\">" % self.artifact.args['split-ul-class']
        else:
            ul = "<ul class=\"split\">"
        output.append("%s\n%s\n</ul>" % (ul, "\n".join(index_items)))
        output.append(footer)
        output_text = "\n".join(output)
    else:
        # No endsplit found, do nothing. (Joining the raw string, as the old
        # code did, put a newline between every single character.)
        output_text = input_text
    self.artifact.output_data.set_data(output_text)
def addAllPlots(self, graph, results, *args):
    """Add one stacked band per non-empty category to ``graph``.

    The categories of ``self.labels`` are stacked in order: each band runs
    from the running total so far to that total plus the category's own
    counts. Bands are then added from the topmost one down. A band whose
    lower edge is all zeros gets its bottom raised to
    ``self.getGraphMinimum(...)`` instead.

    :param graph: graph object handed through to ``self.addPlot``.
    :param results: sequence of ``(identifier, summary)`` pairs, ``summary``
        mapping category to count.
    :param args: extra positional arguments forwarded to ``self.addPlot``.
    """
    prevYlist = [0] * len(results)
    plotData = OrderedDict()
    for category in self.labels.keys():
        currYlist = [summary.get(category, 0) for _, summary in results]
        if self.hasNonZero(currYlist):
            ylist = [curr + prev for curr, prev in zip(currYlist, prevYlist)]
            plotData[category] = prevYlist, ylist
            prevYlist = ylist

    # After the loop prevYlist is the upper edge of the last band stored,
    # i.e. what plotData.values()[-1][-1] used to look up on every pass.
    topYlist = prevYlist

    for category in reversed(list(plotData)):
        prevYlist, ylist = plotData[category]
        if not self.hasNonZero(prevYlist):
            # Adjust the bottom of the graph to avoid a huge block of green for large suites
            prevYlist = [self.getGraphMinimum(ylist, topYlist)] * len(ylist)
        self.addPlot(prevYlist, ylist, graph, category=category, *args)
class TaskFile(object):
    """Tasks loaded from a YAML file, kept in the order they were defined.

    A task entry is a mapping with a mandatory ``TASK`` name and optional
    ``PARAMS`` (list or shell-style string) and ``DO`` (list or single item).
    """

    def __init__(self):
        self._file_name = None
        self._tasks = OrderedDict()

    def __str__(self):
        return '{0}'.format(self._file_name)

    def load(self, file_name):
        """Load the task(s) from ``file_name``.

        :raises TaskFileError: when the document is neither a mapping nor a
            list, or when an entry has no ``TASK`` key.
        """
        self._file_name = file_name
        with open(self._file_name) as f:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects; switch to yaml.safe_load unless task
            # files are trusted and rely on Python-specific tags.
            tasks = yaml.load(f)
        if isinstance(tasks, list):
            for task in tasks:
                self._register_task(task)
        elif isinstance(tasks, dict):
            self._register_task(tasks)
        else:
            raise TaskFileError(file_name)

    def _register_task(self, task):
        """Build a ``Task`` from one mapping and store it under its name."""
        try:
            name = task['TASK']
        except KeyError:
            raise TaskFileError(self._file_name)
        params = task.get('PARAMS') or []
        if not isinstance(params, list):
            params = shlex.split(params)
        body = task.get('DO') or []
        if not isinstance(body, list):
            body = [body]
        self._tasks[name] = Task(name, params, body, self)

    def get_task(self, task_name=None):
        """Return the task called ``task_name``, or the first task defined.

        :raises TaskFileError: when the named task does not exist, or when
            no name is given and the file defines no task at all.
        """
        if task_name:
            try:
                return self._tasks[task_name]
            except KeyError:
                raise TaskFileError(self._file_name,
                                    'no such task: {0}'.format(task_name))
        if not self._tasks:
            # Previously this surfaced as a bare IndexError.
            raise TaskFileError(self._file_name, 'no tasks defined')
        return self._tasks[next(iter(self._tasks))]
class XLSSheetDefinition(object):
    """Describes the columns of a spreadsheet: heading row plus named fields.

    ``fields`` maps field name to field object. The accessor methods filter
    those objects on the attributes they carry (``unique``, ``multiple``,
    ``i18n``, ``required``, ``type``, ``limit``, ``choices``).
    """

    def __init__(self, heading_row=0, fields=None):
        self.heading_row = heading_row
        # Every accessor treats fields as a mapping, so default to an empty
        # ordered mapping rather than a list.
        self.fields = fields if fields is not None else OrderedDict()

    def load_yaml(self, filepath):
        """Replace the definition with the one stored in the YAML file.

        The document may hold a ``heading_row`` and a ``fields`` list whose
        items map a field name to a dict of attributes (or to nothing).
        """
        self.fields = OrderedDict()
        with open(filepath, "r") as fp:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects; prefer yaml.safe_load for files that
            # are not fully trusted.
            data = yaml.load(fp)
        self.heading_row = data.get("heading_row", self.heading_row)
        for fielddef in data.get("fields", []):
            for name, fdef in fielddef.items():
                field = XLSField(name)
                if fdef is not None:
                    for key, val in fdef.items():
                        setattr(field, key, val)
                self.fields[name] = field

    def _select(self, predicate):
        """Return the fields for which ``predicate(field)`` is true."""
        return [f for f in self.fields.values() if predicate(f)]

    def names(self):
        """Return the field names as a list, in definition order."""
        return list(self.fields.keys())

    def unique(self):
        return self._select(lambda f: f.unique)

    def multiple(self):
        return self._select(lambda f: f.multiple)

    def i18n(self):
        return self._select(lambda f: f.i18n)

    def required(self):
        return self._select(lambda f: f.required)

    def date(self):
        return self.oftype("date")

    def oftype(self, type):
        """Return the fields whose ``type`` attribute equals ``type``."""
        return self._select(lambda f: f.type == type)

    def limited(self):
        return self._select(lambda f: f.limit)

    def choices(self):
        return self._select(lambda f: f.choices is not None)
class TableEntry:
    """A generic class that holds table-based data.

    The set of attributes is fixed at construction time; every attribute
    starts out as None and can be filled in once (or repeatedly, when
    duplicates are explicitly allowed).
    """

    def __init__(self, attrs):
        self.__data = OrderedDict()
        for attr in attrs:
            self.__data[attr] = None

    def attrs(self):
        """Return the attribute names as a list, in declaration order."""
        return list(self.__data.keys())

    def update(self, attr, data, allowDup=False):
        """Store ``data`` under ``attr``.

        Unknown attributes are silently ignored.

        :raises Exception: when the attribute already holds a value and
            ``allowDup`` is false.
        """
        if attr not in self.__data:
            return
        if self.__data[attr] is not None and not allowDup:
            raise Exception("Already updated attribute:" + attr)
        self.__data[attr] = data

    def get(self, attr):
        """Return the value stored under ``attr``.

        :raises Exception: when ``attr`` is not one of the declared attributes.
        """
        if attr not in self.__data:
            raise Exception(str("Bad attribute:" + attr))
        return self.__data[attr]

    def toString(self):
        """Return the values as one comma-separated line ending in a newline.

        Commas inside a value are replaced by semicolons so that they do not
        clash with the separator. An entry without attributes yields just
        the newline.
        """
        cells = [str(value).replace(",", ";") for value in self.__data.values()]
        return ",".join(cells) + "\n"
def process(self):
    """Split the input TeX source into one ``.tex`` artifact per named section.

    Text before the first ``%% split "name"`` line is the header and text
    after the ``%% endsplit`` line is the footer. Each named section is saved
    as a final artifact (header + section + footer) next to the canonical
    file and registered as an input of this artifact. This artifact's own
    data is passed through unchanged in every case.
    """
    parent_dir = os.path.dirname(self.artifact.canonical_filename())
    input_text = self.artifact.input_text()

    if input_text.find("%% endsplit\n") > 0:
        body, footer = re.split("%% endsplit\n", input_text, maxsplit=1)
        # The capture group makes the split alternate: text, name, text, ...
        sections = re.split("%% split \"(.+)\"\n", body)
        header = sections[0]
        ext = '.tex'
        for section_name, section_body in zip(sections[1::2], sections[2::2]):
            # TODO proper url/filename escaping
            section_url = section_name.replace(" ", "-")
            filename = "%s%s" % (section_url, ext)
            filepath = os.path.join(parent_dir, filename)

            artifact = self.artifact.__class__(filepath)
            artifact.ext = ext
            artifact.binary = False
            artifact.final = True
            artifact.artifacts_dir = self.artifact.artifacts_dir
            artifact.hashstring = str(uuid.uuid4())
            artifact.set_data(header + section_body + footer)
            artifact.save()

            self.artifact.inputs()[filepath] = artifact
            self.artifact.log.debug("added key %s to artifact %s ; links to file %s" %
                                    (filepath, self.artifact.key, artifact.filename()))
        # An HTML index used to be built here but was never used; removed.

    self.artifact.data_dict = self.artifact.input_data_dict
def addAllPlots(self, graph, results, *args):
    """Add one stacked band per non-empty category to ``graph``.

    The categories of ``self.labels`` are stacked in order: each band runs
    from the running total so far to that total plus the category's own
    counts. Bands are then added from the topmost one down. A band whose
    lower edge is all zeros gets its bottom raised to
    ``self.getGraphMinimum(...)`` instead.

    :param graph: graph object handed through to ``self.addPlot``.
    :param results: sequence of ``(identifier, summary)`` pairs, ``summary``
        mapping category to count.
    :param args: extra positional arguments forwarded to ``self.addPlot``.
    """
    prevYlist = [0] * len(results)
    plotData = OrderedDict()
    for category in self.labels.keys():
        currYlist = [summary.get(category, 0) for _, summary in results]
        if self.hasNonZero(currYlist):
            ylist = [curr + prev for curr, prev in zip(currYlist, prevYlist)]
            plotData[category] = prevYlist, ylist
            prevYlist = ylist

    # After the loop prevYlist is the upper edge of the last band stored,
    # i.e. what plotData.values()[-1][-1] used to look up on every pass.
    topYlist = prevYlist

    for category in reversed(list(plotData)):
        prevYlist, ylist = plotData[category]
        if not self.hasNonZero(prevYlist):
            # Adjust the bottom of the graph to avoid a huge block of green for large suites
            prevYlist = [self.getGraphMinimum(ylist, topYlist)] * len(ylist)
        self.addPlot(prevYlist, ylist, graph, category=category, *args)
def qacct_to_dict(text, tasks=False, order_list=None):
    """Parse the output of the ``qacct -j`` command into ordered dictionaries.

    Records are separated by a line of 62 ``=`` characters; within a record
    every line is split into a field name and a value. Values made only of
    digits become ``int``, other numeric values ``float``, and everything
    else is kept as a string.

    :param text: output of ``qacct -j``.
    :param tasks: when true, key the result by ``"<jobnumber>.<taskid>"``
        (one entry per task) instead of by job number only.
    :param order_list: optional list of field names to put first, in that
        order, in every record; remaining fields keep their original order.
    :returns: an ``OrderedDict`` mapping the job (or job.task) identifier to
        an ``OrderedDict`` of that record's fields.
    :raises KeyError: when a non-empty record lacks ``jobnumber`` (or
        ``taskid`` when ``tasks`` is set).
    """
    def getValue(value):
        if value.isdigit():
            return int(value)
        try:
            return float(value)
        except ValueError:
            # Not a number: keep the text as it is.
            return value

    def reorder_dict(d, l):
        out = OrderedDict()
        for key in l:
            if key in d:
                out[key] = d[key]
        for key in d:
            if key not in out:
                out[key] = d[key]
        return out

    out = OrderedDict()
    for job in text.split(62 * "="):
        j = OrderedDict()
        for line in job.split("\n"):
            tag = line.strip().split()
            if len(tag) > 1:
                j[tag[0]] = getValue(" ".join(tag[1:]))
        if order_list:
            j = reorder_dict(j, order_list)
        if j:
            job_id = str(j['jobnumber'])
            if tasks:
                job_id = ".".join([job_id, str(j['taskid'])])
            out[job_id] = j
    return out
def mapCallings(client, data_dir='data', template_root='templates'):
    """Render the calling map to ``<data_dir>/output/callingmap.html``.

    Callings are grouped by the headings and subgroups listed in the
    module-level ``prefOrder``: lines starting with ``#`` open a group, other
    non-blank lines name a subgroup of the most recently added group. Members
    without a calling are listed only when aged 12 or over.

    :param client: data source providing ``getRawValue(name)``.
    :param data_dir: directory under which the ``output`` folder is created.
    :param template_root: directory holding ``callingmap.html``.
    :raises KeyError: when a calling's sub-organisation is not in ``prefOrder``.
    """
    data_fp = FilePath(data_dir)
    output_root = data_fp.child('output')
    if not output_root.exists():
        output_root.makedirs()

    jenv = Environment(loader=FileSystemLoader(template_root))
    jenv.filters['abbr'] = abbreviateCalling
    jenv.globals['math'] = math
    template = jenv.get_template('callingmap.html')

    callings = client.getRawValue('members_with_callings')
    no_calling = client.getRawValue('members_without_callings')
    no_calling = [x for x in no_calling if x['age'] >= 12]

    # get the groups and subgroups organized into dicts
    groups = OrderedDict()
    by_suborg = {}
    for line in prefOrder:
        if line.startswith('#'):
            # heading
            groups[line[1:].strip()] = OrderedDict()
        elif line.strip():
            subgroup_key = line.strip()
            # list(...)[-1] is the last group added; unlike keys()[-1] it
            # also works with Python 3 key views.
            last_group = list(groups)[-1]
            groups[last_group][subgroup_key] = by_suborg[subgroup_key] = []

    # put each calling into the right subgroup
    # also count the number of callings per person
    calling_counts = defaultdict(int)
    for calling in callings:
        suborg = calling['subOrgType'] or calling['organization']
        by_suborg[suborg].append(calling)
        calling_counts[calling['id']] += 1

    fp = output_root.child('callingmap.html')
    fp.setContent(template.render(
        orgs=groups,
        calling_counts=calling_counts,
        no_calling=no_calling).encode('utf-8'))
    # A single formatted argument prints the same on Python 2 and 3.
    print('wrote %s' % (fp.path,))
def init_centroids(self, docs, limit):
    """Pick the initial centroids at random among the data points.

    Every point receives a random weight and the ``limit`` distinct points
    with the highest weights are chosen. The candidates are returned to the
    caller; this method does not store them on ``self``.

    :param docs: iterable of documents, each an iterable of hashable points.
    :param limit: number of centroids to pick.
    :returns: ``(points, centroids)`` where ``points`` is the flat list of
        all points (duplicates included) and ``centroids`` the chosen ones.
    :raises IndexError: when ``limit`` exceeds the number of distinct points.
    """
    import random

    # merge all the points
    points = []
    for doc in docs:
        points.extend(doc)

    # One draw per point, duplicates included; the last draw wins for a
    # repeated point.
    weights = {}
    for point in points:
        weights[point] = random.random() * 10000

    # sort out limit number of candidate centroids (heaviest first)
    ranked = sorted(weights.items(), key=lambda item: -item[1])
    centroids = [ranked[i][0] for i in range(limit)]
    return points, centroids
def mapCallings(client, data_dir='data', template_root='templates'):
    """Render the calling map to ``<data_dir>/output/callingmap.html``.

    Callings are grouped by the headings and subgroups listed in the
    module-level ``prefOrder``: lines starting with ``#`` open a group, other
    non-blank lines name a subgroup of the most recently added group. Members
    without a calling are listed only when aged 12 or over.

    :param client: data source providing ``getRawValue(name)``.
    :param data_dir: directory under which the ``output`` folder is created.
    :param template_root: directory holding ``callingmap.html``.
    :raises KeyError: when a calling's sub-organisation is not in ``prefOrder``.
    """
    data_fp = FilePath(data_dir)
    output_root = data_fp.child('output')
    if not output_root.exists():
        output_root.makedirs()

    jenv = Environment(loader=FileSystemLoader(template_root))
    jenv.filters['abbr'] = abbreviateCalling
    jenv.globals['math'] = math
    template = jenv.get_template('callingmap.html')

    callings = client.getRawValue('members_with_callings')
    no_calling = client.getRawValue('members_without_callings')
    no_calling = [x for x in no_calling if x['age'] >= 12]

    # get the groups and subgroups organized into dicts
    groups = OrderedDict()
    by_suborg = {}
    for line in prefOrder:
        if line.startswith('#'):
            # heading
            groups[line[1:].strip()] = OrderedDict()
        elif line.strip():
            subgroup_key = line.strip()
            # list(...)[-1] is the last group added; unlike keys()[-1] it
            # also works with Python 3 key views.
            last_group = list(groups)[-1]
            groups[last_group][subgroup_key] = by_suborg[subgroup_key] = []

    # put each calling into the right subgroup
    # also count the number of callings per person
    calling_counts = defaultdict(int)
    for calling in callings:
        suborg = calling['subOrgType'] or calling['organization']
        by_suborg[suborg].append(calling)
        calling_counts[calling['id']] += 1

    fp = output_root.child('callingmap.html')
    fp.setContent(template.render(
        orgs=groups,
        calling_counts=calling_counts,
        no_calling=no_calling).encode('utf-8'))
    # A single formatted argument prints the same on Python 2 and 3.
    print('wrote %s' % (fp.path,))
def qacct_to_dict(text, tasks=False, order_list=None):
    """Parse the output of the ``qacct -j`` command into ordered dictionaries.

    Records are separated by a line of 62 ``=`` characters; within a record
    every line is split into a field name and a value. Values made only of
    digits become ``int``, other numeric values ``float``, and everything
    else is kept as a string.

    :param text: output of ``qacct -j``.
    :param tasks: when true, key the result by ``"<jobnumber>.<taskid>"``
        (one entry per task) instead of by job number only.
    :param order_list: optional list of field names to put first, in that
        order, in every record; remaining fields keep their original order.
    :returns: an ``OrderedDict`` mapping the job (or job.task) identifier to
        an ``OrderedDict`` of that record's fields.
    :raises KeyError: when a non-empty record lacks ``jobnumber`` (or
        ``taskid`` when ``tasks`` is set).
    """
    def getValue(value):
        if value.isdigit():
            return int(value)
        try:
            return float(value)
        except ValueError:
            # Not a number: keep the text as it is.
            return value

    def reorder_dict(d, l):
        out = OrderedDict()
        for key in l:
            if key in d:
                out[key] = d[key]
        for key in d:
            if key not in out:
                out[key] = d[key]
        return out

    out = OrderedDict()
    for job in text.split(62 * "="):
        j = OrderedDict()
        for line in job.split("\n"):
            tag = line.strip().split()
            if len(tag) > 1:
                j[tag[0]] = getValue(" ".join(tag[1:]))
        if order_list:
            j = reorder_dict(j, order_list)
        if j:
            job_id = str(j['jobnumber'])
            if tasks:
                job_id = ".".join([job_id, str(j['taskid'])])
            out[job_id] = j
    return out
def group_member_export(request, group_slug):
    """
    Export all group members for a specific group as an Excel attachment.

    Only users allowed to change the group may export it; anyone else gets
    ``Http403``. The response is an ``.xls`` workbook with one heading row
    followed by one row per membership.
    """
    group = get_object_or_404(Group, slug=group_slug)

    # if they can edit it, they can export it
    if not has_perm(request.user, 'user_groups.change_group', group):
        raise Http403

    import xlwt
    try:
        from collections import OrderedDict
    except ImportError:  # Python 2.6: fall back to the backport package
        from ordereddict import OrderedDict
    from django.db import connection

    # create the excel book and sheet
    book = xlwt.Workbook(encoding='utf8')
    sheet = book.add_sheet('Group Members')

    # the key is what the column will be in the
    # excel sheet. the value is the database lookup
    # Used OrderedDict to maintain the column order
    group_mappings = OrderedDict([
        ('user_id', 'au.id'),
        ('first_name', 'au.first_name'),
        ('last_name', 'au.last_name'),
        ('email', 'au.email'),
        ('receives email', 'pp.direct_mail'),
        ('company', 'pp.company'),
        ('address', 'pp.address'),
        ('address2', 'pp.address2'),
        ('city', 'pp.city'),
        ('state', 'pp.state'),
        ('zipcode', 'pp.zipcode'),
        ('country', 'pp.country'),
        ('phone', 'pp.phone'),
        ('is_active', 'au.is_active'),
        ('date', 'gm.create_dt'),
    ])
    group_lookups = ','.join(group_mappings.values())

    # Use custom sql to fetch the rows: the profile columns are needed too,
    # and going through the ORM would cost one extra profile query per
    # member (13,000 members means 13,000 queries).
    # Only the constant column list above is interpolated into the
    # statement; the group id is passed as a bound parameter.
    cursor = connection.cursor()
    sql = ("SELECT %s FROM user_groups_groupmembership gm "
           "INNER JOIN auth_user au ON (au.id = gm.member_id) "
           "LEFT OUTER JOIN profiles_profile pp "
           "on (pp.user_id = gm.member_id) WHERE group_id = %%s;")
    sql = sql % group_lookups
    cursor.execute(sql, [group.pk])
    values_list = list(cursor.fetchall())

    # Put the heading in front of the values that go into the excel sheet
    # (as a real list, so it behaves like the data rows).
    values_list.insert(0, list(group_mappings.keys()))

    # excel date styles
    default_style = xlwt.Style.default_style
    datetime_style = xlwt.easyxf(num_format_str='mm/dd/yyyy hh:mm')
    date_style = xlwt.easyxf(num_format_str='mm/dd/yyyy')

    # Write the data enumerated to the excel sheet
    for row, row_data in enumerate(values_list):
        for col, val in enumerate(row_data):
            # styles the date/time fields; datetime is tested first because
            # every datetime is also a date
            if isinstance(val, datetime):
                style = datetime_style
            elif isinstance(val, date):
                style = date_style
            else:
                style = default_style
            sheet.write(row, col, val, style=style)

    response = HttpResponse(content_type='application/vnd.ms-excel')
    response['Content-Disposition'] = 'attachment; filename=group_%s_member_export.xls' % group.pk
    book.save(response)
    return response
def build_dict(self, res):
    """Map each distinct ``'d'`` value in ``res`` to the ``'ex'`` of its first occurrence.

    :param res: iterable of mappings with a ``'d'`` key (and an ``'ex'`` key,
        read only the first time a given ``'d'`` value is seen).
    :returns: an ``OrderedDict`` in order of first appearance.
    """
    d = OrderedDict()
    for item in res:
        key = item['d']
        # Direct membership test instead of scanning d.keys()
        if key not in d:
            d[key] = item['ex']
    return d
class FormData(object):
    """Ordered collection of form fields, addressable by field id.

    Item access (``data[field_id]``) reads and writes field *values*;
    ``getField``/``addField`` work with the field objects themselves.
    """

    implements(IFormData)

    def __init__(self, data=None):
        """Create the form data, optionally filled from a dict of values."""
        if not data:
            data = {}
        super(FormData, self).__init__()
        self._fields = OrderedDict()
        self.from_dict(data)

    def __repr__(self):
        reprlist = ["FormData:", ""]
        for field_id, field in self._fields.items():
            value = field.value
            # small hack for fields that have a dict as value (files)
            if isinstance(value, dict):
                if 'name' in value:
                    value = value['name']
            reprlist.append("%s: %s\n" % (field_id, value))
        return "\n".join(reprlist)

    def __getitem__(self, fieldId):
        """ Always return something... even if the data isn't
        there. This allows for a somewhat lax policy in evaluation of
        requiredness, relevance, etc.
        """
        try:
            return self._fields[fieldId].value
        except Exception:
            # Deliberately lax, but no longer swallows KeyboardInterrupt or
            # SystemExit the way a bare except did.
            return None

    def __setitem__(self, fieldId, val):
        """ Item assignment on formdata. Setting the value of a non
        existing field is NOT an error... """
        if fieldId not in self._fields:
            self._fields[fieldId] = Field(fieldId, val)
        else:
            self._fields[fieldId].value = val

    def getField(self, fieldId):
        """Return the field object for ``fieldId``, or None when unknown."""
        return self._fields.get(fieldId, None)

    def addField(self, field):
        """Store ``field`` under its own id, replacing any previous one."""
        self._fields[field.id] = field

    def getFields(self):
        """Return the field ids as a list, in insertion order."""
        return list(self._fields.keys())

    def update(self, data, ignore_missing=True):
        """ Update self with fields from data arg. Fields that are not
        known here are skipped unless ``ignore_missing`` is false, in which
        case they are added. """
        for field_id in data.getFields():
            field = data.getField(field_id)
            own_field = self.getField(field_id)
            if own_field:
                own_field.value = field.value
            elif not ignore_missing:
                self.addField(Field(field.id, field.value))

    def as_dict(self):
        """Return a plain dict mapping field id to field value."""
        res = {}
        for field_id, field in self._fields.items():
            res[field_id] = field.value
        return res

    def from_dict(self, data=None):
        """ Set the form fields and values from a dict """
        if data:
            for key, val in data.items():
                self[key] = val
class Element(object):
    """Describes one element type: its primary keys, parameters and writable options.

    ``writables`` merges the ``unique_writables`` of every write function in
    ``fn_list`` (option name -> descriptor exposing ``read_param`` and
    ``trns_fn``); ``write_fns`` maps function name to function.
    """

    def __init__(self, elem_name, primaries, parameter_dict, slurp_flags, fn_list=None, del_fn=None):
        if fn_list is None:
            # The default used to crash when iterated; treat it as "no write functions".
            fn_list = []
        self.elem_name = elem_name
        self.parameter = parameter_dict
        self.primaries = primaries
        self.writables = OrderedDict()
        for fn in fn_list:
            self.writables.update(fn.unique_writables)
        self.write_fns = dict([(fn.name, fn) for fn in fn_list])
        self.del_fn = del_fn
        self.slurp_flags = slurp_flags

    def __repr__(self):
        # list(...) keeps the output identical on Python 2 and 3
        return '%s, %s, %s, %s' % (self.elem_name,
                                   str(list(self.parameter.keys())),
                                   str(self.primaries),
                                   str(list(self.writables.keys())))

    def required_writables(self):
        """Return a list of the required writable options"""
        return list(self.writables.keys())

    def required_readables(self):
        """Return a list of the required readable options.

        This includes the primaries at the start of the returned list, in the
        order they are required in and the readable names of the writable
        options at the end."""
        readables = list(self.primaries)
        for option in self.required_writables():
            # NOTE(review): the membership test uses the writable name while
            # the read_param is what gets appended - confirm this is intended.
            if option not in readables:
                readables.append(self.writables[option].read_param)
        return readables

    def _slurp(self, req_read_names, sid=-1, flags=None):
        """Return a iterator of the slurped data"""
        if not flags:
            flags = self.slurp_flags
        if app_settings.USE_CASPY:
            return caspy_info(self.elem_name, req_read_names, sid=sid, flags=flags)
        return subsystem_info(self.elem_name, req_read_names, sid=sid, flags=flags)

    slurp = _slurp

    def needs_transform(self, options):
        """Return True when at least one of ``options`` has a transform function."""
        return any(self.writables[opt].trns_fn for opt in options)

    def map_func(self, read_opts, options):
        """Return a function pairing ``options`` with the matching read values.

        The returned callable takes ``(options, vals)`` where ``vals`` is laid
        out like ``read_opts``, and zips each option with its value. Values
        are picked by the option's ``read_param`` when ``read_opts`` holds
        more columns than there are options, and are passed through the
        option's ``trns_fn`` when one is defined. When neither is needed the
        builtin ``zip`` itself is returned.
        """
        if self.needs_transform(options):
            trns = [self.writables[opt_name].trns_fn for opt_name in options]

            def transform(vals):
                return [trns_fn(val) if trns_fn else val
                        for trns_fn, val in zip(trns, vals)]
        else:
            transform = None

        if len(read_opts) == len(options):
            if transform is None:
                # just a regular zipping of values.
                return zip

            # just transform the data:
            def zipper(options, vals):
                return zip(options, transform(vals))
            return zipper

        # Some read data needs to be selected: locate each option's column.
        val_ind = [read_opts.index(self.writables[opt_name].read_param)
                   for opt_name in options]
        if transform is None:
            def zipper(options, vals):
                return zip(options, [vals[ind] for ind in val_ind])
        else:
            def zipper(options, vals):
                return zip(options, transform([vals[ind] for ind in val_ind]))
        return zipper
def group_member_export(request, group_slug):
    """
    Export all group members for a specific group

    Looks the group up by slug, requires the ``user_groups.change_group``
    permission on it, and returns an ``HttpResponse`` carrying an xls
    workbook with one header row followed by one row per membership.
    """
    group = get_object_or_404(Group, slug=group_slug)

    # Anyone allowed to edit the group is allowed to export it.
    if not has_perm(request.user, 'user_groups.change_group', group):
        raise Http403

    import xlwt
    from ordereddict import OrderedDict
    from django.db import connection

    # Workbook with a single sheet.
    book = xlwt.Workbook(encoding='utf8')
    sheet = book.add_sheet('Group Members')

    # Column heading -> database lookup; an OrderedDict keeps the column
    # order of the sheet equal to the order written here.
    columns = OrderedDict([
        ('user_id', 'au.id'),
        ('first_name', 'au.first_name'),
        ('last_name', 'au.last_name'),
        ('email', 'au.email'),
        ('receives email', 'pp.direct_mail'),
        ('company', 'pp.company'),
        ('address', 'pp.address'),
        ('address2', 'pp.address2'),
        ('city', 'pp.city'),
        ('state', 'pp.state'),
        ('zipcode', 'pp.zipcode'),
        ('country', 'pp.country'),
        ('phone', 'pp.phone'),
        ('is_active', 'au.is_active'),
        ('date', 'gm.create_dt'),
    ])
    select_clause = ','.join(columns.values())

    # Raw SQL is used so the profile columns come back in the same single
    # query; going through the ORM would need get_profile() per member.
    cursor = connection.cursor()
    sql = "SELECT %s FROM user_groups_groupmembership gm \
           INNER JOIN auth_user au ON (au.id = gm.member_id) \
           LEFT OUTER JOIN profiles_profile pp \
           on (pp.user_id = gm.member_id) WHERE group_id = %%s;"
    cursor.execute(sql % select_clause, [group.pk])

    # Heading row first, then the fetched rows.
    rows = list(cursor.fetchall())
    rows.insert(0, columns.keys())

    # excel date styles
    default_style = xlwt.Style.default_style
    datetime_style = xlwt.easyxf(num_format_str='mm/dd/yyyy hh:mm')
    date_style = xlwt.easyxf(num_format_str='mm/dd/yyyy')

    def style_for(cell):
        # datetime is tested before date, as in the original chain.
        if isinstance(cell, datetime):
            return datetime_style
        if isinstance(cell, date):
            return date_style
        return default_style

    for row_index, row in enumerate(rows):
        for col_index, cell in enumerate(row):
            sheet.write(row_index, col_index, cell, style=style_for(cell))

    response = HttpResponse(mimetype='application/vnd.ms-excel')
    response['Content-Disposition'] = 'attachment; filename=group_%s_member_export.xls' % group.pk
    book.save(response)
    return response
def run_pattern_response(self, cmd_args, out_stream=sys.stdout, verbose=True, prefix=None, postfix=None,
                         pattern_response=None, accept_defaults=False, timeout=1200):
    """
    Run the external command and interact with it using the pattern_response dictionary

    The command line is sent over ``self.ssh``; the method then loops on
    ``self.ssh.expect`` until the shell prompt (``self.ssh.PROMPT``) is
    matched or ``pexpect.EOF`` is raised, sending the configured response
    whenever one of the patterns matches.

    :param timeout: NOTE(review): not referenced anywhere in this method body
                    (``expect`` is called without it) -- confirm whether it
                    should be forwarded.
    :param accept_defaults: if asserted, also answer the sudo password prompt
                            with ``Project.password`` and answer bracketed
                            prompts (other than "[sudo]") with CR
    :param cmd_args: command line arguments
    :param out_stream: stream verbose messages are written to
    :param verbose: output messages if asserted
    :param prefix: command line arguments prepended to the given cmd_args
    :param postfix: command line arguments appended to the given cmd_args
    :param pattern_response: dictionary whose key is a regular expression pattern that when matched
                             results in the value being sent to the running process.  If the value is None,
                             then no response is sent.
    :return: the collected output split into a list of lines
    """
    # Copy so the caller's dictionary is never mutated; insertion order is
    # the order the patterns are handed to expect().
    pattern_response_dict = OrderedDict(pattern_response or {})

    if accept_defaults:
        sudo_pattern = 'password for {user}: '.format(user=Project.user)
        sudo_response = "{password}\r".format(password=Project.password)
        pattern_response_dict[sudo_pattern] = sudo_response
        # accept default prompts, don't match "[sudo] "
        pattern_response_dict[r'\[\S+\](?<!\[sudo\])(?!\S)'] = CR

    # These two are matched but never answered (response None).
    pattern_response_dict[MOVEMENT] = None
    pattern_response_dict[pexpect.TIMEOUT] = None

    # The prompt is appended last; matching it ends the interaction loop.
    patterns = list(pattern_response_dict.keys())
    patterns.append(self.ssh.PROMPT)

    args = self.expand_args(cmd_args, prefix=prefix, postfix=postfix)
    command_line = ' '.join(args)
    # info("pattern_response_dict => %s" % repr(pattern_response_dict))
    # self.display("{line}\n".format(line=command_line), out_stream=out_stream, verbose=verbose)

    output = []

    self.ssh.prompt(timeout=0.1)  # clear out any pending prompts
    self._report(output, out_stream=out_stream, verbose=verbose)
    self.ssh.sendline(command_line)
    while True:
        try:
            index = self.ssh.expect(patterns)
            if index == patterns.index(pexpect.TIMEOUT):
                # A timeout is only reported; the loop keeps waiting.
                print("ssh.expect TIMEOUT")
            else:
                self._report(output, out_stream=out_stream, verbose=verbose)
                if index == patterns.index(self.ssh.PROMPT):
                    break

                key = patterns[index]
                response = pattern_response_dict[key]
                if response:
                    # Short pause before answering the matched prompt.
                    sleep(0.1)
                    self.ssh.sendline(response)
        except pexpect.EOF:
            # The remote side closed: report what is pending and stop.
            self._report(output, out_stream=out_stream, verbose=verbose)
            break
    self.ssh.prompt(timeout=0.1)
    self._report(output, out_stream=out_stream, verbose=verbose)
    return ''.join(output).splitlines()
class ResourceBuilder(object):
    """Helper to create a resource.

    Fields are kept in insertion order; each field is a small dict holding
    its name, argparse attribute name, current value, type and flags.
    """

    def __init__(self, name=None, required=False):
        self._name = name
        self._fields = OrderedDict()
        # Default for the per-field "required" flag (see add_field).
        self._required = required

    def add_field(self,
                  field,
                  arg=None,
                  value=None,
                  extended=False,
                  hidden=False,
                  e_type=str,
                  required=None):
        """Add a new field to the current ResourceBuilder.

        Keyword arguments:
        field -- field name
        arg -- name of the attribute name in arg object (argparse)
        value -- a default for this field, used for resource creation.
        extended -- If set to true, the current field will be display in
                    extended list mode only.
        hidden -- If set to true, the current field won't be exposed as
                  available keys.
        e_type -- field data type (default str)
        required -- True if the current field is required for create and
                    update methods
        """
        if required is None:
            required = self._required
        if arg is None:
            # camelCase -> snake_case, e.g. "maxSize" -> "max_size".
            arg = re.sub('(?!^)([A-Z]+)', r'_\1', field).lower()
        self._fields[field] = {
            'field': field,
            'arg': arg,
            'value': value,
            'extended': extended,
            'required': required,
            'e_type': e_type,
            'hidden': hidden
        }

    def get_keys(self, extended=False):
        """Return the names of the non-hidden fields.

        Extended fields are only included when ``extended`` is true.
        """
        return [
            field['field'] for field in self._fields.values()
            if not field['hidden'] and (extended or not field['extended'])
        ]

    def get_fields(self, extended=False, full=False):
        """Return field names: only extended ones, all of them (``full``),
        or only the non-extended ones (default).  ``extended`` wins over
        ``full`` when both are set."""
        if extended:
            return [
                field['field'] for field in self._fields.values()
                if field['extended']
            ]
        if full:
            return list(self._fields.keys())
        return [
            field['field'] for field in self._fields.values()
            if not field['extended']
        ]

    def set_arg(self, key, arg):
        """Set the argparse attribute name of field ``key``; unknown keys are
        ignored."""
        field = self._fields.get(key, None)
        if field is not None:
            field['arg'] = arg

    def get_value(self, key):
        """Return the value of field ``key``, or None for an unknown key."""
        field = self._fields.get(key, None)
        if field is None:
            return None
        return field['value']

    def set_value(self, key, value):
        """Set the value of field ``key``; unknown keys are ignored."""
        field = self._fields.get(key, None)
        if field is not None:
            field['value'] = value

    def to_resource(self):
        """Return a plain dict mapping every field name to its value."""
        ret = {}
        for field in self._fields.values():
            ret[field['field']] = field['value']
        return ret

    def load_from_args(self, namespace):
        """Copy every non-None attribute of ``namespace`` (looked up through
        each field's ``arg`` name) into the matching field value."""
        for field in self._fields.values():
            value = getattr(namespace, field['arg'], None)
            if value is not None:
                field['value'] = value

    def copy(self, data):
        """Overwrite the values of this builder from ``data``.

        ``data`` may be a dict (missing keys yield ``""``) or another
        ResourceBuilder (missing fields yield ``None``).  Any other type is
        silently ignored.
        """
        if isinstance(data, dict):
            for field, val in self._fields.items():
                val['value'] = data.get(field, "")
        if isinstance(data, ResourceBuilder):
            # The class defines no __getitem__, so the former
            # ``data[field]['value']`` always raised TypeError; read the
            # value through the public accessor instead.
            for field, val in self._fields.items():
                val['value'] = data.get_value(field)

    def __str__(self):
        return json.dumps(self.to_resource(), sort_keys=True, indent=2)

    def check_required_fields(self):
        """Validate the required fields.

        Raises ValueError when a required field has no value, or when its
        value cannot be converted by ``int``/``float`` for fields declared
        with that ``e_type``.
        """
        for field in self._fields.values():
            if not field['required']:
                continue
            value = field['value']
            if value is None:
                raise ValueError("missing value for required field : " +
                                 field['field'])
            e_type = field['e_type']
            # The conversions are done only for their ValueError side effect.
            if e_type == int:
                int(value)
            if e_type == float:
                float(value)
class Book:
    """
    Container for the sheets of one excel book.

    A csv file yields a book holding exactly one sheet.
    """

    def __init__(self, filename=None, **keywords):
        """
        Build a book, optionally loading it right away.

        A string ``filename`` naming an existing file is loaded from disk; a
        tuple is handed to :meth:`load_from_memory`.  Anything else leaves
        the book empty.
        """
        self.path = ""
        self.filename = "memory"
        self.name_array = []
        self.sheets = {}
        given_a_string = is_string(type(filename))
        if given_a_string:
            if filename and os.path.exists(filename):
                self.load_from(filename, **keywords)
        elif isinstance(filename, tuple):
            self.load_from_memory(filename, **keywords)

    def _sync_names(self):
        """Rebuild ``name_array`` from the current sheet keys."""
        self.name_array = list(self.sheets.keys())

    def load_from(self, file, **keywords):
        """Load content from physical file

        :param str file: the file name
        :param any keywords: additional parameters
        """
        self.path, self.filename = os.path.split(file)
        loaded = load_file(file, **keywords)
        self.load_from_sheets(loaded.sheets())

    def load_from_memory(self, the_tuple, **keywords):
        """Load content from memory content

        :param tuple the_tuple: first element should be file extension,
                                second element should be file content
        :param any keywords: additional parameters
        """
        loaded = load_file(the_tuple, **keywords)
        self.load_from_sheets(loaded.sheets())

    def load_from_sheets(self, sheets):
        """Load content from existing sheets

        :param dict sheets: a dictionary of sheets. Each sheet is
                            a list of lists
        """
        self.sheets = OrderedDict()
        for sheet_name in sheets.keys():
            rows = sheets[sheet_name]
            self.sheets[sheet_name] = self.get_sheet(rows, sheet_name)
        self._sync_names()

    def get_sheet(self, array, name):
        """Wrap a list of lists into a Sheet called ``name``"""
        return Sheet(array, name)

    def __iter__(self):
        return SheetIterator(self)

    def number_of_sheets(self):
        """Tell how many sheets the book holds"""
        return len(self.name_array)

    def sheet_names(self):
        """Give the names of all sheets"""
        return self.name_array

    def sheet_by_name(self, name):
        """Look a sheet up by its name"""
        return self.sheets[name]

    def sheet_by_index(self, index):
        """Look a sheet up by its position; None when out of range"""
        if not index < len(self.name_array):
            return None
        return self.sheets[self.name_array[index]]

    def remove_sheet(self, sheet):
        """Delete a sheet given by position (int) or by name (str).

        Raises IndexError / KeyError for an unknown position / name and
        TypeError for any other argument type.
        """
        if isinstance(sheet, int):
            if not sheet < len(self.name_array):
                raise IndexError
            del self.sheets[self.name_array[sheet]]
        elif isinstance(sheet, str):
            if sheet not in self.name_array:
                raise KeyError
            del self.sheets[sheet]
        else:
            raise TypeError
        self._sync_names()

    def __getitem__(self, key):
        if type(key) is int:
            return self.sheet_by_index(key)
        return self.sheet_by_name(key)

    def __delitem__(self, other):
        self.remove_sheet(other)
        return self

    def __add__(self, other):
        """Operator overloading

        example::

            book3 = book1 + book2
            book3 = book1 + book2["Sheet 1"]

        """
        merged = {}
        mine = to_dict(self)
        single_own_sheet = len(mine.keys()) == 1
        for own_name in mine.keys():
            # A lone sheet is prefixed with the file name it came from.
            if single_own_sheet:
                target = "%s_%s" % (self.filename, own_name)
            else:
                target = own_name
            merged[target] = mine[own_name]

        if isinstance(other, Book):
            theirs = to_dict(other)
            for their_name in theirs.keys():
                target = their_name
                if len(theirs.keys()) == 1:
                    target = other.filename
                if target in merged:
                    # Name clash: make the key unique with a random suffix.
                    target = "%s_%s" % (their_name, uuid.uuid4().hex)
                merged[target] = theirs[their_name]
        elif isinstance(other, Sheet):
            target = other.name
            if target in merged:
                target = "%s_%s" % (other.name, uuid.uuid4().hex)
            merged[target] = other.array
        else:
            raise TypeError

        result = Book()
        result.load_from_sheets(merged)
        return result

    def __iadd__(self, other):
        """Operator overloading +=

        example::

            book += book2
            book += book2["Sheet1"]

        """
        if isinstance(other, Book):
            incoming = other.sheet_names()
            for sheet_name in incoming:
                target = sheet_name
                if len(incoming) == 1:
                    target = other.filename
                if target in self.name_array:
                    target = "%s_%s" % (sheet_name, uuid.uuid4().hex)
                self.sheets[target] = self.get_sheet(
                    other[sheet_name].array, target)
        elif isinstance(other, Sheet):
            target = other.name
            if target in self.name_array:
                target = "%s_%s" % (other.name, uuid.uuid4().hex)
            self.sheets[target] = self.get_sheet(other.array, target)
        else:
            raise TypeError
        self._sync_names()
        return self
class Atoms:
    """Class to deal with a single frame of an xyz movie

    Per-atom data lives in four 2-D arrays (``real``, ``int``, ``str`` and
    ``logical``), one row per atom.  ``properties`` maps a property name to
    ``(code, column_slice)`` where code is 'R', 'I', 'S' or 'L'; ``repoint``
    exposes every property as an attribute viewing its columns.
    """

    # Property type code -> name of the attribute holding its storage array.
    _ARRAY_ATTR = {'R': 'real', 'I': 'int', 'S': 'str', 'L': 'logical'}
    # Property type code -> dtype used when (re)allocating that array.
    _ARRAY_DTYPE = {'R': float, 'I': int, 'S': 'S10', 'L': bool}

    def __init__(self, filename=None, *allocargs, **allockwargs):
        self._atomsptr = None
        self.alloc(*allocargs, **allockwargs)
        if filename is not None:
            self.read(filename)

    def alloc(self, n=0, n_int=0, n_real=3, n_str=1, n_logical=0,
              use_libatoms=False, atomsptr=None, properties=None,
              lattice=None, params=None, element='Si'):
        """Allocate storage for ``n`` atoms.

        ``lattice`` defaults to a fresh 100x100x100 cubic cell and ``params``
        to a fresh ``ParamReader()``.  Both used to be shared default
        objects, so in-place edits (e.g. in ``supercell``) leaked into every
        other instance.  With ``use_libatoms`` or ``atomsptr`` the data is
        attached from libatoms instead.  ``element`` is accepted but unused.
        """
        if lattice is None:
            lattice = numpy.array([[100., 0., 0.],
                                   [0., 100., 0.],
                                   [0., 0., 100.]])

        if use_libatoms or atomsptr is not None:
            if atomsptr is None:
                self.attach(libatoms.atoms_initialise(n, lattice))
            else:
                self.attach(atomsptr)
            return

        if params is None:
            params = ParamReader()

        self.n = n
        self.lattice = lattice
        self.g = numpy.linalg.inv(self.lattice)
        self.params = params

        # Create single property for atomic positions
        self.real = numpy.zeros((self.n, n_real), dtype=float)
        self.int = numpy.zeros((self.n, n_int), dtype=int)
        self.str = numpy.zeros((self.n, n_str), dtype='S10')
        self.logical = numpy.zeros((self.n, n_logical), dtype=bool)

        if properties is None:
            # A list of pairs (not a dict literal) so that 'species' is
            # guaranteed to precede 'pos' on every Python version.
            self.properties = OrderedDict([('species', ('S', slice(0, 1))),
                                           ('pos', ('R', slice(0, 3)))])
        else:
            self.properties = properties

        self.repoint()

    def attach(self, atomsptr):
        """Point this object at the data owned by a libatoms atoms pointer."""
        self.finalise()
        self._atomsptr = atomsptr

        self.n, n_int, n_real, n_str, n_logical, iloc, rloc, sloc, lloc, latticeloc, gloc = \
            libatoms.atoms_get_data(self._atomsptr)

        self.int = arraydata((self.n, n_int), int, iloc)
        self.real = arraydata((self.n, n_real), float, rloc)
        self.str = arraydata((self.n, n_str), 'S10', sloc)
        # Was built from ``sloc`` (the string location); ``lloc`` is the
        # location returned for the logical data.
        self.logical = arraydata((self.n, n_logical), bool, lloc)

        self.lattice = arraydata((3, 3), float, latticeloc)
        self.g = arraydata((3, 3), float, gloc)

        self.params = {}

        property_code_map = {1: 'I', 2: 'R', 3: 'S', 4: 'L'}
        self.properties = OrderedDict()
        for i in range(libatoms.atoms_n_properties(self._atomsptr)):
            # Property numbers and columns are 1-based on the libatoms side.
            key, (code, startcol, stopcol) = libatoms.atoms_nth_property(
                self._atomsptr, i + 1)
            self.properties[key.strip()] = (property_code_map[code],
                                            slice(startcol - 1, stopcol))

        self.repoint()

    def finalise(self):
        """Release the attached libatoms pointer, if any."""
        if self._atomsptr is not None:
            libatoms.atoms_finalise(self._atomsptr)
            self._atomsptr = None

    def __repr__(self):
        return 'Atoms(n=%d, properties=%s, params=%s, lattice=%s)' % \
            (self.n, repr(self.properties), repr(self.params), repr(self.lattice))

    def __cmp__(self, other):
        """Return 0 when both frames hold the same data, 1 otherwise."""
        if other is None:
            return 1

        # Quick checks
        if (self.n != other.n) or (self.comment() != other.comment()):
            return 1

        # Check if arrays match one by one
        pairs = ((self.lattice, other.lattice),
                 (self.real, other.real), (self.int, other.int),
                 (self.str, other.str), (self.logical, other.logical))
        for this, that in pairs:
            if not numpy.all(this == that):
                return 1
        return 0

    def update(self, other):
        "Overwrite contents of this Atoms object with a copy of an other"
        self.n = other.n
        self.lattice = other.lattice.copy()
        self.g = other.g.copy()
        self.params = other.params.copy()
        self.properties = other.properties.copy()

        # ``arr[:]`` on a numpy array is a view sharing memory with
        # ``other``; real copies are needed to honour the docstring.
        self.real = other.real.copy()
        self.int = other.int.copy()
        self.str = other.str.copy()
        self.logical = other.logical.copy()

        self.repoint()

    def _storage(self, ptype, detail):
        """Return the storage array for type code ``ptype``.

        Raises ValueError mentioning ``detail`` for an unknown code.
        """
        attr = self._ARRAY_ATTR.get(ptype)
        if attr is None:
            raise ValueError('Bad property type :' + str(detail))
        return getattr(self, attr)

    @staticmethod
    def _python_type_code(value):
        """Return the type code of a Python scalar, or of a list whose items
        all have exactly that type; None when nothing matches."""
        for code, pytype in (('I', int), ('R', float), ('S', str), ('L', bool)):
            if type(value) is pytype:
                return code
            if type(value) is list and all(type(item) is pytype for item in value):
                return code
        return None

    def _append_columns(self, name, code, value, ncols):
        """Grow the storage array for ``code`` by ``ncols`` columns filled
        with ``value`` and register them as property ``name``."""
        attr = self._ARRAY_ATTR[code]
        old = getattr(self, attr)
        n_old = old.shape[1]

        grown = numpy.zeros((self.n, n_old + ncols), dtype=self._ARRAY_DTYPE[code])
        grown[:, :n_old] = old
        if ncols == 1:
            grown[:, n_old] = value
        else:
            grown[:, n_old:n_old + ncols] = value

        setattr(self, attr, grown)
        self.properties[name] = (code, slice(n_old, n_old + ncols))
        self.repoint()

    def add_property(self, name, value, ncols=1):
        """Add a new property to this Atoms object.

        Value can be a scalar int, float, str or bool, a list of one of
        those, or a numpy array with one row per atom (``ncols`` is then
        taken from the array).  Raises ValueError for unsupported values.
        """
        code = self._python_type_code(value)
        if code is not None:
            self._append_columns(name, code, value, ncols)
            return

        # Array type
        if type(value) == type(numpy.array([])):
            if value.shape[0] != self.n:
                raise ValueError('length of value array (%d) != number of atoms (%d)' % \
                                 (value.shape[0], self.n))

            kind = value.dtype.kind
            if kind == 'f':
                code = 'R'
            elif kind == 'i':
                code = 'I'
            elif kind == 'S':
                code = 'S'
            elif value.dtype == numpy.dtype('bool'):
                # Used to be registered as 'S', which made repoint() expose
                # string columns under the new logical property's name.
                code = 'L'
            else:
                raise ValueError(
                    "Don't know how to add array property of type %r" %
                    value.dtype)

            try:
                ncols = value.shape[1]
            except IndexError:
                ncols = 1  # 1-D array: a single column
            self._append_columns(name, code, value, ncols)
            return

        raise ValueError("Don't know how to add property of type %r" %
                         type(value))

    def repoint(self):
        "Make pointers to columns in real and int"
        for prop, (ptype, cols) in self.properties.items():
            store = self._storage(ptype, self.properties[prop])
            if cols.stop - cols.start == 1:
                # Single column: expose a 1-D view.
                setattr(self, prop, store[:, cols.start])
            else:
                setattr(self, prop, store[:, cols])

    def comment(self, properties=None):
        "Return the comment line for this Atoms object"
        if properties is None:
            props = list(self.properties.keys())
        else:
            props = properties

        lattice_str = 'Lattice="' + ' '.join(
            map(str, numpy.reshape(self.lattice, 9))) + '"'

        # One "name:code:ncols" triple per property, all joined by ':'.
        triples = []
        for k in props:
            code, cols = self.properties[k]
            triples.append(':'.join((k, code, str(cols.stop - cols.start))))
        props_str = 'Properties=' + ':'.join(triples)

        return lattice_str + ' ' + props_str + ' ' + str(self.params)

    def _props_dtype(self, props=None):
        "Return a record array dtype for the specified properties (default all)"
        if props is None:
            props = list(self.properties.keys())

        fmt_map = {'R': 'd', 'I': 'i', 'S': 'S10', 'L': 'bool'}

        result = []
        for prop in props:
            ptype, cols = self.properties[prop]
            if cols.start == cols.stop - 1:
                result.append((prop, fmt_map[ptype]))
            else:
                # Multi-column properties become fields name0, name1, ...
                for c in range(cols.stop - cols.start):
                    result.append((prop + str(c), fmt_map[ptype]))

        return numpy.dtype(result)

    def _record_columns(self, props):
        """Yield ``(field_name, storage_array, column)`` for every column of
        ``props``, using the same field naming as ``_props_dtype``."""
        for prop in props:
            ptype, cols = self.properties[prop]
            store = self._storage(ptype, cols)
            if cols.start == cols.stop - 1:
                yield prop, store, cols.start
            else:
                for c in range(cols.stop - cols.start):
                    yield prop + str(c), store, cols.start + c

    def to_recarray(self, props=None):
        "Return a record array contains specified properties in order (defaults to all properties)"
        if props is None:
            props = list(self.properties.keys())

        # Create empty record array with correct dtype
        data = numpy.zeros(self.n, self._props_dtype(props))

        # Copy cols from the storage arrays into data recarray
        for field, store, col in self._record_columns(props):
            data[field] = store[:, col]

        return data

    def update_from_recarray(self, data, props=None):
        """Update Atoms data from a record array. By default all properties
        are updated; use the props argument to update only a subset

        Raises ValueError when ``data`` does not have the expected dtype or
        is not of shape ``(n,)``.
        """
        if props is None:
            props = list(self.properties.keys())

        if data.dtype != self._props_dtype(props) or data.shape != (self.n, ):
            raise ValueError('Data shape is incorrect')

        # Copy cols from data recarray into the storage arrays
        for field, store, col in self._record_columns(props):
            store[:, col] = data[field]

    def _parse_xyz_frame(self, xyz):
        """Read one frame from the open file ``xyz``.

        Allocates this object from the header and returns the record array
        of the atom lines, or None when the first line read is empty.
        """
        line = next(xyz)
        if not line:
            return None

        n = int(line.strip())
        comment = (next(xyz)).strip()

        # Parse comment line
        params = ParamReader(comment)

        if not 'Properties' in params:
            raise ValueError('Properties missing from comment line')

        properties, n_int, n_real, n_str, n_logical = _parse_properties(
            params['Properties'])
        del params['Properties']

        # Get lattice
        if not 'Lattice' in params:
            raise ValueError('No lattice found in xyz file')

        lattice = numpy.reshape(params['Lattice'], (3, 3))
        del params['Lattice']

        self.alloc(n=n, lattice=lattice, properties=properties, params=params,
                   n_int=n_int, n_real=n_real, n_str=n_str, n_logical=n_logical)

        props_dtype = self._props_dtype()

        converters = [_getconv(props_dtype.fields[name][0])
                      for name in props_dtype.names]

        X = []
        for i, line in enumerate(xyz):
            vals = line.split()
            row = tuple([converters[j](val) for j, val in enumerate(vals)])
            X.append(row)
            if i == self.n - 1:
                break  # Only read self.n lines

        try:
            return numpy.array(X, props_dtype)
        except TypeError:
            raise IOError('End of file reached before end of frame')

    def read_xyz(self, xyz):
        """Read from extended XYZ filename or open file.

        Returns True when a complete frame was read and False otherwise.
        """
        opened = False
        if type(xyz) == type(''):
            xyz = open(xyz, 'r')
            opened = True

        try:
            data = self._parse_xyz_frame(xyz)
        finally:
            # Close a file we opened ourselves even when parsing fails.
            if opened:
                xyz.close()

        if data is None:
            return False

        try:
            self.update_from_recarray(data)
        except ValueError:
            # got a partial frame, must be end of file
            return False
        else:
            return True

    def read_netcdf(self, fname, frame=0):
        """Read frame number ``frame`` from the NetCDF file ``fname``."""
        from pupynere import netcdf_file

        nc = netcdf_file(fname)

        self.n = nc.dimensions['atom']
        self.lattice = make_lattice(nc.variables['cell_lengths'][frame],
                                    nc.variables['cell_angles'][frame])
        self.g = numpy.linalg.inv(self.lattice)
        self.params = OrderedDict()
        self.properties = OrderedDict()

        self.real = numpy.zeros((self.n, 0), dtype=float)
        self.int = numpy.zeros((self.n, 0), dtype=int)
        self.str = numpy.zeros((self.n, 0), dtype='S10')
        self.logical = numpy.zeros((self.n, 0), dtype=bool)

        # A real list is needed for the index()/swap below.
        names = [v for v in nc.variables.keys()
                 if v not in ('cell_angles', 'cell_lengths')]

        # ensure first var is species and second positions
        sp = names.index('species')
        if sp != 0:
            names[sp], names[0] = names[0], names[sp]
        pos = names.index('coordinates')
        if pos != 1:
            names[pos], names[1] = names[1], names[pos]

        for v in names:
            d = nc.variables[v].dimensions

            if d[0] != 'frame':
                continue

            value = nc.variables[v][frame]
            if value.dtype == numpy.dtype('|S1'):
                # Character arrays: join the characters of each entry.
                value = [''.join(x).strip() for x in value]

            is_text = len(d) == 2 and d[1] in ('label', 'string')
            if len(d) == 1 or is_text:
                if is_text:
                    value = ''.join(value)
                self.params[v] = value
            else:
                # Name mangling
                if v == 'coordinates':
                    p = 'pos'
                elif v == 'velocities':
                    p = 'velo'
                else:
                    p = v
                self.add_property(p, value)

    def write_xyz(self, xyz=sys.stdout, properties=None):
        "Write atoms in extended XYZ format. xyz can be a filename or open file"
        if properties is None:
            # Sort by original order
            props = list(self.properties.keys())
        else:
            props = properties

        species = getattr(self, props[0])
        if len(species.shape) != 1 or species.dtype.kind != 'S':
            raise ValueError('First property must be species like')

        pos = getattr(self, props[1])
        if pos.shape[1] != 3 or pos.dtype.kind != 'f':
            raise ValueError('Second property must be position like')

        data = self.to_recarray(props)
        format = ''.join(
            [_getfmt(data.dtype.fields[name][0])
             for name in data.dtype.names]) + '\n'

        opened = False
        if type(xyz) == type(''):
            xyz = open(xyz, 'w')
            opened = True

        try:
            xyz.write('%d\n' % self.n)
            xyz.write(self.comment(properties) + '\n')
            for i in range(self.n):
                xyz.write(format % tuple(data[i]))
        finally:
            if opened:
                xyz.close()

    def read_cell(self, cell):
        "Read atoms from a CastepCell object or file"
        if hasattr(cell, 'next'):  # looks like a file
            cell = castep.CastepCell(cell)
        self.update(cell.to_atoms())

    def write_cell(self, fname):
        "Write Atoms to a cell file"
        cell = castep.CastepCell()
        cell.update_from_atoms(self)
        cell.write(fname)

    def read_geom(self, geom):
        "Read from a CASTEP .geom file"
        self.update(castep.read_geom(geom))

    def read_castep(self, castepfile):
        "Read from a .castep output file"
        if self.n != 0:
            # Pass the current atoms along when there already are some.
            self.update(
                castep.read_castep_output(castepfile, self, abort=False))
        else:
            self.update(castep.read_castep_output(castepfile, abort=False))

    def read(self, fname, filetype=None):
        "Attempt to guess type of file from extension and call appropriate read method"
        opened = False
        if type(fname) == type(''):
            if fname.endswith('.gz'):
                import gzip
                fh = gzip.open(fname)
                fname = fname[:-3]  # remove .gz
                opened = True  # the gzip handle used to be left open
            elif fname.endswith('.nc'):
                # The NetCDF reader takes the path itself, nothing to close.
                fh = fname
            else:
                fh = open(fname, 'r')
                opened = True

            # Guess file type from extension
            if filetype is None:
                root, filetype = os.path.splitext(fname)
                filetype = filetype[1:]  # remove '.'
        else:
            fh = fname

        readers = {
            'xyz': self.read_xyz,
            'cell': self.read_cell,
            'geom': self.read_geom,
            'castep': self.read_castep,
            'nc': self.read_netcdf,
        }
        # Default to xyz format
        reader = readers.get(filetype, self.read_xyz)

        try:
            reader(fh)
        finally:
            if opened:
                fh.close()

    def write(self, fname, filetype=None):
        "Attempt to guess type of file from extension and call appropriate write method"
        opened = False
        if type(fname) == type(''):
            if fname.endswith('.gz'):
                import gzip
                fh = gzip.open(fname, 'w')
                fname = fname[:-3]  # remove .gz
            else:
                fh = open(fname, 'w')
            # Both branches above opened a handle that we must close.
            opened = True

            # Guess file type from extension
            if filetype is None:
                root, filetype = os.path.splitext(fname)
                filetype = filetype[1:]  # remove '.'
        else:
            fh = fname

        writers = {
            'xyz': self.write_xyz,
            'cfg': self.write_cfg,
            'cell': self.write_cell,
        }
        # Default to xyz format
        writer = writers.get(filetype, self.write_xyz)

        try:
            writer(fh)
        finally:
            if opened:
                fh.close()

    def write_cfg(self, cfg=sys.stdout, shift=numpy.array([0., 0., 0.]), properties=None):
        """Write atoms in AtomEye extended CFG format. Returns a list of
        auxiliary properties actually written to CFG file, which may be
        abbreviated compared to those requested since AtomEye has a maximum
        of 32 aux props."""
        opened = False
        if type(cfg) == type(''):
            cfg = open(cfg, 'w')
            opened = True

        try:
            return self._write_cfg(cfg, shift, properties)
        finally:
            if opened:
                cfg.close()

    def _write_cfg(self, cfg, shift, properties):
        """Write the CFG content to the open file ``cfg`` (see write_cfg)."""
        if properties is None:
            properties = list(self.properties.keys())

        # Header line
        cfg.write('Number of particles = %d\n' % self.n)
        cfg.write('# ' + self.comment(properties) + '\n')

        # Lattice vectors
        for i in 0, 1, 2:
            for j in 0, 1, 2:
                cfg.write('H0(%d,%d) = %16.8f\n' %
                          (i + 1, j + 1, self.lattice[i, j]))

        cfg.write('.NO_VELOCITY.\n')

        # Check first property is position-like
        species = getattr(self, properties[0])
        if len(species.shape) != 1 or species.dtype.kind != 'S':
            raise ValueError('First property must be species like')

        pos = getattr(self, properties[1])
        if pos.shape[1] != 3 or pos.dtype.kind != 'f':
            raise ValueError('Second property must be position like')

        if 'frac_pos' not in self.properties:
            self.add_property('frac_pos', 0.0, ncols=3)
        self.frac_pos[:] = numpy.array(
            [numpy.dot(pos[i, :], self.g) + shift for i in range(self.n)])

        if 'mass' not in self.properties:
            self.add_property('mass',
                              [ElementMass.get(s) for s in self.species])

        # New list: the caller's sequence is never modified.
        properties = [p for p in properties
                      if p not in ('pos', 'frac_pos', 'mass', 'species')]

        # AtomEye can handle a maximum of 32 columns, so we might have to
        # throw away some of the less interesting properties
        def count_cols():
            n_aux = 0
            for p in properties:
                s = getattr(self, p).shape
                if len(s) == 1:
                    n_aux += 1
                else:
                    n_aux += s[1]
            return n_aux

        boring_properties = ['travel', 'avgpos', 'oldpos', 'acc', 'velo']
        while count_cols() > 32:
            if len(boring_properties) == 0:
                raise ValueError('No boring properties left!')
            next_most_boring = boring_properties.pop(0)
            # list.index raises ValueError (not the IndexError that used to
            # be caught) for a missing item, so test membership instead and
            # simply move on when this boring property isn't in the list.
            if next_most_boring in properties:
                properties.remove(next_most_boring)

        properties = ['species', 'mass', 'frac_pos'] + properties
        data = self.to_recarray(properties)

        cfg.write('entry_count = %d\n' % (len(data.dtype.names) - 2))

        # 3 lines per atom: element name, mass and other data
        format = '%s\n%12.4f\n'
        for i, name in enumerate(data.dtype.names[2:]):
            # The first three of these are the frac_pos columns; everything
            # after them is declared as an auxiliary column.
            if i > 2:
                cfg.write('auxiliary[%d] = %s\n' % (i - 3, name))
            format = format + _getfmt(data.dtype.fields[name][0])
        format = format + '\n'

        for i in range(self.n):
            cfg.write(format % tuple(data[i]))

        # Return column names as a list
        return list(data.dtype.names)

    def filter(self, mask):
        "Return smaller Atoms with only the elements where mask is true"
        other = Atoms()

        if mask is None:
            # ``numpy.bool`` is not available in every numpy release; the
            # builtin ``bool`` selects the same dtype.
            mask = numpy.zeros((self.n, ), bool)
            mask[:] = True

        other.n = count(mask)
        other.lattice = self.lattice.copy()
        other.g = self.g.copy()
        other.params = self.params.copy()
        other.properties = self.properties.copy()

        other.real = self.real[mask]
        other.int = self.int[mask]
        other.str = self.str[mask]
        other.logical = self.logical[mask]

        other.repoint()

        return other

    def copy(self):
        """Return a copy of this object (a fresh Atoms() when empty)."""
        if self.n == 0:
            return Atoms()
        else:
            return self.filter(mask=None)

    def add(self, newpos, newspecies):
        """Append atoms at ``newpos`` (one position or an array of them)
        with species ``newspecies``."""
        if type(newpos) == type([]):
            newpos = numpy.array(newpos)

        if len(newpos.shape) == 1:
            n_new = 1
        else:
            n_new = newpos.shape[0]

        oldn = self.n
        self.n = self.n + n_new

        self.real = numpy.resize(self.real, (self.n, self.real.shape[1]))
        self.int = numpy.resize(self.int, (self.n, self.int.shape[1]))
        self.str = numpy.resize(self.str, (self.n, self.str.shape[1]))
        self.logical = numpy.resize(self.logical,
                                    (self.n, self.logical.shape[1]))

        self.repoint()

        self.pos[oldn:self.n] = newpos
        self.species[oldn:self.n] = newspecies

    def remove(self, discard):
        """Drop the atoms whose indices are in ``discard``."""
        keep = [i for i in range(self.n) if not i in discard]

        self.n = len(keep)
        self.real = self.real[keep]
        self.int = self.int[keep]
        self.str = self.str[keep]
        self.logical = self.logical[keep]
        self.repoint()

    def supercell(self, n1, n2, n3):
        """Return a new Atoms holding n1 x n2 x n3 periodic images."""
        # n_str and n_logical used to be left at their defaults, so the row
        # copies below failed whenever this object's string/logical column
        # counts differed from them.
        other = Atoms(n=self.n * n1 * n2 * n3,
                      n_int=self.int.shape[1],
                      n_real=self.real.shape[1],
                      n_str=self.str.shape[1],
                      n_logical=self.logical.shape[1],
                      properties=self.properties.copy())

        other.lattice[0, :] = self.lattice[0, :] * n1
        other.lattice[1, :] = self.lattice[1, :] * n2
        other.lattice[2, :] = self.lattice[2, :] * n3
        other.g = numpy.linalg.inv(other.lattice)

        for i in range(n1):
            for j in range(n2):
                for k in range(n3):
                    p = numpy.dot(self.lattice, numpy.array([i, j, k]))
                    for n in range(self.n):
                        nn = ((i * n2 + j) * n3 + k) * self.n + n
                        other.int[nn, :] = self.int[n, :]
                        other.real[nn, :] = self.real[n, :]
                        other.logical[nn, :] = self.logical[n, :]
                        other.str[nn, :] = self.str[n, :]
                        other.pos[nn, :] = self.pos[n, :] + p

        other.repoint()
        return other

    def cell_volume(self):
        """Return the (absolute) volume spanned by the lattice vectors."""
        return abs(
            numpy.dot(numpy.cross(self.lattice[0, :], self.lattice[1, :]),
                      self.lattice[2, :]))
build_Tree() index += 1 if len(Tindices2keep_set) == len(i_iminus1_pool_dict.keys()): print "NO TOCSY INDEX GROUPS WERE REMOVED FROM CONNECTIVITY FILE !!!" sys.exit(0) for Tindex in i_iminus1_pool_dict.keys(): if Tindex not in Tindices2keep_set: print "Deleting TOCSY index group", Tindex, " from connectivities pool file." del i_iminus1_pool_dict[Tindex] del i_iminus1_complete_dict[Tindex] del iaaindex_iminus1aaTypesProbTupleList_dict[Tindex] del iaaindex_iminus1aaTypesProbPoolTupleList_dict[Tindex] for Tindex in iaaindex_iminus1aaTypesProbPoolTupleList_dict.keys(): if args.KEEP_ONLY_GLY: duplet_list = iaaindex_iminus1aaTypesProbPoolTupleList_dict[Tindex] if len(duplet_list) > 1 and duplet_list[0][0] == 'GLY' and duplet_list[0][1] >= 100 * duplet_list[1][1]: print "Setting amino acid type of TOCSY index group ", Tindex, " to ", [duplet_list[0]] iaaindex_iminus1aaTypesProbPoolTupleList_dict[Tindex] = [duplet_list[0]] if args.KEEP_ONLY_ALA: duplet_list = iaaindex_iminus1aaTypesProbPoolTupleList_dict[Tindex] if len(duplet_list) > 1 and duplet_list[0][0] == 'ALA' and duplet_list[0][1] >= 100 * duplet_list[1][1]: print "Setting amino acid type of TOCSY index group ", Tindex, " to ", [duplet_list[0]] iaaindex_iminus1aaTypesProbPoolTupleList_dict[Tindex] = [duplet_list[0]] elif args.MAXIMUM_OCCUPANCY_TOLERANCE != None: # KEEP ALL CONNECTIVITIES (not only those forming long chains) Tindex_maxOccupancy_dict = {} for triplet_list in i_iminus1_pool_dict.values():
for qline_list in sorted_query_lineLists_list: current_resid = qline_list[0].replace('?-?-', '').replace('N-H', '') if current_resid != previous_resid: Tindex_CSlist_dict[current_resid] = [] Tindex_CSlist_dict[current_resid].append((qline_list[1:])) previous_resid = current_resid return Tindex_CSlist_dict Tindex_CSlist_dict = read_spectrum_file(args.TOCSY_fname) residue_CSlist_dict = OrderedDict( ) # ordereddict with keys the resname+resid -> the list of the associated H-C-N-HN resonances. E.g. patched_residues_list = [ ] # list with the residues that were added by -patch option in the chain_linker.py (they have no TOCSY peaks but have NOESY) for residue in residue2Tindex_dict.keys(): print "DEBUG: residue=", residue if residue2Tindex_dict[residue] == None: print "DEBUG: residue2Tindex_dict[residue]=", residue2Tindex_dict[ residue] continue try: residue_CSlist_dict[residue] = Tindex_CSlist_dict[ residue2Tindex_dict[residue]] except KeyError: # in case this residue from the alignment has no TOCSY peaks (was added using -patch in the chain_linker.py), save it patched_residues_list.append(residue) continue print "DEBUG: residue_CSlist_dict =", residue_CSlist_dict print "DEBUG: patched_residues_list=", patched_residues_list NOESY_residue_CSlist_dict = OrderedDict(
class HasParameters(object): """This class provides an implementation of the IHasParameters interface.""" _do_not_promote = [ 'get_expr_depends', 'get_referenced_compnames', 'get_referenced_varpaths', 'get_metadata' ] def __init__(self, parent): self._parameters = OrderedDict() self._allowed_types = ['continuous'] if obj_has_interface(parent, ISolver): self._allowed_types.append('unbounded') self._parent = None if parent is None else weakref.ref(parent) def __getstate__(self): state = self.__dict__.copy() state['_parent'] = self.parent return state def __setstate__(self, state): self.__dict__.update(state) parent = state['_parent'] self._parent = None if parent is None else weakref.ref(parent) @property def parent(self): """ The object we are a delegate of. """ return None if self._parent is None else self._parent() def _item_count(self): """This is used by the replace function to determine if a delegate from the target object is 'empty' or not. If it's empty, it's not an error if the replacing object doesn't have this delegate. """ return len(self._parameters) def add_parameter(self, target, low=None, high=None, scaler=None, adder=None, start=None, fd_step=None, name=None, scope=None): """Adds a parameter or group of parameters to the driver. target: string or iter of strings or Parameter What the driver should vary during execution. A *target* is an expression that can reside on the left-hand side of an assignment statement, so typically it will be the name of a variable or possibly a subscript expression indicating an entry within an array variable, e.g., x[3]. If an iterator of targets is given, then the driver will set all targets given to the same value whenever it varies this parameter during execution. If a Parameter instance is given, then that instance is copied into the driver with any other arguments specified, overiding the values in the given parameter. low: float (optional) Minimum allowed value of the parameter. 
If scaler and/or adder is supplied, use the transformed value here. If target is an array, this may also be an array, but must have the same size. high: float (optional) Maximum allowed value of the parameter. If scaler and/or adder is supplied, use the transformed value here. If target is an array, this may also be an array, but must have the same size. scaler: float (optional) Value to multiply the possibly offset parameter value by. If target is an array, this may also be an array, but must have the same size. adder: float (optional) Value to add to parameter prior to possible scaling. If target is an array, this may also be an array, but must have the same size. start: any (optional) Value to set into the target or targets of a parameter before starting any executions. If not given, analysis will start with whatever values are in the target or targets at that time. If target is an array, this may also be an array, but must have the same size. fd_step: float (optional) Step-size to use for finite difference calculation. If no value is given, the differentiator will use its own default. If target is an array, this may also be an array, but must have the same size. name: str (optional) Name used to refer to the parameter in place of the name of the variable referred to in the parameter string. This is sometimes useful if, for example, multiple entries in the same array variable are declared as parameters. scope: object (optional) The object to be used as the scope when evaluating the expression. If neither "low" nor "high" is specified, the min and max will default to the values in the metadata of the variable being referenced. 
""" if isinstance(target, (ParameterBase, ParameterGroup)): self._parameters[target.name] = target target.override(low, high, scaler, adder, start, fd_step, name) else: if isinstance(target, basestring): names = [target] key = target else: names = target key = tuple(target) if name is not None: key = name dups = set(self.list_param_targets()).intersection(names) if len(dups) == 1: self.parent.raise_exception( "'%s' is already a Parameter" " target" % dups.pop(), ValueError) elif len(dups) > 1: self.parent.raise_exception( "%s are already Parameter targets" % sorted(list(dups)), ValueError) if key in self._parameters: self.parent.raise_exception("%s is already a Parameter" % key, ValueError) try: _scope = self._get_scope(scope) if len(names) == 1: target = self._create(names[0], low, high, scaler, adder, start, fd_step, key, _scope) else: # defining a ParameterGroup parameters = [ self._create(n, low, high, scaler, adder, start, fd_step, key, _scope) for n in names ] types = set([p.valtypename for p in parameters]) if len(types) > 1: raise ValueError("Can't add parameter %s because " "%s are not all of the same type" % (key, " and ".join(names))) target = ParameterGroup(parameters) self._parameters[key] = target except Exception: self.parent.reraise_exception(info=sys.exc_info()) self.parent.config_changed() def _create(self, target, low, high, scaler, adder, start, fd_step, key, scope): """ Create one Parameter or ArrayParameter. """ try: expreval = ExprEvaluator(target, scope) except Exception as err: raise err.__class__("Can't add parameter: %s" % err) if not expreval.is_valid_assignee(): raise ValueError("Can't add parameter: '%s' is not a" " valid parameter expression" % expreval.text) try: val = expreval.evaluate() except Exception as err: val = None # Let Parameter code sort out why. 
name = key[0] if isinstance(key, tuple) else key if isinstance(val, ndarray): return ArrayParameter(target, low=low, high=high, scaler=scaler, adder=adder, start=start, fd_step=fd_step, name=name, scope=scope, _expreval=expreval, _val=val, _allowed_types=self._allowed_types) else: return Parameter(target, low=low, high=high, scaler=scaler, adder=adder, start=start, fd_step=fd_step, name=name, scope=scope, _expreval=expreval, _val=val, _allowed_types=self._allowed_types) def remove_parameter(self, name): """Removes the parameter with the given name.""" param = self._parameters.get(name) if param: del self._parameters[name] else: self.parent.raise_exception( "Trying to remove parameter '%s' " "that is not in this driver." % (name, ), AttributeError) self.parent.config_changed() def config_parameters(self): """Reconfigure parameters from potentially changed targets.""" for param in self._parameters.values(): param.configure() def get_references(self, name): """Return references to component `name` in preparation for subsequent :meth:`restore_references` call. name: string Name of component being removed. """ refs = OrderedDict() for pname, param in self._parameters.items(): if name in param.get_referenced_compnames(): refs[pname] = param return refs def remove_references(self, name): """Remove references to component `name`. name: string Name of component being removed. """ to_remove = [] for pname, param in self._parameters.items(): if name in param.get_referenced_compnames(): to_remove.append(pname) for pname in to_remove: self.remove_parameter(pname) def restore_references(self, refs): """Restore references to component `name` from `refs`. refs: object Value returned by :meth:`get_references`. """ for pname, param in refs.items(): try: self.add_parameter(param) except Exception as err: self.parent._logger.warning( "Couldn't restore parameter '%s': %s" % (pname, str(err))) def list_param_targets(self): """Returns a list of parameter targets. 
Note that this list may contain more entries than the list of Parameter, ParameterGroup, and ArrayParameter objects since ParameterGroup instances have multiple targets. """ targets = [] for param in self._parameters.values(): targets.extend(param.targets) return targets def list_param_group_targets(self): """Returns a list of tuples that contain the targets for each parameter group. """ targets = [] for param in self.get_parameters().values(): targets.append(tuple(param.targets)) return targets def clear_parameters(self): """Removes all parameters.""" for name in self._parameters.keys(): self.remove_parameter(name) self._parameters = OrderedDict() def get_parameters(self): """Returns an ordered dict of parameter objects.""" return self._parameters def total_parameters(self): """Returns the total number of values to be set.""" return sum([param.size for param in self._parameters.values()]) def init_parameters(self): """Sets all parameters to their start value if a start value is given """ scope = self._get_scope() for param in self._parameters.itervalues(): if param.start is not None: param.set(param.start, scope) def set_parameter_by_name(self, name, value, case=None, scope=None):
def main():
    """Convert a TINKER force-field parameter file into moltemplate (.lt) format.

    The parameter file is read from stdin, from a file (``-file NAME``) or
    from a URL (``-url URL``).  The generated moltemplate text is written to
    stdout; progress and error messages go to stderr.

    Recognised command line flags (consumed from ``sys.argv``):
        -atoms "A B C"       only keep the listed atom types
        -name NAME           name of the generated force-field object
        -file / -in-file F   read the parameter file from F
        -url / -in-url U     read the parameter file from URL U
        -dihedral-style S    "fourier" (default) or "opls"
        -hybrid              emit "hybrid" LAMMPS styles
        -zeropad / -zero-pad N   left-pad force-field IDs with zeros to width N
        -help, --help, -?, --?   print the usage message and exit

    Any exception is reported on stderr and terminates the process with
    exit status 1.
    """
    try:
        sys.stderr.write(g_program_name + ", version " + __version__ + ", " +
                         __date__ + "\n")
        # Compare version tuples: comparing the sys.version *string* is
        # lexicographic and therefore fragile (e.g. "2.10" < "2.6").
        if sys.version_info < (2, 6):
            raise Exception(
                'Error: Using python ' + sys.version + '\n' +
                ' Alas, your version of python is too old.\n'
                ' You must upgrade to a newer version of python (2.6 or later).'
            )
        if sys.version_info < (2, 7):
            from ordereddict import OrderedDict
        else:
            from collections import OrderedDict

        # defaults:
        ffname = "TINKER_FORCE_FIELD"
        type_subset = set()
        filename_in = ""
        file_in = sys.stdin
        bond_style_name = "harmonic"
        bond_style_link = "http://lammps.sandia.gov/doc/bond_harmonic.html"
        angle_style_name = "harmonic"
        angle_style_link = "http://lammps.sandia.gov/doc/angle_harmonic.html"
        dihedral_style_name = "fourier"
        dihedral_style_link = "http://lammps.sandia.gov/doc/dihedral_fourier.html"
        improper_style_name = "harmonic"
        improper_style_link = "http://lammps.sandia.gov/doc/improper_harmonic.html"
        #improper_style_name = "cvff"
        #improper_style_link = "http://lammps.sandia.gov/doc/improper_cvff.html"
        special_bonds_command = "special_bonds lj/coul 0.0 0.0 0.5"
        mixing_style = "geometric"
        use_hybrid = False
        contains_united_atoms = False
        zeropad_ffid = 1

        # ---------------- command line parsing ----------------
        # Recognised flags are deleted from argv as they are consumed, so
        # anything left over (besides argv[0]) is an unrecognised argument.
        argv = list(sys.argv)
        i = 1
        while i < len(argv):
            #sys.stderr.write('argv['+str(i)+'] = \"'+argv[i]+'\"\n')
            if argv[i] == '-atoms':
                if i + 1 >= len(argv):
                    raise Exception(
                        'Error: the \"' + argv[i] +
                        '\" argument should be followed by a quoted string\n'
                        ' which contains a space-delimited list of of a subset of atom types\n'
                        ' you want to use from the original force-field.\n'
                        ' Make sure you enclose the entire list in quotes.\n'
                    )
                type_subset = set(argv[i + 1].strip('\"\'').strip().split())
                del argv[i:i + 2]
            elif argv[i] == '-name':
                if i + 1 >= len(argv):
                    raise Exception(
                        'Error: ' + argv[i] +
                        ' flag should be followed by the name of the force-field\n'
                    )
                ffname = argv[i + 1]
                del argv[i:i + 2]
            elif argv[i] in ('-file', '-in-file'):
                if i + 1 >= len(argv):
                    raise Exception(
                        'Error: ' + argv[i] +
                        ' flag should be followed by the name of a force-field file\n'
                    )
                filename_in = argv[i + 1]
                try:
                    file_in = open(filename_in, 'r')
                except IOError:
                    sys.stderr.write('Error: Unable to open file\n'
                                     ' \"' + filename_in + '\"\n'
                                     ' for reading.\n')
                    sys.exit(1)
                del argv[i:i + 2]
            elif argv[i] == '-dihedral-style':
                if i + 1 >= len(argv):
                    raise Exception(
                        'Error: ' + argv[i] +
                        ' flag should be followed by either \"opls\" or \"fourier\"\n'
                    )
                dihedral_style_name = argv[i + 1]
                # NOTE: this must be an if/elif/else chain.  With two
                # independent "if" statements, "fourier" fell through to
                # the "not supported" error.
                if dihedral_style_name == "fourier":
                    dihedral_style_link = "http://lammps.sandia.gov/doc/dihedral_fourier.html"
                elif dihedral_style_name == "opls":
                    dihedral_style_link = "http://lammps.sandia.gov/doc/dihedral_opls.html"
                else:
                    raise Exception('Error: ' + argv[i] + ' ' +
                                    dihedral_style_name + ' not supported.\n')
                del argv[i:i + 2]
            elif argv[i] in ('-url', '-in-url'):
                import urllib2
                if i + 1 >= len(argv):
                    raise InputError(
                        'Error: ' + argv[i] +
                        ' flag should be followed by a URL pointing to\n'
                        ' a TINKER file containing force-field information.\n')
                url = argv[i + 1]
                try:
                    request = urllib2.Request(url)
                    file_in = urllib2.urlopen(request)
                except urllib2.URLError:
                    # stdout carries the generated file, so diagnostics
                    # must go to stderr.
                    sys.stderr.write("Error: Unable to open link:\n" + url +
                                     "\n")
                    sys.exit(1)
                del argv[i:i + 2]
            elif argv[i] == '-hybrid':
                use_hybrid = True
                del argv[i:i + 1]
            elif argv[i] in ('-zeropad', '-zero-pad'):
                # Reject a missing value, or a value that is really the
                # next flag (anything starting with '-').
                if (i + 1 >= len(argv)) or argv[i + 1].startswith('-'):
                    raise Exception(
                        'Error: ' + argv[i] +
                        ' flag should be followed by a positive integer\n')
                zeropad_ffid = int(argv[i + 1])
                del argv[i:i + 2]
            elif argv[i] in ('-help', '--help', '-?', '--?'):
                sys.stderr.write(doc_msg)
                sys.exit(0)
            else:
                i += 1

        if len(argv) != 1:
            raise Exception('Error: Unrecongized arguments: ' +
                            ' '.join(argv[1:]) + '\n\n' + doc_msg)

        #sys.stderr.write("Reading parameter file...\n")
        # The input is only needed for this one read, so release the handle
        # (file or URL) immediately, even if reading fails.
        try:
            lines = file_in.readlines()
        finally:
            if file_in is not sys.stdin:
                file_in.close()

        # ---------------- local helpers ----------------
        # A force-field ID consisting only of zeros acts as a wildcard.
        wildcard_ffid = "0" * zeropad_ffid

        def pad(ffid):
            # Left-pad a force-field ID with zeros to the requested width.
            return ffid.rjust(zeropad_ffid, '0')

        def type_label(ffids):
            # Name of an interaction type: IDs joined by "_", wildcard -> "X".
            return "_".join([(f if f != wildcard_ffid else "X")
                             for f in ffids])

        def atom_patterns(ffids, prefix, suffix):
            # Space separated @atom patterns matching each force-field ID.
            return " ".join([(prefix + f + suffix
                              if f != wildcard_ffid else "@atom:*")
                             for f in ffids])

        def style_if_hybrid(style_name):
            # Hybrid styles require the style name in every *_coeff command.
            return style_name if use_hybrid else ""

        def in_subset(atom_type):
            # True if no -atoms subset was requested or the type is in it.
            return (len(type_subset) == 0) or (atom_type in type_subset)

        atom2charge = OrderedDict()  # lookup charge from atom type
        atom2mass = OrderedDict()  # lookup mass from atom type
        atom2vdw_e = OrderedDict()  # lookup Lennard-Jones "epsilon" parameter
        atom2vdw_s = OrderedDict()  # lookup Lennard-Jones "sigma" parameter
        atom2descr = OrderedDict()
        atom2ffid = OrderedDict()  # lookup force-field-ID from atom type
        # force-field-ID is an id number/string used to assign
        # bonds, angles, dihedrals, and impropers.
        bonds_by_type = OrderedDict()  # lookup bond parameters by force-field-ID
        angles_by_type = OrderedDict()  # lookup angle parameters by force-field-ID
        dihedrals_by_type = OrderedDict()  # lookup dihedral parameters by force-field-ID
        impropers_by_type = OrderedDict()  # lookup improper parameters by force-field-ID
        lines_ureybrad = []
        lines_biotype = []

        # ---------------- parse the parameter file ----------------
        for iline, line in enumerate(lines):
            tokens = SplitQuotedString(line.strip(), comment_char='#')
            if (len(tokens) > 1) and (tokens[0] == 'atom'):
                # list(...) so that len() and indexing also work when map()
                # returns an iterator (Python 3).
                tokens = list(map(RemoveOuterQuotes,
                                  SplitQuotedString(line.strip(),
                                                    comment_char='')))
                if len(tokens) > 6:
                    if in_subset(tokens[1]):
                        atom2ffid[tokens[1]] = tokens[2]
                        #atom2mass[tokens[1]] = float(tokens[6])
                        # Some atoms in oplsaa.prm have zero mass. Unfortunately this
                        # causes LAMMPS to crash, even if these atoms are never used,
                        # so I give the mass a non-zero value instead.
                        atom2mass[tokens[1]] = max(float(tokens[6]), 1e-30)
                        atom2descr[tokens[1]] = tokens[4]
                        if tokens[4].find('(UA)') != -1:
                            contains_united_atoms = True
                else:
                    raise Exception('Error: Invalid atom line:\n' + line)
            elif (len(tokens) > 2) and (tokens[0] == 'charge'):
                if in_subset(tokens[1]):
                    atom2charge[tokens[1]] = float(tokens[2])
            elif (len(tokens) > 3) and (tokens[0] == 'vdw'):
                if in_subset(tokens[1]):
                    atom2vdw_e[tokens[1]] = float(tokens[3])  # "epsilon"
                    atom2vdw_s[tokens[1]] = float(tokens[2])  # "sigma"
            elif (len(tokens) > 4) and (tokens[0] == 'bond'):
                k = float(tokens[3])
                r0 = float(tokens[4])
                bonds_by_type[pad(tokens[1]), pad(tokens[2])] = (k, r0)
            elif (len(tokens) > 5) and (tokens[0] == 'angle'):
                k = float(tokens[4])
                angle0 = float(tokens[5])
                angles_by_type[pad(tokens[1]),
                               pad(tokens[2]),
                               pad(tokens[3])] = (k, angle0)
            elif (len(tokens) > 11) and (tokens[0] == 'torsion'):
                dihedral_key = (pad(tokens[1]), pad(tokens[2]),
                                pad(tokens[3]), pad(tokens[4]))
                if dihedral_style_name == 'fourier':
                    # http://lammps.sandia.gov/doc/dihedral_fourier.html
                    # Integer division: the number of (K, d, n) triplets
                    # must be an int to be usable with range().
                    m = (len(tokens) - 5) // 3
                    K = [float(tokens[5 + 3 * j]) for j in range(m)]
                    d = [float(tokens[5 + 3 * j + 1]) for j in range(m)]
                    n = [float(tokens[5 + 3 * j + 2]) for j in range(m)]
                    dihedrals_by_type[dihedral_key] = (K, n, d)
                elif dihedral_style_name == 'opls':
                    # http://lammps.sandia.gov/doc/dihedral_opls.html
                    K1 = float(tokens[5])
                    K2 = float(tokens[8])
                    K3 = float(tokens[11])
                    K4 = 0.0
                    if len(tokens) > 14:
                        K4 = float(tokens[14])
                    # The opls style only has the K coefficients, so the
                    # phase/multiplicity columns must have these fixed values.
                    if ((float(tokens[6]) != 0.0) or
                            (float(tokens[7]) != 1.0) or
                            (float(tokens[9]) not in (180.0, -180.0)) or
                            (float(tokens[10]) != 2.0) or
                            (float(tokens[12]) != 0.0) or
                            (float(tokens[13]) != 3.0) or
                            ((K4 != 0.0) and
                             ((len(tokens) <= 16) or
                              (float(tokens[15]) not in (180.0, -180.0)) or
                              (float(tokens[16]) != 4.0)))):
                        raise Exception(
                            "Error: This parameter file is incompatible with -dihedral-style \""
                            + dihedral_style_name + "\"\n" +
                            " (See line number " + str(iline + 1) +
                            " of parameter file.)\n")
                    dihedrals_by_type[dihedral_key] = (K1, K2, K3, K4)
                else:
                    assert (False)
            elif (len(tokens) > 7) and (tokens[0] == 'imptors'):
                k = float(tokens[5])
                angle0 = float(tokens[6])
                multiplicity = float(tokens[7])
                impropers_by_type[pad(tokens[1]),
                                  pad(tokens[2]),
                                  pad(tokens[3]),
                                  pad(tokens[4])] = (k / multiplicity, angle0)
            elif (len(tokens) > 0) and (tokens[0] == 'biotype'):
                # I'm not sure what to do with these, so I'll store them for now and
                # append them as comments to the .lt file generated by the program.
                lines_biotype.append(line.rstrip())
            elif (len(tokens) > 0) and (tokens[0] == 'ureybrad'):
                # I'm not sure what to do with these, so I'll store them for now and
                # append them as comments to the .lt file generated by the program.
                lines_ureybrad.append(line.rstrip())
            elif (len(tokens) > 1) and (tokens[0] == 'radiusrule'):
                if tokens[1] == 'GEOMETRIC':
                    mixing_style = 'geometric'
                elif tokens[1] == 'ARITHMETIC':
                    mixing_style = 'arithmetic'
                else:
                    raise Exception("Error: Unrecognized mixing style: " +
                                    tokens[1] + ", found here:\n" + line)
            elif (len(tokens) > 1) and (tokens[0] == 'epsilonrule'):
                if tokens[1] != 'GEOMETRIC':
                    raise Exception(
                        "Error: As of 2016-9-21, LAMMPS only supports GEOMETRIC mixing of energies\n"
                        +
                        " This force field simply cannot be used with LAMMPS in a general way.\n"
                        +
                        " One way around this is to manually change the \"epsilonrule\" back to\n"
                        +
                        " GEOMETRIC, and limit the number of atom types considered by this\n"
                        +
                        " program by using the -atoms \"LIST OF ATOMS\" argument,\n"
                        +
                        " to only include the atoms you care about, and then explicitly\n"
                        +
                        " define pair_coeffs for all possible pairs of these atom types.\n"
                        +
                        " If this is a popular force-field, then lobby the LAMMPS developers\n"
                        + " to consider alternate mixing rules.\n\n" +
                        # 1-based, consistent with the other line-number message
                        "The offending line from the file is line number " +
                        str(iline + 1) + ":\n" + line + "\n")

        # Zero-pad the atom2ffid values so that they have the same number
        # of digits. This is usually not necessary, but it can be helpful
        # to remove uncertainty about the meaning of '4*' which could
        # pattern match with '4', '4L', '47', '47L'... If you replace '4'
        # with '04', '04*' becomes distinguishable from '47*'.
        # This can be useful if you want to augment the force field later,
        # (for example, adding additional atoms to the LOPLSAA variant of OPLSAA)
        for k in list(atom2ffid):
            atom2ffid[k] = pad(atom2ffid[k])
            # Horrible hack: for LOPLSAA, uncomment the next 3 lines:
            #ki = atom2ffid[k].find('L')
            #if ki!=-1:
            #    atom2ffid[k] = atom2ffid[k].rjust(zeropad_ffid + len(atom2ffid[k]) - ki, '0')

        #sys.stderr.write(" done.\n")
        #sys.stderr.write("Converting to moltemplate format...\n")

        # Long-range electrostatics are only requested when at least one
        # atom type carries a non-zero charge.
        system_is_charged = any(
            [charge != 0.0 for charge in atom2charge.values()])
        pair_style_link = "http://lammps.sandia.gov/doc/pair_lj.html"
        if system_is_charged:
            pair_style_name = "lj/cut/coul/long"
            pair_style_params = "10.0 10.0"
            kspace_style = " kspace_style pppm 0.0001\n"
        else:
            pair_style_name = "lj/cut"
            pair_style_params = "10.0"
            kspace_style = ""

        pair_style_command = " pair_style " + ("hybrid " if use_hybrid else "") + \
            pair_style_name + " " + pair_style_params + "\n"

        # ---------------- write the moltemplate file ----------------
        write = sys.stdout.write

        write("# This file was generated automatically using:\n")
        write("# " + g_program_name + " " + " ".join(sys.argv[1:]) + "\n")
        if contains_united_atoms:
            write(
                "#\n"
                "# WARNING: Many of these atoms are probably UNITED-ATOM (UA) atoms.\n"
                "# The hydrogen atoms have been absorbed into the heavy atoms, and the\n"
                "# force-field modified accordingly. Do not mix with ordinary atoms.\n"
            )
        write(
            "#\n"
            "# WARNING: The following 1-2, 1-3, and 1-4 weighting parameters were ASSUMED:\n"
        )
        write("# " + special_bonds_command + "\n")
        write(
            "# (See http://lammps.sandia.gov/doc/special_bonds.html for details)\n"
        )
        if len(lines_ureybrad) > 0:
            write(
                "#\n"
                "# WARNING: All Urey-Bradley interactions have been IGNORED including:\n"
            )
            write("# ffid1 ffid2 ffid3 K r0\n# ")
            write("\n# ".join(lines_ureybrad))
            write("\n\n")
        write("\n\n")
        write(ffname + " {\n\n")

        write(
            " # Below we will use lammps \"set\" command to assign atom charges\n"
            " # by atom type. http://lammps.sandia.gov/doc/set.html\n\n")
        write(" write_once(\"In Charges\") {\n")
        for atype in atom2mass:
            assert (atype in atom2descr)
            write(" set type @atom:" + atype + " charge " +
                  str(atom2charge[atype]) + " # \"" + atom2descr[atype] +
                  "\"\n")
        write(" } #(end of atom partial charges)\n\n\n")

        write(" write_once(\"Data Masses\") {\n")
        for atype in atom2mass:
            write(" @atom:" + atype + " " + str(atom2mass[atype]) + "\n")
        write(" } #(end of atom masses)\n\n\n")

        write(
            " # ---------- EQUIVALENCE CATEGORIES for bonded interaction lookup ----------\n"
            " # Each type of atom has a separate ID used for looking up bond parameters\n"
            " # and a separate ID for looking up 3-body angle interaction parameters\n"
            " # and a separate ID for looking up 4-body dihedral interaction parameters\n"
            " # and a separate ID for looking up 4-body improper interaction parameters\n"
            #" # (This is because there are several different types of sp3 carbon atoms\n"
            #" # which have the same torsional properties when within an alkane molecule,\n"
            #" # for example. If they share the same dihedral-ID, then this frees us\n"
            #" # from being forced define separate dihedral interaction parameters\n"
            #" # for all of them.)\n"
            " # The complete @atom type name includes ALL of these ID numbers. There's\n"
            " # no need to force the end-user to type the complete name of each atom.\n"
            " # The \"replace\" command used below informs moltemplate that the short\n"
            " # @atom names we have been using abovee are equivalent to the complete\n"
            " # @atom names used below:\n\n")

        def full_atom_name(atype):
            # Complete @atom name: the short name plus the bond, angle,
            # dihedral and improper lookup IDs (all equal to the ffid here).
            ffid = atom2ffid[atype]
            return ("@atom:" + atype + "_b" + ffid + "_a" + ffid + "_d" +
                    ffid + "_i" + ffid)

        for atype in atom2ffid:
            write(" replace{ @atom:" + atype + " " + full_atom_name(atype) +
                  " }\n")
        write("\n\n\n\n")

        write(
            " # --------------- Non-Bonded interactions: ---------------------\n"
            " # " + pair_style_link + "\n"
            " # Syntax:\n"
            " # pair_coeff AtomType1 AtomType2 " +
            ("PairStyleName " if use_hybrid else "") + "parameters...\n\n")
        write(" write_once(\"In Settings\") {\n")
        for atype in atom2vdw_e:
            assert (atype in atom2vdw_s)
            if atype not in atom2ffid:
                continue
            write(" pair_coeff " + full_atom_name(atype) + " " +
                  full_atom_name(atype) + " " +
                  style_if_hybrid(pair_style_name) + " " +
                  str(atom2vdw_e[atype]) + " " + str(atom2vdw_s[atype]) +
                  "\n")
        write(" } #(end of pair_coeffs)\n\n\n\n")

        write(" # ------- Bonded Interactions: -------\n"
              " # " + bond_style_link + "\n"
              " # Syntax: \n"
              " # bond_coeff BondTypeName " +
              ("BondStyleName " if use_hybrid else "") + "parameters...\n\n")
        write(" write_once(\"In Settings\") {\n")
        for btype in bonds_by_type:
            (k, r0) = bonds_by_type[btype]
            write(" bond_coeff @bond:" + type_label(btype) + " " +
                  style_if_hybrid(bond_style_name) + " " + str(k) + " " +
                  str(r0) + "\n")
        write(" } #(end of bond_coeffs)\n\n")

        write(" # Rules for assigning bond types by atom type:\n"
              " # BondTypeName AtomType1 AtomType2\n"
              " # (* = wildcard)\n\n")
        write(" write_once(\"Data Bonds By Type\") {\n")
        for btype in bonds_by_type:
            write(" @bond:" + type_label(btype))
            write(" " + atom_patterns(btype, "@atom:*_b", "*_a*_d*_i*") +
                  "\n")
        write(" } #(end of bonds by type)\n\n\n\n\n")

        write(" # ------- Angle Interactions: -------\n"
              " # " + angle_style_link + "\n"
              " # Syntax: \n"
              " # angle_coeff AngleTypeName " +
              ("AngleStyleName " if use_hybrid else "") + "parameters...\n\n")
        write(" write_once(\"In Settings\") {\n")
        for atype in angles_by_type:
            (k, angle0) = angles_by_type[atype]
            write(" angle_coeff @angle:" + type_label(atype) + " " +
                  style_if_hybrid(angle_style_name) + " " + str(k) + " " +
                  str(angle0) + "\n")
        write(" } #(end of angle_coeffs)\n\n")

        write(
            " # Rules for creating angle interactions according to atom type:\n"
            " # AngleTypeName AtomType1 AtomType2 AtomType3\n"
            " # (* = wildcard)\n\n")
        write(" write_once(\"Data Angles By Type\") {\n")
        for atype in angles_by_type:
            write(" @angle:" + type_label(atype))
            write(" " + atom_patterns(atype, "@atom:*_b*_a", "*_d*_i*") +
                  "\n")
        write(" } #(end of angles by type)\n\n\n\n\n")

        write(
            " # ----------- Dihedral Interactions: ------------\n"
            " # " + dihedral_style_link + "\n"
            " # Syntax:\n"
            " # dihedral_coeff DihedralTypeName " +
            ("DihedralStyleName " if use_hybrid else "") + "parameters...\n\n")
        write(" write_once(\"In Settings\") {\n")
        for dtype in dihedrals_by_type:
            write(" dihedral_coeff @dihedral:" + type_label(dtype) + " " +
                  style_if_hybrid(dihedral_style_name) + " ")
            if dihedral_style_name == 'fourier':
                # http://lammps.sandia.gov/doc/dihedral_fourier.html
                (K, n, d) = dihedrals_by_type[dtype]
                m = len(K)
                assert ((m == len(n)) and (m == len(d)))
                write(str(m))
                for j in range(m):
                    write(" " + str(K[j]) + " " + str(n[j]) + " " +
                          str(d[j]))
                write("\n")
            elif dihedral_style_name == 'opls':
                # http://lammps.sandia.gov/doc/dihedral_opls.html
                (K1, K2, K3, K4) = dihedrals_by_type[dtype]
                write(str(K1) + " " + str(K2) + " " + str(K3) + " " +
                      str(K4) + "\n")
            else:
                assert (False)
        write(" } #(end of dihedral_coeffs)\n\n")

        write(
            " # Rules for creating dihedral interactions according to atom type:\n"
            " # DihedralTypeName AtomType1 AtomType2 AtomType3 AtomType4\n"
            " # (* = wildcard)\n\n")
        write(" write_once(\"Data Dihedrals By Type\") {\n")
        for dtype in dihedrals_by_type:
            write(" @dihedral:" + type_label(dtype))
            write(" " + atom_patterns(dtype, "@atom:*_b*_a*_d", "*_i*") +
                  "\n")
        write(" } #(end of dihedrals by type)\n\n\n\n\n")

        write(" # ---------- Improper Interactions: ----------\n"
              " # " + improper_style_link + "\n"
              " # Syntax:\n"
              " # improper_coeff ImproperTypeName " +
              ("ImproperStyleName " if use_hybrid else "") + "parameters\n\n")
        write(" write_once(\"In Settings\") {\n")
        for itype in impropers_by_type:
            (k, angle0) = impropers_by_type[itype]
            write(" improper_coeff @improper:" + type_label(itype) + " " +
                  style_if_hybrid(improper_style_name) + " " + str(k) + " " +
                  str(angle0) + "\n")
        write(" } #(end of improper_coeffs)\n\n")

        write(
            " # Rules for creating improper interactions according to atom type:\n"
            " # ImproperTypeName AtomType1 AtomType2 AtomType3 AtomType4\n"
            " # (* = wildcard)\n")
        write(" write_once(\"Data Impropers By Type (opls_imp.py)\") {\n")
        for itype in impropers_by_type:
            write(" @improper:" + type_label(itype))
            write(" " + atom_patterns(itype, "@atom:*_b*_a*_d*_i", "*") +
                  "\n")
        write(" } #(end of impropers by type)\n\n\n\n\n")

        write(" # -------- (descriptive comment) --------\n")
        write(" # ---- biologically relevant atom types: ----\n # ")
        write("\n # ".join(lines_biotype))
        write("\n # ---------- (end of comment) ----------\n")
        write("\n\n\n\n")

        write(
            " # LAMMPS supports many different kinds of bonded and non-bonded\n"
            " # interactions which can be selected at run time. Eventually\n"
            " # we must inform LAMMPS which of them we will need. We specify\n"
            " # this in the \"In Init\" section: \n\n")
        write(" write_once(\"In Init\") {\n")
        write(" units real\n")
        write(" atom_style full\n")
        write(" bond_style " + ("hybrid " if use_hybrid else "") +
              bond_style_name + "\n")
        write(" angle_style " + ("hybrid " if use_hybrid else "") +
              angle_style_name + "\n")
        write(" dihedral_style " + ("hybrid " if use_hybrid else "") +
              dihedral_style_name + "\n")
        write(" improper_style " + ("hybrid " if use_hybrid else "") +
              improper_style_name + "\n")
        write(pair_style_command)
        write(" pair_modify mix " + mixing_style + "\n")
        write(" " + special_bonds_command + "\n")
        write(kspace_style)
        write(" } #end of init parameters\n\n")

        write(
            " # Note: We use \"hybrid\" styles in case the user later wishes to\n"
            " # combine the molecules built using this force-field with other\n"
            " # molecules that use other styles. (This is not necessarily\n"
            " # a good idea, but LAMMPS and moltemplate both allow it.)\n"
            " # For more information:\n"
            " # http://lammps.sandia.gov/doc/pair_hybrid.html\n"
            " # http://lammps.sandia.gov/doc/bond_hybrid.html\n"
            " # http://lammps.sandia.gov/doc/angle_hybrid.html\n"
            " # http://lammps.sandia.gov/doc/dihedral_hybrid.html\n"
            " # http://lammps.sandia.gov/doc/improper_hybrid.html\n\n\n"
        )
        write("} # " + ffname + "\n\n")
        #sys.stderr.write(" done.\n")

    except Exception as err:
        # Top-level boundary: report the problem and signal failure.
        sys.stderr.write('\n\n' + str(err) + '\n')
        sys.exit(1)
class Api(object):
    """
    The main entry point for the application.
    You need to initialize it with a Flask Application: ::

    >>> app = Flask(__name__)
    >>> api = restful.Api(app)

    Alternatively, you can use :meth:`init_app` to set the Flask application
    after it has been constructed.

    :param app: the Flask application object
    :type app: flask.Flask
    :type app: flask.Blueprint
    :param prefix: Prefix all routes with a value, eg v1 or 2010-04-01
    :type prefix: str
    :param default_mediatype: The default media type to return
    :type default_mediatype: str
    :param decorators: Decorators to attach to every resource
    :type decorators: list
    :param catch_all_404s: Use :meth:`handle_error`
        to handle 404 errors throughout your app
    :type catch_all_404s: bool
    :param serve_challenge_on_401: Whether to serve a challenge response to
        clients on receiving 401. This usually leads to a username/password
        popup in web browsers.
    :param url_part_order: A string that controls the order that the pieces
        of the url are concatenated when the full url is constructed.  'b'
        is the blueprint (or blueprint registration) prefix, 'a' is the api
        prefix, and 'e' is the path component the endpoint is added with
    :param errors: A dictionary to define a custom response for each
        exception or error raised during a request
    :type errors: dict

    """

    def __init__(self, app=None, prefix='',
                 default_mediatype='application/json', decorators=None,
                 catch_all_404s=False, serve_challenge_on_401=False,
                 url_part_order='bae', errors=None):
        self.representations = OrderedDict(DEFAULT_REPRESENTATIONS)
        self.urls = {}
        self.prefix = prefix
        self.default_mediatype = default_mediatype
        self.decorators = decorators if decorators else []
        self.catch_all_404s = catch_all_404s
        self.serve_challenge_on_401 = serve_challenge_on_401
        self.url_part_order = url_part_order
        self.errors = errors or {}
        self.blueprint_setup = None
        self.endpoints = set()
        # Resources added before an app is known; replayed by _init_app.
        self.resources = []
        self.app = None
        self.blueprint = None

        if app is not None:
            self.app = app
            self.init_app(app)

    def init_app(self, app):
        """Initialize this class with the given :class:`flask.Flask`
        application or :class:`flask.Blueprint` object.

        :param app: the Flask application or blueprint object
        :type app: flask.Flask
        :type app: flask.Blueprint

        Examples::

            api = Api()
            api.add_resource(...)
            api.init_app(app)

        """
        # If app is a blueprint, defer the initialization
        try:
            app.record(self._deferred_blueprint_init)
        # flask.Blueprint has a 'record' attribute, flask.Flask does not
        except AttributeError:
            self._init_app(app)
        else:
            self.blueprint = app

    def _complete_url(self, url_part, registration_prefix):
        """This method is used to defer the construction of the final url in
        the case that the Api is created with a Blueprint.

        :param url_part: The part of the url the endpoint is registered with
        :param registration_prefix: The part of the url contributed by the
            blueprint.  Generally speaking, BlueprintSetupState.url_prefix
        :return: the non-empty parts concatenated in ``url_part_order`` order
        """
        parts = {
            'b': registration_prefix,
            'a': self.prefix,
            'e': url_part
        }
        # Falsy parts (None or '') are skipped rather than concatenated.
        return ''.join(parts[key] for key in self.url_part_order if parts[key])

    @staticmethod
    def _blueprint_setup_add_url_rule_patch(blueprint_setup, rule, endpoint=None,
                                            view_func=None, **options):
        """Method used to patch BlueprintSetupState.add_url_rule for setup
        state instance corresponding to this Api instance.  Exists primarily
        to enable _complete_url's function.

        :param blueprint_setup: The BlueprintSetupState instance (self)
        :param rule: A string or callable that takes a string and returns a
            string(_complete_url) that is the url rule for the endpoint
            being registered
        :param endpoint: See BlueprintSetupState.add_url_rule
        :param view_func: See BlueprintSetupState.add_url_rule
        :param **options: See BlueprintSetupState.add_url_rule
        """
        if callable(rule):
            rule = rule(blueprint_setup.url_prefix)
        elif blueprint_setup.url_prefix:
            rule = blueprint_setup.url_prefix + rule
        options.setdefault('subdomain', blueprint_setup.subdomain)
        if endpoint is None:
            endpoint = _endpoint_from_view_func(view_func)
        defaults = blueprint_setup.url_defaults
        if 'defaults' in options:
            defaults = dict(defaults, **options.pop('defaults'))
        blueprint_setup.app.add_url_rule(
            rule, '%s.%s' % (blueprint_setup.blueprint.name, endpoint),
            view_func, defaults=defaults, **options)

    def _deferred_blueprint_init(self, setup_state):
        """Synchronize prefix between blueprint/api and registration options,
        then perform initialization with setup_state.app :class:`flask.Flask`
        object.  When a :class:`flask_restbolt.Api` object is initialized with
        a blueprint, this method is recorded on the blueprint to be run when
        the blueprint is later registered to a :class:`flask.Flask` object.
        This method also monkeypatches BlueprintSetupState.add_url_rule with
        _blueprint_setup_add_url_rule_patch.

        :param setup_state: The setup state object passed to deferred
            functions during blueprint registration
        :type setup_state: flask.blueprints.BlueprintSetupState
        :raises ValueError: if this is not the blueprint's first registration
        """
        self.blueprint_setup = setup_state
        # Patch only once per setup state, keeping the original reachable.
        if setup_state.add_url_rule.__name__ != '_blueprint_setup_add_url_rule_patch':
            setup_state._original_add_url_rule = setup_state.add_url_rule
            setup_state.add_url_rule = MethodType(
                Api._blueprint_setup_add_url_rule_patch, setup_state)
        if not setup_state.first_registration:
            raise ValueError('flask-restful blueprints can only be registered once.')
        self._init_app(setup_state.app)

    def _init_app(self, app):
        """Perform initialization actions with the given :class:`flask.Flask`
        object.

        :param app: The flask application object
        :type app: flask.Flask
        """
        # Route the app's error handling through error_router, which falls
        # back to the handlers captured here.
        app.handle_exception = partial(self.error_router, app.handle_exception)
        app.handle_user_exception = partial(self.error_router,
                                            app.handle_user_exception)

        # Replay resources that were added before the app was available.
        for resource, urls, kwargs in self.resources:
            self._register_view(app, resource, *urls, **kwargs)

    def owns_endpoint(self, endpoint):
        """Tests if an endpoint name (not path) belongs to this Api.  Takes
        in to account the Blueprint name part of the endpoint name.

        :param endpoint: The name of the endpoint being checked
        :return: bool
        """
        if self.blueprint:
            # The '.' separator must be part of the prefix test: otherwise an
            # unrelated endpoint that merely starts with the blueprint name
            # (blueprint 'api', endpoint 'apifoo') is looked up unqualified
            # and can be reported as owned.
            blueprint_prefix = self.blueprint.name + '.'
            if not endpoint.startswith(blueprint_prefix):
                return False
            endpoint = endpoint[len(blueprint_prefix):]
        return endpoint in self.endpoints

    def _should_use_fr_error_handler(self):
        """ Determine if error should be handled with FR or default Flask

        The goal is to return Flask error handlers for non-FR-related routes,
        and FR errors (with the correct media type) for FR endpoints. This
        method currently handles 404 and 405 errors.

        :return: bool
        """
        adapter = current_app.create_url_adapter(request)

        try:
            adapter.match()
        except MethodNotAllowed as e:
            # Check if the other HTTP methods at this url would hit the Api
            valid_route_method = e.valid_methods[0]
            rule, _ = adapter.match(method=valid_route_method, return_rule=True)
            return self.owns_endpoint(rule.endpoint)
        except NotFound:
            return self.catch_all_404s
        except Exception:
            # Werkzeug throws other kinds of exceptions, such as Redirect.
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are never swallowed here.
            return False
        # The request matched a rule: not a 404/405 situation.
        return False

    def _has_fr_route(self):
        """Encapsulating the rules for whether the request was to a Flask endpoint"""
        # 404's, 405's, which might not have a url_rule
        if self._should_use_fr_error_handler():
            return True
        # for all other errors, just check if FR dispatched the route
        if not request.url_rule:
            return False
        return self.owns_endpoint(request.url_rule.endpoint)

    def error_router(self, original_handler, e):
        """This function decides whether the error occurred in a flask-restful
        endpoint or not. If it happened in a flask-restful endpoint, our
        handler will be dispatched. If it happened in an unrelated view, the
        app's original error handler will be dispatched.
        In the event that the error occurred in a flask-restful endpoint but
        the local handler can't resolve the situation, the router will fall
        back onto the original_handler as last resort.

        :param original_handler: the original Flask error handler for the app
        :type original_handler: function
        :param e: the exception raised while handling the request
        :type e: Exception

        """
        if self._has_fr_route():
            try:
                return self.handle_error(e)
            except Exception:
                pass  # Fall through to original handler
        return original_handler(e)

    def handle_error(self, e):
        """Error handler for the API transforms a raised exception into a Flask
        response, with the appropriate HTTP status code and body.

        :param e: the raised Exception object
        :type e: Exception
        :return: the response built by :meth:`make_response`
        :raises Exception: ``e`` itself, when it is not an HTTPException and
            the current app propagates exceptions

        """
        got_request_exception.send(current_app._get_current_object(), exception=e)

        if not isinstance(e, HTTPException) and current_app.propagate_exceptions:
            # Re-raise with the active traceback when e is the exception
            # currently being handled; otherwise raise it afresh.
            if sys.exc_info()[1] is e:
                raise
            else:
                raise e

        headers = Headers()
        if isinstance(e, HTTPException):
            code = e.code
            default_data = {
                'message': getattr(e, 'description', http_status_message(code))
            }
            headers = e.get_response().headers
        else:
            code = 500
            default_data = {
                'message': http_status_message(code),
            }

        # Werkzeug exceptions generate a content-length header which is added
        # to the response in addition to the actual content-length header
        # https://github.com/flask-restful/flask-restful/issues/534
        remove_headers = ('Content-Length',)

        for header in remove_headers:
            headers.pop(header, None)

        data = getattr(e, 'data', default_data)

        if code >= 500:
            exc_info = sys.exc_info()
            if exc_info[1] is None:
                exc_info = None
            current_app.log_exception(exc_info)

        help_on_404 = current_app.config.get("ERROR_404_HELP", True)
        if code == 404 and help_on_404:
            # Map each rule with its <converter> parts stripped to the full
            # rule, so the requested path can be fuzzy-matched against it.
            rules = {re.sub('(<.*>)', '', rule.rule): rule.rule
                     for rule in current_app.url_map.iter_rules()}
            close_matches = difflib.get_close_matches(request.path, rules.keys())
            if close_matches:
                # If we already have a message, add punctuation and continue it.
                if "message" in data:
                    data["message"] = data["message"].rstrip('.') + '. '
                else:
                    data["message"] = ""

                data['message'] += 'You have requested this URI [' + request.path + \
                                   '] but did you mean ' + \
                                   ' or '.join((
                                       rules[match] for match in close_matches)
                                   ) + ' ?'

        error_cls_name = type(e).__name__
        if error_cls_name in self.errors:
            custom_data = self.errors.get(error_cls_name, {})
            code = custom_data.get('status', 500)
            data.update(custom_data)

        if code == 406 and self.default_mediatype is None:
            # if we are handling NotAcceptable (406), make sure that
            # make_response uses a representation we support as the
            # default mediatype (so that make_response doesn't throw
            # another NotAcceptable error).
            supported_mediatypes = list(self.representations.keys())
            fallback_mediatype = supported_mediatypes[0] if supported_mediatypes else "text/plain"
            resp = self.make_response(
                data,
                code,
                headers,
                fallback_mediatype=fallback_mediatype
            )
        else:
            resp = self.make_response(data, code, headers)

        if code == 401:
            resp = self.unauthorized(resp)
        return resp

    def mediatypes_method(self):
        """Return a method that returns a list of mediatypes
        """
        return lambda resource_cls: self.mediatypes() + [self.default_mediatype]

    def add_resource(self, resource, *urls, **kwargs):
        """Adds a resource to the api.

        :param resource: the class name of your resource
        :type resource: :class:`Resource`

        :param urls: one or more url routes to match for the resource, standard
                     flask routing rules apply.  Any url variables will be
                     passed to the resource method as args.
        :type urls: str

        :param endpoint: endpoint name (defaults to :meth:`Resource.__name__.lower`
            Can be used to reference this route in :class:`fields.Url` fields
        :type endpoint: str

        :param resource_class_args: args to be forwarded to the constructor of
            the resource.
        :type resource_class_args: tuple

        :param resource_class_kwargs: kwargs to be forwarded to the constructor
            of the resource.
        :type resource_class_kwargs: dict

        Additional keyword arguments not specified above will be passed as-is
        to :meth:`flask.Flask.add_url_rule`.

        Examples::

            api.add_resource(HelloWorld, '/', '/hello')
            api.add_resource(Foo, '/foo', endpoint="foo")
            api.add_resource(FooSpecial, '/special/foo', endpoint="foo")

        """
        if self.app is not None:
            self._register_view(self.app, resource, *urls, **kwargs)
        else:
            # No app yet: remember the registration for _init_app to replay.
            self.resources.append((resource, urls, kwargs))

    def resource(self, *urls, **kwargs):
        """Wraps a :class:`~flask_restbolt.Resource` class, adding it to the
        api. Parameters are the same as :meth:`~flask_restbolt.Api.add_resource`.

        Example::

            app = Flask(__name__)
            api = restful.Api(app)

            @api.resource('/foo')
            class Foo(Resource):
                def get(self):
                    return 'Hello, World!'

        """
        def decorator(cls):
            self.add_resource(cls, *urls, **kwargs)
            return cls
        return decorator

    def _register_view(self, app, resource, *urls, **kwargs):
        """Build the view function for ``resource`` and add one url rule per
        entry in ``urls``.

        :param app: the object the url rules are added to
        :param resource: the resource class being registered
        :param urls: url routes for the resource
        :param kwargs: ``endpoint``, ``resource_class_args`` and
            ``resource_class_kwargs`` are consumed here; the rest is passed
            to ``add_url_rule``
        :raises ValueError: if the endpoint is already bound to another class
        """
        endpoint = kwargs.pop('endpoint', None) or resource.__name__.lower()
        self.endpoints.add(endpoint)
        resource_class_args = kwargs.pop('resource_class_args', ())
        resource_class_kwargs = kwargs.pop('resource_class_kwargs', {})

        # NOTE: 'view_functions' is cleaned up from Blueprint class in Flask 1.0
        if endpoint in getattr(app, 'view_functions', {}):
            previous_view_class = app.view_functions[endpoint].__dict__['view_class']

            # if you override the endpoint with a different class, avoid the
            # collision by raising an exception
            if previous_view_class != resource:
                raise ValueError('This endpoint (%s) is already set to the class %s.'
                                 % (endpoint, previous_view_class.__name__))

        resource.mediatypes = self.mediatypes_method()  # Hacky
        resource.endpoint = endpoint
        resource_func = self.output(resource.as_view(endpoint, *resource_class_args,
                                                     **resource_class_kwargs))

        for decorator in self.decorators:
            resource_func = decorator(resource_func)

        for url in urls:
            # If this Api has a blueprint
            if self.blueprint:
                # And this Api has been setup
                if self.blueprint_setup:
                    # Set the rule to a string directly, as the blueprint is
                    # already set up.
                    self.blueprint_setup.add_url_rule(url, view_func=resource_func,
                                                      **kwargs)
                    continue
                else:
                    # Set the rule to a function that expects the blueprint prefix
                    # to construct the final url.  Allows deferment of url finalization
                    # in the case that the associated Blueprint has not yet been
                    # registered to an application, so we can wait for the registration
                    # prefix
                    rule = partial(self._complete_url, url)
            else:
                # If we've got no Blueprint, just build a url with no prefix
                rule = self._complete_url(url, '')
            # Add the url to the application or blueprint
            app.add_url_rule(rule, view_func=resource_func, **kwargs)

    def output(self, resource):
        """Wraps a resource (as a flask view function), for cases where the
        resource does not directly return a response object

        :param resource: The resource as a flask view function
        """
        @wraps(resource)
        def wrapper(*args, **kwargs):
            resp = resource(*args, **kwargs)
            if isinstance(resp, ResponseBase):  # There may be a better way to test
                return resp
            data, code, headers = unpack(resp)
            return self.make_response(data, code, headers=headers)
        return wrapper

    def url_for(self, resource, **values):
        """Generates a URL to the given resource.

        Works like :func:`flask.url_for`."""
        endpoint = resource.endpoint
        if self.blueprint:
            endpoint = '{0}.{1}'.format(self.blueprint.name, endpoint)
        return url_for(endpoint, **values)

    def make_response(self, data, *args, **kwargs):
        """Looks up the representation transformer for the requested media
        type, invoking the transformer to create a response object. This
        defaults to default_mediatype if no transformer is found for the
        requested mediatype. If default_mediatype is None, a 406 Not
        Acceptable response will be sent as per RFC 2616 section 14.1

        :param data: Python object containing response data to be transformed
        :raises NotAcceptable: if no mediatype could be negotiated
        :raises InternalServerError: if the negotiated mediatype has no
            transformer and is not ``text/plain``
        """
        default_mediatype = kwargs.pop('fallback_mediatype', None) or self.default_mediatype
        mediatype = request.accept_mimetypes.best_match(
            self.representations,
            default=default_mediatype,
        )
        if mediatype is None:
            raise NotAcceptable()
        if mediatype in self.representations:
            resp = self.representations[mediatype](data, *args, **kwargs)
            resp.headers['Content-Type'] = mediatype
            return resp
        elif mediatype == 'text/plain':
            resp = original_flask_make_response(str(data), *args, **kwargs)
            resp.headers['Content-Type'] = 'text/plain'
            return resp
        else:
            raise InternalServerError()

    def mediatypes(self):
        """Returns a list of requested mediatypes sent in the Accept header"""
        return [h for h, q in sorted(request.accept_mimetypes,
                                     key=operator.itemgetter(1), reverse=True)]

    def representation(self, mediatype):
        """Allows additional representation transformers to be declared for the
        api. Transformers are functions that must be decorated with this
        method, passing the mediatype the transformer represents. Three
        arguments are passed to the transformer:

        * The data to be represented in the response body
        * The http status code
        * A dictionary of headers

        The transformer should convert the data appropriately for the mediatype
        and return a Flask response object.

        Ex::

            @api.representation('application/xml')
            def xml(data, code, headers):
                resp = make_response(convert_data_to_xml(data), code)
                resp.headers.extend(headers)
                return resp
        """
        def wrapper(func):
            self.representations[mediatype] = func
            return func
        return wrapper

    def unauthorized(self, response):
        """ Given a response, change it to ask for credentials """

        if self.serve_challenge_on_401:
            realm = current_app.config.get("HTTP_BASIC_AUTH_REALM", "flask-restful")
            challenge = u"{0} realm=\"{1}\"".format("Basic", realm)

            response.headers['WWW-Authenticate'] = challenge
        return response
def group_all_export(request, group_slug):
    """
    Export every member and subscriber of one group as an Excel file.

    The group is looked up by ``group_slug`` (404 if missing) and the
    requesting user must hold ``user_groups.change_group`` on it, otherwise
    ``Http403`` is raised.  Members are written first, one row each, followed
    by subscribers whose form-entry fields may add extra columns.  The
    workbook is returned as an ``application/vnd.ms-excel`` attachment.
    """
    group = get_object_or_404(Group, slug=group_slug)

    # Whoever may edit the group may also export it.
    if not has_perm(request.user, 'user_groups.change_group', group):
        raise Http403

    import xlwt
    from ordereddict import OrderedDict
    from django.db import connection
    from tendenci.apps.forms_builder.forms.models import FieldEntry

    # Workbook with a single sheet holding both members and subscribers.
    book = xlwt.Workbook(encoding='utf8')
    sheet = book.add_sheet('Group Members and Subscribers')

    # row_index: member position / "subscriber <pk>" -> sheet row
    # col_index: column label -> sheet column
    row_index = {}
    col_index = {}

    # Cell styles; dates and datetimes get an explicit number format.
    default_style = xlwt.Style.default_style
    datetime_style = xlwt.easyxf(num_format_str='mm/dd/yyyy hh:mm')
    date_style = xlwt.easyxf(num_format_str='mm/dd/yyyy')

    def style_for(value):
        # datetime is tested before date so datetimes keep their time part.
        if isinstance(value, datetime):
            return datetime_style
        if isinstance(value, date):
            return date_style
        return default_style

    # ---------
    # MEMBERS
    # ---------

    # Column label -> database lookup; ordered so the sheet columns keep
    # this exact order.
    group_mappings = OrderedDict((
        ('user_id', 'au.id'),
        ('first_name', 'au.first_name'),
        ('last_name', 'au.last_name'),
        ('email', 'au.email'),
        ('receives email', 'pp.direct_mail'),
        ('company', 'pp.company'),
        ('address', 'pp.address'),
        ('address2', 'pp.address2'),
        ('city', 'pp.city'),
        ('state', 'pp.state'),
        ('zipcode', 'pp.zipcode'),
        ('country', 'pp.country'),
        ('phone', 'pp.phone'),
        ('is_active', 'au.is_active'),
        ('date', 'gm.create_dt'),
    ))
    group_lookups = ','.join(group_mappings.values())

    # Raw SQL is used so that user and profile data for all members arrive
    # in a single query instead of one profile query per member.
    cursor = connection.cursor()
    sql = "SELECT %s FROM user_groups_groupmembership gm \
           INNER JOIN auth_user au ON (au.id = gm.member_id) \
           LEFT OUTER JOIN profiles_profile pp \
           on (pp.user_id = gm.member_id) WHERE group_id = %%s;"
    sql = sql % group_lookups
    cursor.execute(sql, [group.pk])
    values_list = list(cursor.fetchall())

    # Header row: one column per mapping key, in mapping order.
    for header in group_mappings:
        if header in col_index:
            continue
        position = len(col_index)
        col_index[header] = position
        sheet.write(0, position, header, style=default_style)

    # One sheet row per member, starting right below the header.
    for row, row_data in enumerate(values_list):
        for col, val in enumerate(row_data):
            # Record the member's row the first time one of its cells is seen.
            row_index.setdefault(row, row + 1)
            sheet.write(row + 1, col, val, style=style_for(val))

    # -------------
    # Subscribers
    # -------------
    entries = FieldEntry.objects.filter(
        entry__subscriptions__group=group).distinct()

    for entry in entries:
        val = entry.value
        field = entry.field.label.lower().replace(" ", "_")
        subscriber_key = "subscriber %s" % str(entry.entry.pk)

        # A subscriber seen for the first time takes the next free row,
        # counted after every row already indexed.
        if subscriber_key not in row_index:
            row_index[subscriber_key] = len(row_index) + 1
        row = row_index[subscriber_key]

        # Unknown field labels open (and label) a new column.
        if field not in col_index:
            col_index[field] = len(col_index)
            sheet.write(0, col_index[field], field, style=default_style)
        col = col_index[field]

        sheet.write(row, col, val, style=style_for(val))

    response = HttpResponse(content_type='application/vnd.ms-excel')
    response['Content-Disposition'] = \
        'attachment; filename=group_%s_all_export.xls' % group.pk
    book.save(response)
    return response
class ObservationRows:
    """Lookup table of index-file rows.

    Rows are grouped under a scan/feed/window/polarization key.  Each group
    records the FITS extension, the list of FITS table rows, and the OBSID,
    PROCNAME, PROCSCAN and NCHANS values supplied with the first row added
    for that key.  Use ``addRow`` to populate the table and ``get`` to
    retrieve the record for one key.

    """

    def __init__(self):
        self.rows = OrderedDict()
        self.Key = namedtuple('key', 'scan, feed, window, polarization')

    def __repr__(self):
        template = 'Scans: {0}\nFeeds: {1}\nWindows: {2}\nPols: {3}'
        return template.format(self.scans(), self.feeds(),
                               self.windows(), self.pols())

    def addRow(self, scan, feed, window, polarization,
               fitsExtension, rowOfFitsFile, obsid, procname, procscan, nchans):
        """Register one FITS row under its scan/feed/window/polarization key.

        The first row seen for a key creates the record; later rows for the
        same key only extend its 'ROW' list.

        """
        key = self.Key(scan, feed, window, polarization)

        if key not in self.rows:
            self.rows[key] = {'EXTENSION': fitsExtension,
                              'ROW': [rowOfFitsFile],
                              'OBSID': obsid,
                              'PROCNAME': procname,
                              'PROCSCAN': procscan,
                              'NCHANS': nchans}
            return

        self.rows[key]['ROW'].append(rowOfFitsFile)

    def get(self, scan, feed, window, polarization):
        """Return the record stored for scan/feed/window/polarization.

        Raises KeyError when nothing was added for that combination.

        """
        # A plain tuple compares and hashes equal to the namedtuple key.
        return self.rows[(scan, feed, window, polarization)]

    def scans(self):
        """Return the sorted list of distinct scans in the observation.

        """
        return sorted({key.scan for key in self.rows})

    def feeds(self):
        """Return the distinct feeds in the observation (unsorted).

        """
        return list({key.feed for key in self.rows})

    def windows(self):
        """Return the distinct windows in the observation (unsorted).

        """
        return list({key.window for key in self.rows})

    def pols(self):
        """Return the distinct polarizations in the observation (unsorted).

        """
        return list({key.polarization for key in self.rows})
class Application(object):
    """Poor WSGI application which is called by WSGI server.

    How it works is described in PEP 0333. This object stores the route
    dispatch tables and offers methods for maintaining them, and of course
    the __call__ method so an instance can be used as WSGI application.
    """

    __instances = []

    def __init__(self, name="__main__"):
        """Application class is per name singleton.

        That means, there can exist only one instance with the same name.
        RuntimeError is raised when the name is already used.
        """
        if name in Application.__instances:
            raise RuntimeError('Application with name %s exist yet.' % name)
        Application.__instances.append(name)

        # Application name
        self.__name = name

        # list of pre and post process handlers
        self.__pre = []
        self.__post = []

        # dhandlers table for default handers on methods {METHOD_GET: handler}
        self.__dhandlers = {}

        # handlers table of simple paths: {'/path': {METHOD_GET: handler}}
        self.__handlers = {}

        # filter name -> (regular expression, convertor)
        self.__filters = {
            ':int': (r'-?\d+', int),
            ':float': (r'-?\d+(\.\d+)?', float),
            ':word': (r'\w+', uni),
            ':hex': (r'[0-9a-fA-F]+', str),
            ':re:': (None, uni),
            'none': (r'[^/]+', uni)
        }

        # handlers of regex paths: {r'/user/([a-z]?)': {METHOD_GET: handler}}
        self.__rhandlers = OrderedDict()

        # http state handlers: {HTTP_NOT_FOUND: {METHOD_GET: my_404_handler}}
        self.__shandlers = {}

        # -- Application variable
        self.__config = {
            'auto_args': True,
            'auto_form': True,
            'auto_json': True,
            'keep_blank_values': 0,
            'strict_parsing': 0,
            'json_content_types': [
                'application/json',
                'application/javascript',
                'application/merge-patch+json'],
            'form_content_types': [
                'application/x-www-form-urlencoded',
                'multipart/form-data'
            ],
            'auto_cookies': True,
            'debug': 'Off',
            'document_root': '',
            'document_index': 'Off',
            'secret_key': '%s%s%s%s' %
                          (__version__, version, getcwd(),
                           ''.join(str(x) for x in uname()))
        }

        try:
            self.__log_level = levels[environ.get('poor_LogLevel',
                                                  'warn').lower()]
        except KeyError:
            # unknown level name in poor_LogLevel
            # NOTE(review): assumes `levels` is a plain dict - confirm
            self.__log_level = LOG_WARNING
            self.log_error('Bad poor_LogLevel, default is warn.', LOG_WARNING)
        # endtry
    # enddef

    def __regex(self, match):
        """Return named regular expression group for one route group.

        match is one match of re_filter; its first group is used as the
        group name and its second one as the filter (a missing filter
        becomes the 'none' filter). RuntimeError is raised for an unknown
        filter.
        """
        groups = match.groups()
        raw_filter = str(groups[1])     # 'None' when group has no filter
        _filter = raw_filter.lower()    # filter names are case insensitive

        if _filter in self.__filters:
            regex = self.__filters[_filter][0]
        elif _filter[:4] == ':re:':     # :re: filter have user defined regex
            # use the original text, lower-casing would change the meaning
            # of the expression (\W -> \w, [A-F] -> [a-f])
            regex = raw_filter[4:]
        else:
            raise RuntimeError("Undefined route group filter '%s'" % _filter)

        return "(?P<%s>%s)" % (groups[0], regex)
    # enddef

    def __convertor(self, _filter):
        """Return convert function of filter, or raise RuntimeError."""
        _filter = str(_filter).lower()
        # every user defined regular expression shares the ':re:' convertor
        _filter = ':re:' if _filter[:4] == ':re:' else _filter
        try:
            return self.__filters[_filter][1]
        except KeyError:
            raise RuntimeError("Undefined route group filter '%s'" % _filter)

    @property
    def name(self):
        """Return application name."""
        return self.__name

    @property
    def filters(self):
        """Copy of filter table.

        Filter table contains regular expressions and convert functions,
        see Application.set_filter and Application.route.

        Default filters are:
            :int - match number and convert it to int
            :float - match number and convert it to float
            :word - match one unicode word
            :hex - match hexadecimal value and convert it to str
            :re: - match user defined regular expression
            none - match any string without '/' character

        For more details see {/debug-info} page of your application, where
        you see all filters with regular expression definition.
        """
        return self.__filters.copy()

    @property
    def pre(self):
        """Tuple of table with pre-process handlers.

        See Application.pre_process.
        """
        return tuple(self.__pre)

    @property
    def post(self):
        """Tuple of table with post-process handlers.

        See Application.post_process.
        """
        return tuple(self.__post)

    @property
    def dhandlers(self):
        """Copy of table with default handlers.

        See Application.set_default
        """
        return self.__dhandlers.copy()

    @property
    def handlers(self):
        """Copy of table with static handlers.

        See Application.route.
        """
        return self.__handlers.copy()

    @property
    def rhandlers(self):
        """Copy of table with regular expression handlers.

        See Application.route and Application.rroute.
        """
        return self.__rhandlers.copy()

    @property
    def shandlers(self):
        """Copy of table with http state aka error handlers.

        See Application.http_state
        """
        return self.__shandlers.copy()

    @property
    def auto_args(self):
        """Automatic parsing request arguments from uri.

        If it is True (default), Request object do automatic parsing request
        uri to its args variable.
        """
        return self.__config['auto_args']

    @auto_args.setter
    def auto_args(self, value):
        self.__config['auto_args'] = bool(value)

    @property
    def auto_form(self):
        """Automatic parsing arguments from request body.

        If it is True (default) and method is POST, PUT or PATCH, and
        request content type is one of form_content_types, Request
        object do automatic parsing request body to its form variable.
        """
        return self.__config['auto_form']

    @auto_form.setter
    def auto_form(self, value):
        self.__config['auto_form'] = bool(value)

    @property
    def auto_json(self):
        """Automatic parsing JSON from request body.

        If it is True (default), method is POST, PUT or PATCH and request
        content type is one of json_content_types, Request object do
        automatic parsing request body to json variable.
        """
        return self.__config['auto_json']

    @auto_json.setter
    def auto_json(self, value):
        self.__config['auto_json'] = bool(value)

    @property
    def auto_cookies(self):
        """Automatic parsing cookies from request headers.

        If it is True (default) and Cookie request header was set,
        SimpleCookie object was parsed to Request property cookies.
        """
        return self.__config['auto_cookies']

    @auto_cookies.setter
    def auto_cookies(self, value):
        self.__config['auto_cookies'] = bool(value)

    @property
    def debug(self):
        """Application debug as another way how to set poor_Debug.

        This setting will be rewrite by poor_Debug environment variable.
        """
        return self.__config['debug'] == 'On'

    @debug.setter
    def debug(self, value):
        self.__config['debug'] = 'On' if bool(value) else 'Off'

    @property
    def document_root(self):
        """Application document_root as another way how to set
        poor_DocumentRoot.

        This setting will be rewrite by poor_DocumentRoot environ variable.
        """
        return self.__config['document_root']

    @document_root.setter
    def document_root(self, value):
        self.__config['document_root'] = value

    @property
    def document_index(self):
        """Application document_index flag.

        True when the 'document_index' configuration value is 'On'. It is
        tested by handler_from_table before a directory index is returned.
        """
        return self.__config['document_index'] == 'On'

    @document_index.setter
    def document_index(self, value):
        self.__config['document_index'] = 'On' if bool(value) else 'Off'

    @property
    def secret_key(self):
        """Application secret_key could be replace by poor_SecretKey in
        request.

        Secret key is used by PoorSession class. It is generate from
        some server variables, and the best way is set to your own long
        key.
        """
        return self.__config['secret_key']

    @secret_key.setter
    def secret_key(self, value):
        self.__config['secret_key'] = value

    @property
    def keep_blank_values(self):
        """Keep blank values in request arguments.

        If it is 1 (0 is default), automatic parsing request uri or body
        keep blank values as empty string.
        """
        return self.__config['keep_blank_values']

    @keep_blank_values.setter
    def keep_blank_values(self, value):
        self.__config['keep_blank_values'] = int(value)

    @property
    def strict_parsing(self):
        """Strict parse request arguments.

        If it is 1 (0 is default), automatic parsing request uri or body
        raise with exception on parsing error.
        """
        return self.__config['strict_parsing']

    @strict_parsing.setter
    def strict_parsing(self, value):
        self.__config['strict_parsing'] = int(value)

    @property
    def json_content_types(self):
        """List of json content types.

        Contains strings as json content types, which are used for testing,
        when automatic Json object is created from request body. The
        returned list is the configuration list itself (not a copy), so
        changing it changes the application configuration.
        """
        return self.__config['json_content_types']

    @property
    def form_content_types(self):
        """List of form content types.

        Contains strings as form content types, which are used for testing,
        when automatic Form object is created from request body. The
        returned list is the configuration list itself (not a copy), so
        changing it changes the application configuration.
        """
        return self.__config['form_content_types']

    def set_filter(self, name, regex, convertor=uni):
        r"""Create new filter or overwrite builtins.

        Arguments:
            name - Name of filter which is used in route or set_route
                   method. A leading ':' is added when it is missing.
            regex - regular expression which used for filter
            convertor - convertor function or class, which gets unicode in
                        input. Default is uni function, which is wrapper
                        to unicode string.

            app.set_filter('uint', r'\d+', int)
        """
        name = ':'+name if name[0] != ':' else name
        self.__filters[name] = (regex, convertor)

    def pre_process(self):
        """Append pre process handler.

        This is decorator for function to call before each request.

            @app.pre_process()
            def before_each_request(req):
                ...
        """
        def wrapper(fn):
            self.__pre.append(fn)
            return fn
        return wrapper
    # enddef

    def add_pre_process(self, fn):
        """Append pre process handler.

        Method adds function to list functions which is call before each
        request.

            app.add_pre_process(before_each_request)
        """
        self.__pre.append(fn)
    # enddef

    def post_process(self):
        """Append post process handler.

        This decorator append function to be called after each request,
        if you want to use it redefined all outputs.

            @app.post_process()
            def after_each_request(req):
                ...
        """
        def wrapper(fn):
            self.__post.append(fn)
            return fn
        return wrapper
    # enddef

    def add_post_process(self, fn):
        """Append post process handler.

        Method for direct append function to list functions which are called
        after each request.

            app.add_post_process(after_each_request)
        """
        self.__post.append(fn)
    # enddef

    def default(self, method=METHOD_HEAD | METHOD_GET):
        """Set default handler.

        This is decorator for default handler for http method (called before
        error_not_found).

            @app.default(METHOD_GET_POST)
            def default_get_post(req):
                # this function will be called if no uri match in internal
                # uri table with method. It's similar like not_found error,
                # but without error
                ...
        """
        def wrapper(fn):
            self.set_default(fn, method)
            return fn   # keep decorated name bound to the function
        return wrapper
    # enddef

    def set_default(self, fn, method=METHOD_HEAD | METHOD_GET):
        """Set default handler.

        Set fn default handler for http method called before
        error_not_found.

            app.set_default(default_get_post, METHOD_GET_POST)
        """
        for m in methods.values():
            if method & m:
                self.__dhandlers[m] = fn
    # enddef

    def pop_default(self, method):
        """Pop default handler for method.

        Raises KeyError when no default handler is set for the method.
        """
        return self.__dhandlers.pop(method)

    def route(self, uri, method=METHOD_HEAD | METHOD_GET):
        r"""Wrap function to be handler for uri and specified method.

        You can define uri as static path or as groups which are hand
        to handler as next parameters.

            # static uri
            @app.route('/user/post', method=METHOD_POST)
            def user_create(req):
                ...

            # group regular expression
            @app.route('/user/<name>')
            def user_detail(req, name):
                ...

            # group regular expression with filter
            @app.route('/<surname:word>/<age:int>')
            def surnames_by_age(req, surname, age):
                ...

            # group with own regular expression filter
            @app.route('/<car:re:\w+>/<color:re:#[\da-fA-F]+>')
            def car(req, car, color):
                ...

        If you can use some name of group which is python keyword, like
        class, you can use **kwargs syntax:

            @app.route('/<class>/<len:int>')
            def classes(req, **kwargs):
                return "'%s' class is %d lenght." % \
                    (kwargs['class'], kwargs['len'])

        Be sure with ordering of call this decorator or set_route function
        with groups regular expression. Regular expression routes are check
        with the same ordering, as you create internal table of them. First
        match stops any other searching. In fact, if groups are detect, they
        will be transfer to normal regular expression, and will be add to
        second internal table.
        """
        def wrapper(fn):
            self.set_route(uri, fn, method)
            return fn
        return wrapper
    # enddef

    def set_route(self, uri, fn, method=METHOD_HEAD | METHOD_GET):
        """Set handler for uri and method.

        Another way to add fn as handler for uri. See Application.route
        documentation for details.

            app.set_route('/use/post', user_create, METHOD_POST)
        """
        uri = uni(uri)

        if re_filter.search(uri):
            # uri contains groups: store it as regular expression route
            r_uri = re_filter.sub(self.__regex, uri) + '$'
            convertors = tuple((g[0], self.__convertor(g[1]))
                               for g in (m.groups()
                                         for m in re_filter.finditer(uri)))
            self.set_rroute(r_uri, fn, method, convertors)
        else:
            if uri not in self.__handlers:
                self.__handlers[uri] = {}
            for m in methods.values():
                if method & m:
                    self.__handlers[uri][m] = fn
    # enddef

    def pop_route(self, uri, method):
        """Pop handler for uri and method from handlers table.

        Method must be define unique, so METHOD_GET_POST could not be use.
        If you want to remove handler for both methods, you must call pop
        route for each method state. KeyError is raised when no handler is
        registered for uri and method.
        """
        uri = uni(uri)

        if re_filter.search(uri):
            r_uri = re_filter.sub(self.__regex, uri) + '$'
            return self.pop_rroute(r_uri, method)
        else:
            handlers = self.__handlers.get(uri, {})
            rv = handlers.pop(method)
            if not handlers:    # is empty
                self.__handlers.pop(uri, None)
            return rv

    def is_route(self, uri):
        """Check if uri have any registered record."""
        uri = uni(uri)
        if re_filter.search(uri):
            r_uri = re_filter.sub(self.__regex, uri) + '$'
            return self.is_rroute(r_uri)
        return uri in self.__handlers

    def rroute(self, ruri, method=METHOD_HEAD | METHOD_GET):
        r"""Wrap function to be handler for uri defined by regular expression.

        Both of function, rroute and set_rroute store routes to special
        internal table, which is another to table of static routes.

            @app.rroute(r'/user/\w+')               # simple regular expression
            def any_user(req):
                ...

            @app.rroute(r'/user/(?P<user>\w+)')     # regular expression with
            def user_detail(req, user):             # groups
                ...

        Be sure with ordering of call this decorator or set_rroute function.
        Regular expression routes are check with the same ordering, as you
        create internal table of them. First match stops any other searching.
        """
        def wrapper(fn):
            self.set_rroute(ruri, fn, method)
            return fn
        return wrapper
    # enddef

    def set_rroute(self, r_uri, fn, method=METHOD_HEAD | METHOD_GET,
                   convertors=()):
        r"""Set handler for uri defined by regular expression.

        Another way to add fn as handler for uri defined by regular
        expression. See Application.rroute documentation for details.

            app.set_rroute('/use/\w+/post', user_create, METHOD_POST)

        This method is internally use, when groups are found in static
        route, adding by route or set_route method. In that case convertors
        is a tuple of (group name, convert function) pairs.
        """
        r_uri = re.compile(r_uri, re.U)
        if r_uri not in self.__rhandlers:
            self.__rhandlers[r_uri] = {}
        for m in methods.values():
            if method & m:
                self.__rhandlers[r_uri][m] = (fn, convertors)
    # enddef

    def pop_rroute(self, r_uri, method):
        """Pop handler and convertors for uri and method from handlers table.

        For more details see Application.pop_route.
        """
        r_uri = re.compile(r_uri, re.U)
        handlers = self.__rhandlers.get(r_uri, {})
        rv = handlers.pop(method)
        if not handlers:    # is empty
            self.__rhandlers.pop(r_uri, None)
        return rv

    def is_rroute(self, r_uri):
        """Check if regular expression uri have any registered record."""
        r_uri = re.compile(r_uri, re.U)
        return r_uri in self.__rhandlers

    def http_state(self, code, method=METHOD_HEAD | METHOD_GET | METHOD_POST):
        """Wrap function to handle http status codes like http errors."""
        def wrapper(fn):
            self.set_http_state(code, fn, method)
            return fn   # keep decorated name bound to the function
        return wrapper
    # enddef

    def set_http_state(self, code, fn,
                       method=METHOD_HEAD | METHOD_GET | METHOD_POST):
        """Set fn as handler for http state code and method."""
        if code not in self.__shandlers:
            self.__shandlers[code] = {}
        for m in methods.values():
            if method & m:
                self.__shandlers[code][m] = fn
    # enddef

    def pop_http_state(self, code, method):
        """Pop handler for http state and method.

        As Application.pop_route, for pop multimethod handler, you must call
        pop_http_state for each method. KeyError is raised when no handler
        is registered for code and method.
        """
        handlers = self.__shandlers.get(code, {})
        rv = handlers.pop(method)
        if not handlers:    # is empty
            self.__shandlers.pop(code, None)
        return rv

    def error_from_table(self, req, code):
        """Internal method, which is called if error was occurred.

        If status code is in Application.shandlers (fill with http_state
        function), call this handler. Otherwise the handler from
        default_shandlers is called, or not_implemented when the code is
        unknown there too.
        """
        if code in self.__shandlers \
                and req.method_number in self.__shandlers[code]:
            try:
                handler = self.__shandlers[code][req.method_number]
                if 'uri_handler' not in req.__dict__:
                    # no end-point handler was selected for this request
                    req.uri_rule = '_%d_error_handler_' % code
                    req.uri_handler = handler
                    self.handler_from_pre(req)       # call pre handlers now
                handler(req)
            except:
                # anything raised by user error handler ends as error 500
                internal_server_error(req)
        elif code in default_shandlers:
            handler = default_shandlers[code][METHOD_GET]
            handler(req)
        else:
            not_implemented(req, code)
    # enddef

    def handler_from_default(self, req):
        """Internal method, which is called if no handler is found."""
        if req.method_number in self.__dhandlers:
            req.uri_rule = '_default_handler_'
            req.uri_handler = self.__dhandlers[req.method_number]
            self.handler_from_pre(req)       # call pre handlers now
            retval = self.__dhandlers[req.method_number](req)
            if retval != DECLINED:
                raise SERVER_RETURN(retval)
    # enddef

    def handler_from_pre(self, req):
        """Internal method, which run all pre (pre_process) handlers.

        This method was call before end-point route handler.
        """
        for fn in self.__pre:
            fn(req)

    def handler_from_table(self, req):
        """Call right handler from handlers table (fill with route function).

        If no handler is found, try to find directory or file if Document
        Root, resp. Document Index is set. Then try to call default handler
        for right method or call handler for status code 404 - not found.

        This method always ends with raising SERVER_RETURN, unless handler
        raises another exception.
        """
        # static routes
        if req.uri in self.__handlers:
            if req.method_number in self.__handlers[req.uri]:
                handler = self.__handlers[req.uri][req.method_number]
                req.uri_rule = req.uri      # nice variable for pre handlers
                req.uri_handler = handler
                self.handler_from_pre(req)  # call pre handlers now
                retval = handler(req)       # call right handler now
                # return text is allowed
                if isinstance(retval, str) \
                        or (_unicode_exist and isinstance(retval, unicode)):
                    req.write(retval, 1)    # write data and flush
                    retval = DONE
                if retval != DECLINED:
                    raise SERVER_RETURN(retval or DONE)  # could be state.DONE
            else:
                raise SERVER_RETURN(HTTP_METHOD_NOT_ALLOWED)
            # endif
        # endif

        # regular expression
        for ruri in self.__rhandlers.keys():
            match = ruri.match(req.uri)
            if match and req.method_number in self.__rhandlers[ruri]:
                handler, convertors = self.__rhandlers[ruri][req.method_number]
                req.uri_rule = ruri.pattern  # nice variable for pre handlers
                req.uri_handler = handler
                self.handler_from_pre(req)   # call pre handlers now
                if len(convertors):
                    # create OrderedDict from match instead of dict for
                    # convertors applying
                    req.groups = OrderedDict(
                        (g, c(v))for ((g, c), v) in zip(convertors,
                                                        match.groups()))
                    retval = handler(req, *req.groups.values())
                else:
                    req.groups = match.groupdict()
                    retval = handler(req, *match.groups())
                # return text is allowed
                if isinstance(retval, str) \
                        or (_unicode_exist and isinstance(retval, unicode)):
                    req.write(retval, 1)    # write data and flush
                    retval = DONE
                if retval != DECLINED:
                    raise SERVER_RETURN(retval or DONE)  # could be state.DONE
            # endif - no METHOD_NOT_ALLOWED here
        # endfor

        # try file or index
        if req.document_root():
            rfile = "%s%s" % (uni(req.document_root()),
                              path.normpath("%s" % uni(req.uri)))

            if not path.exists(rfile):
                if req.debug and req.uri == '/debug-info':  # work if debug
                    req.uri_rule = '_debug_info_'
                    req.uri_handler = debug_info
                    self.handler_from_pre(req)  # call pre handlers now
                    raise SERVER_RETURN(debug_info(req, self))
                self.handler_from_default(req)  # try default
                raise SERVER_RETURN(HTTP_NOT_FOUND)  # not found

            # return file
            if path.isfile(rfile) and access(rfile, R_OK):
                req.uri_rule = '_send_file_'
                req.uri_handler = send_file
                self.handler_from_pre(req)      # call pre handlers now
                req.log_error("Return file: %s" % req.uri, LOG_INFO)
                raise SERVER_RETURN(send_file(req, rfile))

            # return directory index
            if req.document_index and path.isdir(rfile) \
                    and access(rfile, R_OK):
                req.log_error("Return directory: %s" % req.uri, LOG_INFO)
                req.uri_rule = '_directory_index_'
                req.uri_handler = directory_index
                self.handler_from_pre(req)      # call pre handlers now
                raise SERVER_RETURN(directory_index(req, rfile))

            raise SERVER_RETURN(HTTP_FORBIDDEN)
        # endif

        if req.debug and req.uri == '/debug-info':
            req.uri_rule = '_debug_info_'
            req.uri_handler = debug_info
            self.handler_from_pre(req)          # call pre handlers now
            raise SERVER_RETURN(debug_info(req, self))

        self.handler_from_default(req)

        req.log_error("404 Not Found: %s" % req.uri, LOG_ERR)
        raise SERVER_RETURN(HTTP_NOT_FOUND)
    # enddef

    def __request__(self, environ, start_response):
        """Create Request instance and return wsgi response.

        This method create Request object, call handlers from
        Application.__pre (Application.handler_from_pre),
        uri handler (handler_from_table), default handler
        (Application.handler_from_default) or error handler
        (Application.error_from_table), and handlers from
        Application.__post.
        """
        req = Request(environ, start_response, self.__config)

        try:
            self.handler_from_table(req)
        except SERVER_RETURN as e:
            code = e.args[0]
            if code in (OK, HTTP_OK, DONE):
                pass
            # XXX: elif code in (HTTP_MOVED_PERMANENTLY,
            #                    HTTP_MOVED_TEMPORARILY):
            else:
                req.status = code
                self.error_from_table(req, code)
        except (BrokenClientConnection, SystemExit) as e:
            req.log_error(str(e), LOG_ERR)
            req.log_error(' *** You shoud ignore next error ***', LOG_ERR)
            return ()
        except:
            # request boundary: any other error is answered as error 500
            self.error_from_table(req, 500)
        # endtry

        try:    # call post_process handler
            for fn in self.__post:
                fn(req)
        except:
            self.error_from_table(req, 500)
        # endtry

        return req.__end_of_request__()    # private call of request
    # enddef

    def __call__(self, environ, start_response):
        """Callable define for Application instance.

        This method run __request__ method.
        """
        if self.__name == '__poorwsgi__':
            stderr.write("[W] Using deprecated instance of Application.\n")
            stderr.write(" Please, create your own instance\n")
            stderr.flush()
        return self.__request__(environ, start_response)

    def __profile_request__(self, environ, start_response):
        """Profiler version of __request__.

        This method is used if set_profile is used.
        """
        def wrapper(rv):
            rv.append(self.__original_request__(environ, start_response))

        rv = []
        uri_dump = (self._dump + environ.get('PATH_INFO').replace('/', '_') +
                    '.profile')
        self.log_error('Generate %s' % uri_dump, LOG_INFO)
        # wrapper and rv are found by the profiler in locals()
        self._runctx('wrapper(rv)', globals(), locals(), filename=uri_dump)
        return rv[0]
    # enddef

    def __repr__(self):
        return '%s - callable Application class instance' % self.__name

    def set_profile(self, runctx, dump):
        """Set profiler for __call__ function.

        Arguments:
            runctx - function from profiler module
            dump - path and prefix for .profile files

        Typical usage:

            import cProfile

            cProfile.runctx('from simple import *', globals(), locals(),
                            filename="log/init.profile")
            app.set_profile(cProfile.runctx, 'log/req')
        """
        self._runctx = runctx
        self._dump = dump

        self.__original_request__ = self.__request__
        self.__request__ = self.__profile_request__
    # enddef

    def del_profile(self):
        """Remove profiler from application."""
        self.__request__ = self.__original_request__

    def get_options(self):
        """Returns dictionary with application variables from system
        environment.

        Application variables start with {app_} prefix, but in returned
        dictionary is set without this prefix.

            #!ini
            poor_LogLevel = warn        # Poor WSGI variable
            app_db_server = localhost   # application variable db_server
            app_templates = app/templ   # application variable templates

        This method works like Request.get_options, but work with
        os.environ, so it works only with wsgi servers, which set not only
        request environ, but os.environ too. Apaches mod_wsgi don't do that,
        uWsgi and PoorHTTP do that.
        """
        options = {}
        for key, val in environ.items():
            key = key.strip()
            if key[:4].lower() == 'app_':
                options[key[4:].lower()] = val.strip()
        return options
    # enddef

    def log_error(self, message, level=LOG_ERR):
        """Logging method with the same functionality like in Request object.

        But as get_options read configuration from os.environ which could
        not work in same wsgi servers like Apaches mod_wsgi.

        This method write to stderr so messages, could not be found in
        servers error log!
        """
        if self.__log_level[0] >= level[0]:
            if _unicode_exist and isinstance(message, unicode):
                message = message.encode('utf-8')
            try:
                stderr.write("<%s> [%s] %s\n" % (level[1], self.__name,
                                                 message))
            except UnicodeEncodeError:
                # stderr can't encode the message, write it escaped
                if _unicode_exist:
                    message = message.decode('utf-8').encode(
                        'ascii', 'backslashreplace')
                else:
                    message = message.encode(
                        'ascii', 'backslashreplace').decode('ascii')

                stderr.write("<%s> [%s] %s\n" % (level[1], self.__name,
                                                 message))
            stderr.flush()
    # enddef

    def log_info(self, message):
        """Logging method, which create message as LOG_INFO level."""
        self.log_error(message, LOG_INFO)

    def log_debug(self, message):
        """Logging method, which create message as LOG_DEBUG level."""
        self.log_error(message, LOG_DEBUG)

    def log_warning(self, message):
        """Logging method, which create message as LOG_WARNING level."""
        self.log_error(message, LOG_WARNING)
def include_schema():
    """Merge the fields of included schema files into each top level schema.

    For every entry of ``globals.edg_schema`` with a non-empty
    ``include_schema`` list, each listed file is read as UTF-8 JSON (the
    literal text ``<Application>`` is replaced by the schema's ``name``
    first), checked, and its ``fields`` are merged into a new OrderedDict:
    the included files first, in list order, then the fields of the top
    level schema itself.  The result replaces
    ``globals.edg_schema[schema]['fields']``.

    Returns:
        True when all schemas were processed.  False, after printing a
        message, on the first problem found: a schema including its own
        file, a file listed twice, an unreadable file, invalid JSON, an
        included file that includes further files, a missing 'name' or
        'fields' tag, fields rejected by validate_fields, or a field name
        defined more than once.  Schemas processed before the failing one
        keep their merged fields.
    """
    # NOTE(review): `globals` is presumably a project module shadowing the
    # builtin of the same name - it is not defined in this block.
    # Iterate over a snapshot, entries of edg_schema are updated below.
    for schema, value in list(globals.edg_schema.items()):
        own_schema_path = \
            globals.edg_conf['conf']['metadata_info'][schema]['schema_path']

        # Continue to next schema if this schema has no other include files
        schema_files = value.get('include_schema')
        if not schema_files:
            continue

        # Check for self inclusion
        if own_schema_path in set(schema_files):
            print("Error: Cannot self include schema in %s : schema_include"
                  % schema)
            return False

        # Check for duplicate inclusion
        if len(set(schema_files)) != len(schema_files):
            print("Error: File duplication in %s : schema_include list "
                  % schema)
            return False

        schema_data = OrderedDict()
        for schema_file in schema_files:
            # Read the file once; the handle is closed on every path
            try:
                with open(schema_file, 'rb') as json_file:
                    raw_data = json_file.read()
            except IOError:
                print('Error : Included schema file: "%s", do not exist'
                      % schema_file)
                return False

            # Parse JSON data in schema file.  The placeholder is replaced
            # literally, so backslashes in the name are not interpreted.
            str_data = raw_data.decode("utf-8")
            schema_data_json = str_data.replace(
                '<Application>', globals.edg_schema[schema]['name'])
            try:
                included = json.loads(schema_data_json,
                                      object_pairs_hook=OrderedDict)
            except ValueError as e:
                print("Error parsing JSON in included schema file :  %s"
                      % schema_file)
                print(e)
                return False
            schema_data[schema_file] = included
            print("Include Schema File " + schema_file + " Parsed")

            # Check for more than one level of include
            if included.get('include_schema'):
                print("Error: more than one level of schema nesting in %s "
                      ": include_schema : %s" % (schema, schema_file))
                return False

            # Schema structure validation and expansion
            if 'name' not in included:
                print("%s schema does not contain mandatory 'name' tag"
                      % schema_file)
                return False
            if 'fields' not in included:
                print("%s schema does not contain mandatory 'fields' tag"
                      % schema_file)
                return False
            if not validate_fields(schema_file, included['fields']):
                return False

        # Tweak for maintaining order of included fields
        ordered_fields = OrderedDict()
        for schema_file in schema_files:
            included_fields = schema_data[schema_file]['fields']
            key_intersect = list(
                set(ordered_fields.keys()) & set(included_fields.keys()))
            if key_intersect:
                print("Following duplicate keys found in nested schema file: "
                      " %s in %s" % (schema_file, schema))
                print(key_intersect)
                return False
            ordered_fields.update(included_fields)

        # Now add fields of the Top schema
        key_intersect = list(
            set(ordered_fields.keys()) & set(value['fields'].keys()))
        if key_intersect:
            print("Following duplicate keys found in top level schema file "
                  "of %s" % schema)
            print(key_intersect)
            return False
        ordered_fields.update(value['fields'])
        globals.edg_schema[schema]['fields'] = ordered_fields

    return True
def _quote_sql_identifier(name):
    """Return ``name`` as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


def callservice(conn, schemaname, servicename, querystring):
    """Call the database function behind a REST service and format the result.

    The query string is split into the standard optional parameters
    (``format``, ``fields``, ``includemetadata``, ``metadataname``,
    ``rootname``, ``parseparams``, ``sortfield``, ``dplimit``; ``callback`` is
    discarded) and the arguments for ``<schemaname>.<servicename>``.  The
    function's parameter list is looked up with
    ``utils.dopa_rest_getparams``, array parameters are converted to lists,
    the function is queried and the rows are rendered in the requested format.

    Args:
        conn: object exposing a DB-API style cursor as ``conn.cur``.
            NOTE(review): the ``%(name)s`` placeholders and the exception
            names caught below suggest psycopg2 - confirm.
        schemaname: schema that owns the function; inserted into the SQL
            text as-is, so it must come from trusted code.
        servicename: function name; checked with ``isValidServiceName``.  A
            name ending in ``_h`` is first run through ``runHadoopQuery``
            and then called as ``_<servicename>``.
        querystring: raw query string handed to ``getQueryStringParams``.

    Returns:
        The value produced by the selected format branch (json/array, xml,
        xmlverbose, sms, email, html, csv, pdf).  When one of the handled
        exceptions is raised, the value of ``returnError(...)``.
    """
    t1 = datetime.datetime.now()
    # Defaults for the error handler, in case a handled exception is raised
    # before the request-specific values have been read.
    metadataName = 'metadata'
    rootName = 'records'
    sortField = ''
    try:
        # Request logging is not enabled at the moment because of permission issues.

        # PARSE THE STANDARD OPTIONAL INPUT PARAMETERS
        # the unquoting is to handle encoded parameters (like from extJS - 1,2,3 as a parameter becomes 1%2C2%2C3
        params = getQueryStringParams(querystring)
        format = params.setdefault('format', 'json')
        fields = params.setdefault('fields', '').split(",")  # fields will be passed as an array, e.g. iucn_species_id,wdpa_id
        includemetadata = params.setdefault('includemetadata', 'true')
        metadataName = params.setdefault('metadataname', 'metadata')
        rootName = params.setdefault('rootname', 'records')
        parseparams = params.setdefault('parseparams', 'true')
        sortField = params.setdefault('sortfield', '')
        decimalPlaceLimit = params.setdefault('dplimit', '2')
        # if the service is a call to a hadoop method then set a flag
        isHadoop = servicename[-2:] == '_h'

        # remove the standard optional parameters from the dictionary so we are left with just the parameters required for the function
        for standardname in ('format', 'fields', 'includemetadata', 'parseparams',
                             'metadataname', 'rootname', 'sortfield', 'dplimit'):
            del params[standardname]
        if 'callback' in params:
            del params['callback']

        # check if the service name is valid
        if not isValidServiceName(servicename):
            raise RESTServicesError('Invalid servicename')

        # ECAS authentication is currently not enforced here.

        # if it is a Hadoop query then we need to run it first before we actually use the values to get the data from postgresql
        if isHadoop:
            hadoopData = runHadoopQuery(conn, servicename, params)
            if hadoopData == '[]':
                hadoopData = '[-1]'
            servicename = "_" + servicename  # now call the postgresql function
            params.clear()
            params['species_ids'] = str(hadoopData)[1:-1]

        # PARSE AND CONVERT THE DATA TYPES OF THE OTHER INPUT PARAMETERS
        # get all the parameters for the function from postgresql
        conn.cur.callproc('utils.dopa_rest_getparams', [servicename])
        # the parameters come back as one string, e.g. 'wdpa_id integer, presence_id integer[]'
        functionparams = conn.cur.fetchone()
        hasparams = True if functionparams[0] else False
        if hasparams:
            functionparams = functionparams[0].split(',')
            # names of the function parameters which are array types
            arrayparamnames = [p.strip().split(" ")[0] for p in functionparams if '[' in p]
            # convert the array values into lists
            for key in params.keys():
                if key in arrayparamnames:
                    strlist = params[key].split(",")
                    if isNumeric(strlist[0]):
                        params[key] = [int(s) for s in strlist]
                    else:
                        params[key] = strlist
            # get the full list of function parameter names
            functionparamnames = [p.strip().split(" ")[0] for p in functionparams]
            # check that all parameters are correct
            invalidparamnames = [n for n in params.keys() if n not in functionparamnames]
            if invalidparamnames and parseparams == 'true':
                raise RESTServicesError('Invalid parameters: ' + ",".join(invalidparamnames))
            # put the input parameters in the right order
            params = OrderedDict([(n, params[n]) for n in functionparamnames if n in params.keys()])

        # GET THE SORT CLAUSE
        # The sort field and the output fields come straight from the query
        # string, so they are emitted as quoted identifiers rather than raw SQL.
        if sortField != "":
            sortClause = ' ORDER BY ' + _quote_sql_identifier(sortField)
        else:
            sortClause = ""

        # GET THE FIELDS CLAUSE
        if fields != ['']:
            fieldsClause = ",".join([_quote_sql_identifier(f) for f in fields])
        else:
            fieldsClause = "*"

        # RUN THE QUERY
        if hasparams:
            sql = "SELECT " + fieldsClause + " from " + schemaname + "." + servicename + "(" + ",".join([n + ":=%(" + n + ")s" for n in params]) + ")" + sortClause + ";"
            # run the query using named parameters
            conn.cur.execute(sql, params)
        else:
            # Select only the requested fields here as well: the column
            # handling below assumes the result holds exactly those fields.
            sql = "SELECT " + fieldsClause + " from " + schemaname + "." + servicename + "()" + sortClause + ";"
            conn.cur.execute(sql)
        rows = conn.cur.fetchall()

        # PROCESS THE ROWS AND WRITE THEM BACK TO THE CLIENT
        # NOTE(review): description and rowcount are read after close(), as
        # the original flow did - confirm the driver keeps them available.
        conn.cur.close()
        t2 = datetime.datetime.now()

        # METADATA SECTION OF RESPONSE
        allfields = [d.name for d in conn.cur.description]
        if fields == ['']:
            fields = allfields
        fieldcount = len(fields)
        fieldsdict = [dict([("name", d.name), ("type", gettypefromtypecode(d.type_code))]) for d in conn.cur.description if (d.name in fields)]
        if len(fieldsdict) != len(fields):
            raise RESTServicesError('Invalid output fields')
        metadatadict = OrderedDict([("duration", str(t2 - t1)), ("error", None), ("idProperty", conn.cur.description[0].name), ("successProperty", 'success'), ("totalProperty", 'recordCount'), ("success", True), ("recordCount", int(conn.cur.rowcount)), ("root", rootName), ("fields", fieldsdict)])

        # RECORDS SECTION OF THE RESPONSE
        # parse the float values and set the correct number of decimal places according to the decimalPlaceLimit variable - dont include lat/long fields as these must have more decimal places
        floatColumns = [i for i, d in enumerate(fieldsdict) if d['type'] == 'float' and d['name'] not in ['lat', 'lng']]
        for floatColumn in floatColumns:
            for row in rows:
                if row[floatColumn] is not None:  # check that the data is not null
                    row[floatColumn] = round(row[floatColumn], int(decimalPlaceLimit))

        # return the data
        colsRequired = [allfields.index(field) for field in fields]
        if format in ['json', 'array']:
            if format == 'json':
                recordsdict = [OrderedDict([(allfields[col], row[col]) for col in range(fieldcount) if (col in colsRequired)]) for row in rows]
            else:
                recordsdict = [[row[col] for col in range(fieldcount) if (col in colsRequired)] for row in rows]
            # this specifies how many decimal places are returned in the json with float values - currently set to 14 - good enough for returning lat/long coordinates
            json.encoder.FLOAT_REPR = lambda f: ("%.14g" % f)
            if includemetadata.lower() == 'true':
                responsejson = json.dumps(dict([(metadataName, metadatadict), (rootName, recordsdict)]), indent=1, cls=CustomJSONEncoder)
            else:
                responsejson = json.dumps(dict([(rootName, recordsdict)]), indent=1, cls=CustomJSONEncoder)
            return getJsonResponse(responsejson)

        elif format in ['xml', 'xmlverbose']:
            root = etree.Element('results')
            recordsnode = etree.Element(rootName)
            # null values are left out of the record
            recordsdicts = [OrderedDict([(allfields[col], str(row[col]).decode('utf-8')) for col in range(fieldcount) if (col in colsRequired) and str(row[col]) != 'None']) for row in rows]
            if format == 'xml':
                # compact form: one <record> element with the values as attributes
                for recorddict in recordsdicts:
                    recordsnode.append(etree.Element('record', recorddict))
            else:
                # verbose form: one child element per value
                for recorddict in recordsdicts:
                    record = etree.Element('record')
                    for (n, v) in recorddict.items():
                        el = etree.Element(n)
                        el.text = v
                        record.append(el)
                    recordsnode.append(record)
            root.append(recordsnode)
            web.header("Content-Type", "text/xml")
            return etree.tostring(root)

        elif format == 'sms':
            _twilio = twilio()
            client = TwilioRestClient(_twilio.twilio_account_sid, _twilio.twilio_auth_token)  # use the twilio api account
            bodystr = 'Hi Andrew - test species data: '
            bodystr = bodystr + str(rows[0])[:160 - len(bodystr)]
            message = client.sms.messages.create(to="+393668084920", from_="+19712647662", body=bodystr)  # my mobile
            return message

        elif format == 'email':
            _amazon_ses = amazon_ses()
            amazonSes = AmazonSES(_amazon_ses.AccessKeyID, _amazon_ses.SecretAccessKey)  # use the amazon simple email service api account
            message = EmailMessage()
            message.subject = 'JRC REST Services Information Request'
            message.bodyHtml = getResultsAsHTML(rows, fieldcount, colsRequired, metadatadict)
            result = amazonSes.sendEmail('*****@*****.**', '*****@*****.**', message)  # to me
            return result

        elif format == 'html':
            htmlData = getResultsAsHTML(rows, fieldcount, colsRequired, metadatadict)
            web.header("Content-Type", "text/html")
            return "<html><head></head><body>" + htmlData + "</body></html>"

        elif format == 'csv':
            data = [[row[col] for col in range(fieldcount) if (col in colsRequired)] for row in rows]
            colnames = ",".join([f["name"] for f in metadatadict["fields"]]) + "\n"
            output = colnames + "\n".join([",".join([getStringValue(col) for col in row]) for row in data])
            filename = "dataDownload.csv"  # hardcoded for now
            # a copy is also written to /tmp; the handle is closed even if the write fails
            with open(r'/tmp/' + filename, 'wb') as f:
                f.write(output)
            web.header("Content-Type", "text/plain")
            web.header("Content-Disposition", "attachment; filename=%s" % filename)
            return output

        elif format == 'pdf':
            config = pdfkit.configuration(wkhtmltopdf='/usr/local/bin/wkhtmltopdf')
            web.header("Content-Type", "application/pdf")
            htmlData = getResultsAsHTML(rows, fieldcount, colsRequired, metadatadict)
            return pdfkit.from_string(htmlData.decode('utf8'), False, configuration=config, options={'quiet': '', 'encoding': "UTF-8"})

        else:
            raise RESTServicesError('Invalid response format: ' + format)

    except (RESTServicesError, DataError, ProgrammingError, exceptions.TypeError, IndexError, IntegrityError, AmazonError, OperationalError) as e:
        t2 = datetime.datetime.now()
        if isinstance(e, AmazonError):
            msg = "There was an error sending the email. Make sure that the email address has been verified in Amazon Simple Email Services"
        else:
            msg = e.message
        logging.error(msg + "\n")
        # isinstance also covers driver-specific subclasses of ProgrammingError
        if isinstance(e, ProgrammingError):
            if "column" in e.message and "does not exist" in e.message and sortField != "":
                msg = "Invalid sortfield parameter: " + sortField
        return returnError(metadataName, rootName, t2 - t1, msg)
class Controller(object): def __init__(self, args={}): self.args = args # arguments from command line self.config = {} # config to be processed from .dexy files self.docs = [] self.timing = [] self.virtual_docs = [] self.batch_start_time = None self.batch_finish_time = None self.batch_elapsed_time = None # Set up logging if args.has_key("logsdir") and args.has_key("logfile"): self.log = dexy.utils.get_log("dexy.controller", args['logsdir'], args['logfile'], args['loglevel']) else: self.log = Constants.NULL_LOGGER # Set up db if args.has_key('dbclass') and args.has_key("logsdir") and args.has_key("dbfile"): self.db = dexy.utils.get_db(self.args['dbclass'], logsdir=self.args['logsdir'], dbfile=args['dbfile']) else: self.db = None # List of directories that reporters use, these will not be processed by dexy self.reports_dirs = dexy.introspect.reports_dirs(self.log) # list of artifact classes - if nothing else uses this then move # it into the if statement below and don't cache it self.artifact_classes = dexy.introspect.artifact_classes(self.log) if args.has_key('artifactclass'): if self.artifact_classes.has_key(args['artifactclass']): self.artifact_class = self.artifact_classes[args['artifactclass']] else: raise dexy.commands.UserFeedback("Artifact class name %s not found in %s" % (args['artifactclass'], ",".join(self.artifact_classes.keys()))) def run(self): """ This does all the work. 
""" self.batch_start_time = time.time() start = self.batch_start_time self.log.debug("populating Document class filter list") dexy.document.Document.filter_list = dexy.introspect.filters(self.log) self.timing.append(("populate-filter-list", time.time() - start)) start = time.time() self.log.debug("loading config...") self.load_config() self.log.debug("finished loading config.") self.timing.append(("load-config", time.time() - start)) start = time.time() self.log.debug("processing config, populating document list...") self.process_config() self.log.debug("finished processing config.") self.timing.append(("process-config", time.time() - start)) start = time.time() # set the list of documents which are virtual self.virtual_docs = [d for d in self.docs if d.virtual] try: if not self.args['dryrun']: [doc.setup() for doc in self.docs] self.docs = [doc.run() for doc in self.docs] except dexy.commands.UserFeedback as e: self.persist() raise e self.timing.append(("run-docs", time.time() - start)) self.batch_finish_time = time.time() self.batch_elapsed_time = self.batch_finish_time - self.batch_start_time self.log.debug("persisting batch info...") self.persist() self.log.debug("finished persisting.") self.log.debug("finished processing. elapsed time %s" % self.batch_elapsed_time) def persist(self): """ Persists the database. Saves some information about this batch in a JSON file (for use by reporters or for debugging). 
""" self.db.persist() dexy.utils.save_batch_info(self.batch_id, self.batch_info(), self.args['logsdir']) def batch_info(self): """ Dict of info to save """ return { "id" : self.batch_id, "config" : self.config, "args" : self.args, "docs" : dict((doc.key(), doc.document_info()) for doc in self.docs), "start_time" : self.batch_start_time, "finish_time" : self.batch_finish_time, "elapsed" : self.batch_elapsed_time, "timing" : self.timing } def config_for_directory(self, path): """ Determine the config applicable within a directory by looking in every parent directory (up as far as the dexy project root) for config files and combining them, such that subdirectories override parents. """ self.log.debug("Determining configuration applicable in %s" % path) global_args = {} config_dict = {} variables = {} config_file = self.args['config'] path_elements = path.split(os.sep) for i in range(0,len(path_elements)+1): config_path = os.path.join(*(path_elements[0:i] + [config_file])) config_files = glob.glob(config_path) # Don't propagate virtual files for k in config_dict.keys(): propagate_virtual = config_dict[k].has_key('propagate') and config_dict[k]['propagate'] if k.startswith("@") and not propagate_virtual: del config_dict[k] for f in config_files: self.log.info("loading config file %s" % f) with open(f, "r") as cf: try: json_dict = json.load(cf) except ValueError as e: msg = "Your config file %s has invalid JSON\n%s" % (f, e.message) raise dexy.commands.UserFeedback(msg) if json_dict.has_key("$reset"): # Reset the config, i.e. ignore everything from parent # directories, just use this directory's config in json_dict config_dict = json_dict else: # Combine any config in this dir with parent dir config. 
config_dict.update(json_dict) if json_dict.has_key("$globals"): global_args.update(json_dict["$globals"]) if json_dict.has_key("$variables"): variables.update(json_dict["$variables"]) config_dict['$globals'] = global_args config_dict['$variables'] = variables return config_dict def load_config(self): """ This method determines which subdirectories will be included in the dexy batch and populates the config dict for each of them. """ if self.args['recurse']: # Figure out which directories need to be skipped exclude_at_root = Constants.EXCLUDE_DIRS_ROOT + self.reports_dirs + [self.args['artifactsdir'], self.args['logsdir']] self.log.debug("project root excluded directories %s" % ", ".join(exclude_at_root)) exclude_everywhere = Constants.EXCLUDE_DIRS_ALL_LEVELS self.log.debug("directories excluded at all levels %s" % ", ".join(exclude_everywhere)) for dirpath, dirnames, filenames in os.walk(self.args['directory']): # Figure out if we should process this directory and recurse # into its children. Start with process_dir = True process_dir = True # Remove any children we don't want to recurse into. if dirpath == ".": for x in exclude_at_root: if x in dirnames: dirnames.remove(x) for x in exclude_everywhere: if x in dirnames: dirnames.remove(x) # Look for a .nodexy file if os.path.isfile(os.path.join(dirpath, '.nodexy')): # If we find one... self.log.info(".nodexy file found in %s" % dirpath) # ...remove all child dirs from processing... for i in xrange(len(dirnames)): dirnames.pop() # ...and skip this directory. 
process_dir = False # Check if we match any excludes specified on the command line args_exclude = self.args['exclude'] if isinstance(args_exclude, str): args_exclude = args_exclude.split() for pattern in args_exclude: for d in dirnames: m1 = re.match(pattern, d) m2 = re.match("./%s" % pattern, d) m3 = re.match("%s/" % pattern, d) m4 = re.match("./%s/" % pattern, d) if m1 or m2 or m3 or m4: dirnames.remove(d) if process_dir: self.config[dirpath] = self.config_for_directory(dirpath) else: # Not recursing dirpath = self.args['directory'] self.config[dirpath] = self.config_for_directory(dirpath) def process_config(self): """ Processes a populated config dict, identifies files to be processed, creates Document objects for each, links dependencies and finally does topological sort to establish order of batch run. """ # Define the parse_doc nested function which we will call recursively. def parse_doc(path, input_directive, args = {}): # If a specification is nested in a dependency, then input_directive # may be a dict. If so, split it into parts before continuing. try: a, b = input_directive.popitem() input_directive = a args = b except AttributeError: pass tokens = input_directive.split("|") if "/" in tokens[0]: raise dexy.commands.UserFeedback("paths not allowed in tokens: %s" % tokens[0]) if path == '.': glob_string = tokens[0] else: glob_string = os.path.join(re.sub("^\./", "", path), tokens[0]) filters = tokens[1:] docs = [] # virtual document if re.search("@", glob_string): virtual = True dangerous = any(k in ['url', 'repo', 'path'] for k in args) if dangerous and not self.args['danger']: msg = "You are attempting to access a remote file %s." 
% glob_string msg += " You must specify -danger option to do this.\n" raise dexy.commands.UserFeedback(msg) glob_string = glob_string.replace("@", "") else: virtual = False regex = fnmatch.translate(glob_string).replace(".*", "(.*)") matcher = re.compile(regex) files = glob.glob(glob_string) nofiles = len(files) == 0 if nofiles and virtual: files = [glob_string] for f in files: create = True if not virtual: if os.path.isdir(f): create = False if args.has_key('disabled'): if args['disabled']: create = False self.log.warn("document %s|%s disabled" % (f, "|".join(filters))) inputs = [] if args.has_key('inputs'): if isinstance(args['inputs'], str) or isinstance(args['inputs'], unicode): raise dexy.commands.UserFeedback("inputs for %s should be an array" % f) for i in args['inputs']: # Create document objects for input patterns (just in this directory) for doc in parse_doc(path, i): inputs.append(doc.key()) m = matcher.match(f) if m and len(m.groups()) > 0: rootname = matcher.match(f).group(1) # The 'ifinput' directive says that if an input exists matching # the specified pattern, we should create this document and it # will depend on the specified input. if args.has_key('ifinput'): if isinstance(args['ifinput'], str) or isinstance(args['ifinput'], unicode): ifinputs = [args['ifinput']] else: self.log.debug("treating input %s as iterable. 
class: %s" % ( args['ifinput'], args['ifinput'].__class__.__name__)) ifinputs = args['ifinput'] for s in ifinputs: self.log.debug("evaluating ifinput %s" % s) ifinput = s.replace("%", rootname) self.log.debug("evaluating ifinput %s" % ifinput) input_docs = parse_doc(path, ifinput, {}) for input_doc in input_docs: inputs.append(input_doc.key()) if len(input_docs) == 0: create = False if args.has_key('ifnoinput'): ifinput = args['ifnoinput'].replace("%", rootname) input_docs = parse_doc(path, ifinput, {}) if len(input_docs) > 0: create = False if args.has_key('except'): try: except_re = re.compile(args['except']) except sre_constants.error as e: raise dexy.commands.UserFeedback("""You passed 'except' value of %s. Please pass a valid Python-style regular expression for 'except', NOT a glob-style matcher. Error message from re.compile: %s""" % (args['except'], e)) if re.match(except_re, f): self.log.warn("skipping %s for %s as it matches except pattern %s" % ( f, input_directive, args['except'] )) create = False if create: doc = dexy.document.Document() doc.set_controller(self) # Filters can either be included in the name... doc.set_name_and_filters(f, filters) # ...or they may be listed explicitly. 
if args.has_key('filters'): doc.filters += args['filters'] if args.has_key('loglevel'): doc.loglevelname = args['loglevel'] doc.setup_log() # After name has been set doc.virtual = virtual key = doc.key() self.log.debug("creating doc %s for glob %s" % (key, glob_string)) if self.members.has_key(key): doc = self.members[key] if args.has_key('priority'): doc.priority = args['priority'] del args['priority'] doc.args.update(args) if args.has_key('allinputs'): doc.use_all_inputs = args['allinputs'] if args.has_key('inputs'): doc.input_args = copy.copy(args['inputs']) doc.input_keys = [] for i in inputs: doc.add_input_key(i) self.members[key] = doc docs.append(doc) # docs is a local list of docs return docs # end of parse_doc nested function def get_pos(member): key = member.key() return self.members.keys().index(key) def depend(parent, child): self.depends.append((get_pos(child), get_pos(parent))) # The real processing starts here. self.members = OrderedDict() self.depends = [] self.batch_id = self.db.next_batch_id() if not self.args['silent']: print "batch id is", self.batch_id for path, config in self.config.iteritems(): ### @export "features-global-args-1" if config.has_key("$globals"): global_args = config["$globals"] else: global_args = {} if config.has_key("$variables"): global_variables = config["$variables"] else: global_variables = {} if self.args.has_key('globals'): global_args.update(self.args['globals']) for k, v in config.iteritems(): local_args = global_args.copy() local_args.update(v) local_args['$variables'] = global_variables for kg in global_args.keys(): if local_args.has_key(kg): if isinstance(local_args[kg], dict): local_args[kg].update(global_args[kg]) parse_doc(path, k, local_args) ### @end # Determine dependencies total_dependencies = 0 self.log.debug("Finalizing dependencies between documents...") for doc in self.members.values(): doc.finalize_inputs(self.members) total_dependencies += len(doc.inputs) for input_doc in doc.inputs: depend(doc, 
input_doc) self.log.debug("finalized dependencies for %s" % doc.key()) if len(doc.inputs) > 10: self.log.debug("%s inputs added" % len(doc.inputs)) elif len(doc.inputs) == 0: self.log.debug("no inputs added") else: self.log.debug("inputs added: %s" % ", ".join(d.key() for d in doc.inputs)) if len(self.args['run']) > 0: # Only run the specified document, and its dependencies. new_members = OrderedDict() new_depends = [] def new_get_pos(member): key = member.key() return new_members.keys().index(key) def new_depend(parent, child): new_depends.append((new_get_pos(child), new_get_pos(parent))) def parse_new_document(d): new_members[d.key()] = d for input_doc in d.inputs: if not input_doc.key() in new_members.keys(): new_members[input_doc.key()] = input_doc new_depend(d, input_doc) parse_new_document(input_doc) run_key = self.args['run'] if self.members.has_key(run_key): doc = self.members[run_key] else: matches = [k for k in self.members.keys() if k.startswith(run_key)] matches.sort(key=lambda k: len(self.members[k].inputs)) doc = self.members[matches[-1]] parse_new_document(doc) if not self.args['silent']: print "limiting members list to %s and its dependencies, %s/%s documents will be run" % (doc.key(), len(new_members), len(self.members)) self.members = new_members self.depends = new_depends num_members = len(self.members) if num_members > 0: dep_ratio = float(total_dependencies)/num_members else: dep_ratio = None if not self.args['silent']: print "sorting %s documents into run order, there are %s total dependencies" % (num_members, total_dependencies) if dep_ratio: print "ratio of dependencies to documents is %0.1f" % (dep_ratio) if dep_ratio > 10: print "if you are experiencing performance problems:" print "call dexy with -dryrun and inspect logs/batch-XXXX.json to debug dependencies" print "consider using -strictinherit or reducing your use of 'allinputs' " try: self.log.debug("Beginning topological sort...") topsort_ordering = topsort(self.depends) 
self.log.debug("Topological sort completed successfully.") except CycleError as e: print "There are circular dependencies!" answer, num_parents, children = e.args for child, parents in children.items(): for parent in parents: print "%s depends on %s" % (self.members.keys()[parent], self.members.keys()[child]) raise dexy.commands.UserFeedback(e.message) docs_without_dependencies = frozenset(range(len(self.members))) - frozenset(topsort_ordering) self.ordering = topsort_ordering + list(docs_without_dependencies) for i in self.ordering: key = self.members.keys()[i] self.docs.append(self.members[key])
def group_all_export(request, group_slug):
    """
    Export all group members and subscribers of a specific group
    as an Excel (.xls) download.

    The sheet has one header row, then one row per group membership,
    followed by one row per subscriber entry. Subscriber fields whose
    label is not already a column get a new column appended.
    """
    group = get_object_or_404(Group, slug=group_slug)

    # if they can edit it, they can export it
    if not has_perm(request.user,'user_groups.change_group', group):
        raise Http403

    import xlwt
    from ordereddict import OrderedDict
    from django.db import connection
    from forms_builder.forms.models import FieldEntry

    # create the excel book and sheet
    book = xlwt.Workbook(encoding='utf8')
    sheet = book.add_sheet('Group Members and Subscribers')

    # row_index maps a member row number / subscriber key to its sheet row,
    # col_index maps a column label to its sheet column
    row_index = {}
    col_index = {}

    # excel date styles
    default_style = xlwt.Style.default_style
    datetime_style = xlwt.easyxf(num_format_str='mm/dd/yyyy hh:mm')
    date_style = xlwt.easyxf(num_format_str='mm/dd/yyyy')

    def cell_style(value):
        # datetime is tested first, then date, otherwise the default style
        if isinstance(value, datetime):
            return datetime_style
        if isinstance(value, date):
            return date_style
        return default_style

    #---------
    # MEMBERS
    #---------

    # the key is what the column will be in the
    # excel sheet. the value is the database lookup
    # Used OrderedDict to maintain the column order
    group_mappings = OrderedDict([
        ('user_id', 'au.id'),
        ('first_name', 'au.first_name'),
        ('last_name', 'au.last_name'),
        ('email', 'au.email'),
        ('receives email', 'pp.direct_mail'),
        ('company', 'pp.company'),
        ('address', 'pp.address'),
        ('address2', 'pp.address2'),
        ('city', 'pp.city'),
        ('state', 'pp.state'),
        ('zipcode', 'pp.zipcode'),
        ('country', 'pp.country'),
        ('phone', 'pp.phone'),
        ('is_active', 'au.is_active'),
        ('date', 'gm.create_dt'),
    ])
    group_lookups = ','.join(group_mappings.values())

    # Custom sql is used so that the memberships and the profile
    # information of every member come back from a single query,
    # instead of one get_profile() query per user.
    cursor = connection.cursor()
    sql = "SELECT %s FROM user_groups_groupmembership gm \
           INNER JOIN auth_user au ON (au.id = gm.member_id) \
           LEFT OUTER JOIN profiles_profile pp \
           on (pp.user_id = gm.member_id) WHERE group_id = %%s;"
    sql = sql % group_lookups
    cursor.execute(sql, [group.pk])
    values_list = list(cursor.fetchall())

    # index the group key mappings and insert them into the sheet.
    for column_label in group_mappings.keys():
        if column_label in col_index:
            continue
        col = len(col_index.keys())
        col_index[column_label] = col
        sheet.write(0, col, column_label, style=default_style)

    # Write the data enumerated to the excel sheet
    for row, row_data in enumerate(values_list):
        for col, val in enumerate(row_data):
            # assign the row if it is not yet available
            if row not in row_index:
                row_index[row] = row + 1
            sheet.write(row + 1, col, val, style=cell_style(val))

    #-------------
    # Subscribers
    #-------------
    entries = FieldEntry.objects.filter(entry__subscriptions__group=group).distinct()

    for entry in entries:
        val = entry.value
        field = entry.field.label.lower().replace(" ", "_")
        subscriber_key = "subscriber %s" % str(entry.entry.pk)

        # look up the subscriber's row number, assigning the next
        # free row the first time this subscriber is seen
        if subscriber_key not in row_index:
            row_index[subscriber_key] = len(row_index.keys()) + 1
        row = row_index[subscriber_key]

        # look up the entry's col number, adding and labelling
        # a new column the first time this field is seen
        if field not in col_index:
            new_col = len(col_index.keys())
            col_index[field] = new_col
            sheet.write(0, new_col, field, style=default_style)
        col = col_index[field]

        sheet.write(row, col, val, style=cell_style(val))

    response = HttpResponse(mimetype='application/vnd.ms-excel')
    response['Content-Disposition'] = 'attachment; filename=group_%s_all_export.xls' % group.pk
    book.save(response)
    return response
class Artifact(object):
    """
    The state of one step in a document's filter chain.

    An artifact carries the metadata needed to compute its hashstring
    (used as the cache key and the on-disk filename), its input and output
    data dicts, and a set of helpers for naming and locating the files it
    produces.

    Persistence methods used here (load_meta, load_input, load_output,
    save_meta, save_output, is_output_cached) are not defined in this
    class; presumably they are supplied by subclasses -- see is_abstract().
    """
    HASH_WHITELIST = Constants.ARTIFACT_HASH_WHITELIST
    MAX_DATA_DICT_DECIMALS = 5
    MAX_DATA_DICT_LENGTH = 10 ** MAX_DATA_DICT_DECIMALS

    # hashfunction names that map directly onto hashlib constructors
    _HASHLIB_FUNCTIONS = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512')

    META_ATTRS = [
        'additional_inputs',
        'binary_input',
        'binary_output',
        'created_by',
        'document_key',
        'ext',
        'final',
        'hashfunction',
        'initial',
        'logstream',
        'key',
        'name',
        'output_hash',
        'state',
        'stdout',
        'virtual'
    ]

    BINARY_EXTENSIONS = [
        '.docx',
        '.eot',
        '.epub',
        '.gif',
        '.gz',
        '.jpg',
        '.kch',
        '.odt',
        '.pdf',
        '.png',
        '.rtf',
        '.sqlite',
        '.sqlite3',
        '.swf',
        '.tgz',
        '.ttf',
        '.wav',
        '.woff',
        '.xls',
        '.zip'
    ]

    def __init__(self):
        # The filter list is looked up once and cached on the class.
        if not hasattr(self.__class__, 'FILTERS'):
            self.__class__.FILTERS = dexy.introspect.filters(Constants.NULL_LOGGER)

        self._inputs = {}
        self.additional = None
        self.additional_inputs = []
        self.args = {}
        self.args['globals'] = {}
        self.artifacts_dir = 'artifacts' # TODO don't hard code
        self.batch_id = None
        self.batch_order = None
        self.binary_input = None
        self.binary_output = None
        self.controller_args = {}
        self.controller_args['globals'] = {}
        self.created_by = None
        self.ctime = None
        self.data_dict = OrderedDict()
        self.dexy_version = Version.VERSION
        self.dirty = False
        self.document_key = None
        self.elapsed = 0
        self.ext = None
        self.final = None
        self.finish_time = None
        self.hashfunction = 'md5'
        self.initial = None
        self.inode = None
        self.input_data_dict = OrderedDict()
        self.is_last = False
        self.key = None
        self.log = logging.getLogger()
        self.logstream = ""
        self.mtime = None
        self.name = None
        self.source = None
        self.start_time = None
        self.state = 'new'
        self.stdout = None
        self.virtual_docs = None

    def keys(self):
        """Return the keys (section names) of the output data dict."""
        return self.data_dict.keys()

    def may_have_kv_storage(self):
        """True if output is binary and the extension is a key-value data extension."""
        return self.binary_output and (self.ext in dexy.helpers.KeyValueData.EXTENSIONS)

    def __getitem__(self, key):
        """
        Look up key, trying in order: key-value storage (if this artifact
        has or may have one), the output data dict, then instance
        attributes.

        Raises dexy.commands.UserFeedback if the key is found nowhere.
        """
        if not hasattr(self, "_storage") and self.binary_output and (self.ext in dexy.helpers.KeyValueData.EXTENSIONS):
            self.setup_kv_storage()

        if hasattr(self, "_storage"):
            if self._storage.mode == "write":
                # Change from write mode to read mode...
                self.setup_kv_storage()
            return self._storage.retrieve(key)
        elif key in self.data_dict:
            return self.data_dict[key]
        elif hasattr(self, key):
            return getattr(self, key)
        elif self.ext in dexy.helpers.KeyValueData.EXTENSIONS:
            self.setup_kv_storage()
            return self._storage.retrieve(key)
        else:
            raise dexy.commands.UserFeedback("Can't find key '%s' in %s" % (key, self.key))

    def __unicode__(self):
        """
        When d[key] is used without attributes being accessed, need to return
        artifact output text. Jinja calls the __unicode__ method so we override that.
        """
        return self.output_text()

    def is_complete(self):
        """True once the artifact's state is 'complete'."""
        return str(self.state) == 'complete'

    @classmethod
    def retrieve(klass, hashstring, hashfunction='md5'):
        """
        Return the artifact for hashstring, loading it on first request and
        serving it from the class-level retrieved_artifacts cache afterwards.
        """
        if not hasattr(klass, 'retrieved_artifacts'):
            klass.retrieved_artifacts = {}

        if hashstring in klass.retrieved_artifacts:
            return klass.retrieved_artifacts[hashstring]

        artifact = klass()
        artifact.hashstring = hashstring
        artifact.hashfunction = hashfunction
        artifact.load()
        klass.retrieved_artifacts[hashstring] = artifact
        return artifact

    def load(self):
        """Load metadata and input; load output too if complete and not yet in memory."""
        self.load_meta()
        self.load_input()
        if self.is_complete() and not self.is_loaded():
            self.load_output()

    def load_inputs(self):
        """Call load() on every input artifact."""
        # inputs() is a dict of key -> artifact; iterate the artifacts, not
        # the string keys.
        for a in self.inputs().values():
            a.load()

    def save(self):
        """
        Persist metadata, and output if complete and not already cached.

        Does nothing for abstract artifacts (no save_meta method). Raises
        Exception if there is no hashstring; re-raises IOError from
        save_output after reporting which artifact failed.
        """
        if self.is_abstract():
            pass # For testing.
        elif not self.hashstring:
            raise Exception("can't persist an artifact without a hashstring!")
        else:
            self.save_meta()
            if self.is_complete() and not self.is_output_cached():
                try:
                    self.save_output()
                except IOError:
                    print("An error occured while saving %s" % self.key)
                    raise

    def is_abstract(self):
        """True if this class provides no save_meta method (i.e. cannot persist)."""
        return not hasattr(self, 'save_meta')

    def filter_args(self):
        """
        Returns args specified in the .dexy file for this filter alias.

        Raises dexy.commands.UserFeedback if an alias was given a single
        value where a dict is required.
        """
        args = {}
        for a in self.filter_class.ALIASES:
            if a in self.args:
                try:
                    args.update(self.args[a])
                except ValueError as e:
                    if "dictionary update sequence element" in e.message:
                        raise dexy.commands.UserFeedback("You need to supply a dict to argument '%s', rather than the single value '%s'" % (a, self.args[a]))
                    else:
                        print(self.args[a])
                        raise
        return args

    def setup_initial(self):
        """
        Set up an initial artifact (the first artifact in a document's filter chain).
        """
        if 'binary' in self.args:
            self.binary_input = self.args['binary']
        else:
            self.binary_input = (self.doc.ext in self.BINARY_EXTENSIONS)

        self.binary_output = self.binary_input
        self.ext = self.doc.ext
        self.initial = True
        self.virtual = self.doc.virtual
        self.virtual_docs = self.doc.virtual_docs

        if 'final' in self.args:
            self.final = self.args['final']
        elif os.path.basename(self.name).startswith("_"):
            # names starting with an underscore are never final
            self.final = False

        if not self.doc.virtual:
            stat_info = os.stat(self.name)
            self.ctime = stat_info[stat.ST_CTIME]
            self.mtime = stat_info[stat.ST_MTIME]
            self.inode = stat_info[stat.ST_INO]

        self.set_data(self.doc.initial_artifact_data())

        # TODO remove?
        if not self.data_dict:
            raise Exception("no data dict!")
        elif len(self.data_dict) == 0:
            raise Exception("data dict has len 0!")

        self.state = 'complete'

    def setup_from_filter_class(self):
        """
        Record the filter's name, source-code hash and version on this
        artifact, computing and caching the hash/version on the filter
        class the first time they are needed.
        """
        # cache filter class source code so it only has to be calculated once
        filter_class_source_const = "SOURCE_CODE_%s" % self.filter_class.__name__
        if not hasattr(self.filter_class, filter_class_source_const):
            # get source code of this filter class + all parent filter classes.
            source = ""
            klass = self.filter_class

            # get source code from filter class and all parent classes
            while klass != dexy.dexy_filter.DexyFilter:
                source += inspect.getsource(klass)
                klass = klass.__base__

            # and then get source code of DexyFilter class
            source += inspect.getsource(dexy.dexy_filter.DexyFilter)

            filter_class_source_hash = self.compute_hash(source)
            setattr(self.filter_class, filter_class_source_const, filter_class_source_hash)
            assert filter_class_source_hash == getattr(self.filter_class, filter_class_source_const)
            self.log.debug("Source code hash for %s is %s" % (self.filter_class.__name__, filter_class_source_hash))

        if not hasattr(self.filter_class, 'VERSION'):
            filter_version = self.filter_class.version(self.log)
            self.filter_class.VERSION = filter_version

        self.filter_name = self.filter_class.__name__
        self.filter_source = getattr(self.filter_class, filter_class_source_const)
        self.filter_version = self.filter_class.VERSION

        if self.final is None:
            self.final = self.filter_class.FINAL

    def setup_from_previous_artifact(self, previous_artifact):
        """
        Inherit attributes, additional inputs and input data from the
        previous artifact in the chain, then determine this artifact's
        output extension and whether its output is binary.

        Raises Exception if the previous artifact has a non-additional,
        non-virtual input that this artifact does not already have.
        """
        for a in ['args', 'final', 'mtime', 'ctime', 'inode', 'virtual', 'virtual_docs']:
            setattr(self, a, getattr(previous_artifact, a))

        # Look for additional inputs in previous artifacts or previous
        # artifacts' inputs.
        for k, a in previous_artifact.inputs().iteritems():
            if a.additional and k not in self._inputs:
                self.add_input(k, a)
            elif k not in self._inputs and not a.virtual:
                # We should have all other inputs already. Validate this.
                raise Exception("Missing input %s" % k)

            for kk, aa in a.inputs().iteritems():
                if aa.additional and kk not in self._inputs:
                    self.add_input(kk, aa)

        self.binary_input = previous_artifact.binary_output
        self.input_data_dict = previous_artifact.data_dict
        self.input_ext = previous_artifact.ext
        self.previous_artifact_hashstring = previous_artifact.hashstring
        self.previous_artifact_filename = previous_artifact.filename()
        self.previous_artifact_filepath = previous_artifact.filepath()
        self.previous_canonical_filename = previous_artifact.canonical_filename(True)
        self.previous_long_canonical_filename = previous_artifact.long_canonical_filename()
        self.previous_websafe_key = previous_artifact.websafe_key()

        # The JSON output of previous artifact
        if not previous_artifact.binary_output:
            self.previous_cached_output_filepath = previous_artifact.cached_output_filepath()

        # Determine file extension of output
        if hasattr(self, 'next_filter_class'):
            next_inputs = self.next_filter_class.INPUT_EXTENSIONS
        else:
            next_inputs = None

        filter_args = self.filter_args()
        if 'ext' in filter_args:
            # an explicit 'ext' filter argument wins; normalise the leading dot
            ext = filter_args['ext']
            if not ext.startswith("."):
                ext = ".%s" % ext
            self.ext = ext
        else:
            self.ext = self.filter_class.output_file_extension(
                    previous_artifact.ext,
                    self.name,
                    next_inputs)

        self.binary_output = self.filter_class.BINARY
        if self.binary_output is None:
            self.set_binary_from_ext()

        self.state = 'setup'

    @classmethod
    def setup(klass, doc, artifact_key, filter_class = None, previous_artifact = None):
        """
        Create an Artifact instance and load all information needed to
        calculate its hashstring.
        """
        artifact = klass()
        artifact.key = artifact_key
        artifact.filter_class = filter_class
        artifact.is_last = (artifact.key == doc.key())

        # Add references for convenience
        artifact.artifacts_dir = doc.artifacts_dir
        artifact.controller_args = doc.controller.args
        artifact.hashfunction = doc.controller.args['hashfunction']
        artifact.db = doc.db
        artifact.doc = doc
        artifact.log = doc.log

        # These attributes are the same for all artifacts pertaining to a document
        artifact.args = doc.args
        artifact.batch_id = doc.batch_id
        artifact.document_key = doc.key()
        artifact.name = doc.name

        # Set batch order to next in sequence
        artifact.batch_order = artifact.db.next_batch_order(artifact.batch_id)

        next_filter_class = doc.next_filter_class()
        if next_filter_class:
            artifact.next_filter_name = next_filter_class.__name__
            artifact.next_filter_class = next_filter_class

        # Set inputs from original document inputs.
        artifact._inputs.update(artifact.doc.input_artifacts())
        if len(artifact.doc.input_artifacts().keys()) > 10:
            doc.log.debug("Setting inputs to include %s document inputs" % len(artifact.doc.input_artifacts()))
        elif len(artifact.doc.input_artifacts().keys()) > 0:
            doc.log.debug("Setting inputs to include inputs: %s" % ",".join(artifact.doc.input_artifacts().keys()))

        for k, a in artifact.doc.input_artifacts().iteritems():
            if a.additional and k not in artifact._inputs:
                doc.log.debug("Adding additional input %s" % k)
                artifact.add_input(k, a)

            for kk, aa in a.inputs().iteritems():
                if aa.additional and kk not in artifact._inputs:
                    doc.log.debug("Adding additional input %s" % kk)
                    artifact.add_input(kk, aa)

        if previous_artifact:
            artifact.setup_from_previous_artifact(previous_artifact)
            artifact.setup_from_filter_class()
        else:
            artifact.setup_initial()

        artifact.set_hashstring()

        return artifact

    def _error_header(self):
        # One-line description of where in the filter chain a failure happened.
        err_msg_args = (self.doc.key(), self.filter_alias, self.doc.step, len(self.doc.filters))
        return "ERROR in %s (in filter '%s' - step %s of %s)" % err_msg_args

    def run(self):
        """
        Run this artifact's filter unless a complete cached result can be
        used, then record elapsed time and update the database.

        Raises dexy.commands.UserFeedback (with location details added) for
        user-level filter errors, and dexy.commands.InternalDexyProblem for
        any other exception raised by the filter.
        """
        start = time.time()

        if self.controller_args['nocache'] or not self.is_complete():
            # We have to actually run things...
            if not self.filter_class:
                self.filter_class = dexy.introspect.get_filter_by_name(self.filter_name, self.doc.__class__.filter_list)

            # Set up instance of filter.
            filter_instance = self.filter_class()
            filter_instance.artifact = self
            filter_instance.log = self.log

            # Make sure previous artifact is loaded.
            # NOTE(review): the check is on input text but the file content
            # is stored in data_dict, not input_data_dict -- confirm intent.
            if not self.binary_input and len(self.input_text()) == 0:
                with open(self.previous_artifact_filepath, "rb") as f:
                    self.data_dict['1'] = f.read()

            try:
                filter_instance.process()
            except dexy.commands.UserFeedback as e:
                messages = []
                messages.append(self._error_header())
                messages.append(e.message)

                for message in messages:
                    self.log.debug(message)

                messages.append("This exception information has been written to logs/dexy.log")
                messages.append("There may be more information in logs/dexy.log")
                if self.log.getEffectiveLevel() > logging.DEBUG:
                    messages.append("If you can't find clues in the log, try running again with -loglevel DEBUG")
                raise dexy.commands.UserFeedback("\n".join(messages))
            except dexy.commands.InternalDexyProblem:
                sys.stderr.write(self._error_header() + "\n")
                raise
            except Exception as e:
                traceback.print_tb(sys.exc_info()[2])
                sys.stderr.write(self._error_header() + "\n")
                if e.message:
                    raise dexy.commands.InternalDexyProblem("error class: %s\nerror message: %s" % (e.__class__.__name__, e.message))
                else:
                    raise dexy.commands.InternalDexyProblem("error class: %s" % e.__class__.__name__)

            if self.data_dict and len(self.data_dict) > 0:
                pass
            # NOTE(review): is_canonical_output_cached is not defined in this
            # class and is tested without being called; if it is a method the
            # test is always true and the else branch is unreachable.
            elif self.is_canonical_output_cached:
                self.state = 'complete'
                self.save()
            else:
                raise Exception("data neither in memory nor on disk")

            self.logstream = self.doc.logstream.getvalue()
            self.state = 'complete'
            self.source = 'run'
            self.save()
        else:
            self.source = 'cache'
            self.log.debug("using cached artifact for %s" % self.key)

            # make sure additional artifacts are added to db
            for a in self.inputs().values():
                if a.additional and a.key not in self.db.extra_keys:
                    a.batch_id = self.batch_id
                    self.db.append_artifact(a)

        self.elapsed = time.time() - start
        self.db.update_artifact(self)

    def add_additional_artifact(self, key_with_ext, ext=None):
        """
        Create a new 'additional' virtual artifact created by this one,
        register it as an input of this artifact and append it to the db.

        ext defaults to the extension of key_with_ext. Returns the new
        artifact.
        """
        if not ext:
            ext = os.path.splitext(key_with_ext)[1]

        new_artifact = self.__class__()
        new_artifact.key = key_with_ext
        if ext.startswith("."):
            new_artifact.ext = ext
        else:
            new_artifact.ext = ".%s" % ext
        new_artifact.final = True
        new_artifact.hashfunction = self.hashfunction
        new_artifact.additional = True
        new_artifact.set_binary_from_ext()
        new_artifact.artifacts_dir = self.artifacts_dir
        new_artifact.inode = self.hashstring
        new_artifact.created_by = self.key
        new_artifact.virtual = True
        new_artifact.name = key_with_ext.split("|")[0]

        # TODO this is duplicated in setup_from_previous_artifact, should reorganize
        for at in ['batch_id', 'document_key', 'mtime', 'ctime', 'virtual_docs']:
            val = getattr(self, at)
            setattr(new_artifact, at, val)

        new_artifact.set_hashstring()
        self.log.debug("new artifact %s hashstring %s" % (key_with_ext, new_artifact.hashstring))
        self.add_input(key_with_ext, new_artifact)
        self.db.append_artifact(new_artifact) # append to db because not part of doc.artifacts
        return new_artifact

    def add_input(self, key, artifact):
        """Register artifact as an input under key and record its hashstring."""
        self._inputs[key] = artifact
        self.additional_inputs.append(artifact.hashstring)

    def inputs(self):
        """Return the dict of input key -> input artifact."""
        return self._inputs

    def set_binary_from_ext(self):
        """Set binary_output according to whether ext is in BINARY_EXTENSIONS."""
        # TODO list more binary extensions or find better way to do this
        self.binary_output = self.ext in self.BINARY_EXTENSIONS

    def set_data(self, data):
        """Store data as the single section '1' of the output data dict."""
        self.data_dict['1'] = data

    def set_data_from_artifact(self):
        """Read this artifact's file (as UTF-8) into section '1' of the data dict."""
        with codecs.open(self.filepath(), "r", encoding="utf-8") as f:
            self.data_dict['1'] = f.read()

    def is_loaded(self):
        """True if the output data dict exists and is non-empty."""
        return hasattr(self, 'data_dict') and len(self.data_dict) > 0

    def compute_hash(self, text):
        """
        Return the hash of text using self.hashfunction.

        Unicode text is encoded as UTF-8; dicts and lists are serialised
        with json.dumps; other values are decoded as UTF-8 first unless
        binary_input is set, in which case they are hashed as given.

        Raises Exception for an unrecognised hashfunction.
        """
        unicode_text = None

        if isinstance(text, unicode):
            unicode_text = text
        elif isinstance(text, (dict, list)):
            unicode_text = json.dumps(text)
        elif self.binary_input:
            pass
        else:
            unicode_text = unicode(text, encoding="utf-8")

        if unicode_text:
            text = unicode_text.encode("utf-8")

        if self.hashfunction in self._HASHLIB_FUNCTIONS:
            h = getattr(hashlib, self.hashfunction)(text).hexdigest()
        elif self.hashfunction == 'crc32':
            # mask so the result is the same unsigned value on all platforms
            h = str(zlib.crc32(text) & 0xffffffff)
        elif self.hashfunction == 'adler32':
            h = str(zlib.adler32(text) & 0xffffffff)
        else:
            raise Exception("unexpected hash function %s" % self.hashfunction)

        return h

    def input_hashes(self):
        """
        Returns an OrderedDict of key, hashstring for each input artifact, sorted by key.
        """
        return OrderedDict((k, str(self.inputs()[k].hashstring)) for k in sorted(self.inputs()))

    def hash_dict(self):
        """
        Calculate and cache the elements used to compute the hashstring
        """
        if not hasattr(self.__class__, 'SOURCE_CODE'):
            artifact_class_source = inspect.getsource(self.__class__)
            artifact_py_source = inspect.getsource(Artifact)
            self.__class__.SOURCE_CODE = self.compute_hash(artifact_class_source + artifact_py_source)

        self.artifact_class_source = self.__class__.SOURCE_CODE

        if self.dirty:
            # a changing value set on dirty artifacts; it only affects the
            # hash if 'dirty_string' is in HASH_WHITELIST
            self.dirty_string = time.gmtime()

        hash_dict = OrderedDict()

        hash_dict['inputs'] = self.input_hashes()

        for k in self.HASH_WHITELIST:
            if k in self.__dict__:
                v = self.__dict__[k]
                if hasattr(v, 'items'):
                    hash_v = OrderedDict()
                    for k1 in sorted(v.keys()):
                        v1 = v[k1]
                        try:
                            if len(str(v1)) > 50:
                                raise Exception()
                            json.dumps(v1)
                            hash_v[str(k1)] = v1
                        except Exception:
                            # Use a hash if we will have problems saving to JSON
                            # or if the data is large (don't want to clutter up the DB,
                            # makes it harder to spot differences)
                            hash_v[str(k1)] = self.compute_hash(v1)
                else:
                    hash_v = str(v)
                hash_dict[str(k)] = hash_v

        return hash_dict

    def set_hashstring(self):
        """
        Compute and set the hashstring, then try to load any previously
        stored data for it (keeping the current document_key).

        Raises Exception if a hashstring has already been set.
        """
        if hasattr(self, 'hashstring'):
            raise Exception("setting hashstring twice")

        hash_data = str(self.hash_dict())
        self.hashstring = self.compute_hash(hash_data)

        try:
            original_document_key = self.document_key
            if not self.is_loaded():
                self.load()
            self.document_key = original_document_key
        except AttributeError:
            # abstract artifacts have no load_* methods; that is expected
            if not self.is_abstract():
                raise
        except IOError:
            # nothing could be read for this hashstring; write metadata
            self.save_meta()

    def convert_if_not_unicode(self, s):
        """Return s as unicode: unchanged if already unicode, u"" for None, else decoded as UTF-8."""
        if isinstance(s, unicode):
            return s
        elif s is None:
            return u""
        else:
            try:
                return unicode(s, encoding="utf-8")
            except Exception:
                print("error occurred trying to convert text to unicode in %s" % self.key)
                raise

    def input_text(self):
        """Return all input data dict values joined into one unicode string."""
        return u"".join([self.convert_if_not_unicode(v) for v in self.input_data_dict.values()])

    def output_text(self):
        """Return all output data dict values joined into one unicode string."""
        return u"".join([self.convert_if_not_unicode(v) for v in self.data_dict.values()])

    def read_binary_output(self):
        """Load output with binary_output temporarily set, and return binary_data."""
        self.binary_output = True
        self.load_output()
        self.binary_output = False
        return self.binary_data

    def output(self):
        """
        Return binary data for binary artifacts, output text otherwise.

        Raises Exception if the artifact is not complete.
        """
        if not self.is_complete():
            raise Exception("can't call output unless complete!")

        if self.binary_output:
            if not hasattr(self, 'binary_data'):
                self.load_output()
            return self.binary_data
        else:
            return self.output_text()

    def relative_refs(self, relative_to_file):
        """How to refer to this artifact, relative to another."""
        doc_dir = os.path.dirname(relative_to_file)
        return [
                os.path.relpath(self.key, doc_dir),
                os.path.relpath(self.long_canonical_filename(), doc_dir),
                "/%s" % self.key,
                "/%s" % self.long_canonical_filename()
        ]

    def use_canonical_filename(self):
        """Returns the canonical filename after saving contents under this name
        in the artifacts directory."""
        self.write_to_file(os.path.join(self.artifacts_dir, self.canonical_filename()))
        return self.canonical_filename()

    def write_to_file(self, filename):
        """Copy this artifact's stored file to filename, creating parent dirs as needed."""
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname) and not dirname == '':
            os.makedirs(dirname)
        shutil.copyfile(self.filepath(), filename)

    def work_filename(self):
        """Return the work file name: hashstring + '.work' + input extension."""
        return "%s.work%s" % (self.hashstring, self.input_ext)

    def generate_workfile(self, work_filename = None):
        """Write the input text (UTF-8) to a work file in the artifacts dir."""
        if not work_filename:
            work_filename = self.work_filename()
        work_path = os.path.join(self.artifacts_dir, work_filename)
        with codecs.open(work_path, "w", encoding="utf-8") as work_file:
            work_file.write(self.input_text())

    def temp_filename(self, ext):
        """Return a temp file name: hashstring + '.work' + ext."""
        return "%s.work%s" % (self.hashstring, ext)

    def open_tempfile(self, ext):
        """Create (or truncate) the temp file for ext in the artifacts dir."""
        tempfile_path = os.path.join(self.artifacts_dir, self.temp_filename(ext))
        # Nothing is written or returned; close the handle straight away
        # instead of leaving that to garbage collection.
        codecs.open(tempfile_path, "w", encoding="utf-8").close()

    def temp_dir(self):
        """Return the path of this artifact's temp dir (artifacts_dir/hashstring)."""
        return os.path.join(self.artifacts_dir, self.hashstring)

    def create_temp_dir(self, populate=False):
        """
        (Re)create an empty temp dir; if populate, copy in every existing
        input file and the previous artifact's file under canonical names.
        """
        tempdir = self.temp_dir()
        shutil.rmtree(tempdir, ignore_errors=True)
        os.mkdir(tempdir)

        if populate:
            # write all inputs to this directory, under their canonical names
            for input_artifact in self._inputs.values():
                filename = os.path.join(tempdir, input_artifact.canonical_filename())
                if os.path.exists(input_artifact.filepath()):
                    input_artifact.write_to_file(filename)
                    self.log.debug("Populating temp dir for %s with %s" % (self.key, filename))
                else:
                    self.log.warn("Not populating temp dir for %s with file %s, file does not exist (yet)" % (self.key, filename))

            # write the workfile to this directory under its canonical name
            previous = self.previous_artifact_filepath
            workfile = os.path.join(tempdir, self.previous_canonical_filename)
            if not os.path.exists(os.path.dirname(workfile)):
                os.makedirs(os.path.dirname(workfile))
            self.log.debug("Copying %s to %s" % (previous, workfile))
            shutil.copyfile(previous, workfile)

    def alias(self):
        """
        Whether this artifact includes an alias.

        Returns the first "|"-separated key component starting with "-",
        or None if there is none.
        """
        aliases = [k for k in self.key.split("|") if k.startswith("-")]
        if len(aliases) > 0:
            return aliases[0]

    def canonical_dir(self, ignore_args = False):
        """Return the directory part of name (ignore_args is unused)."""
        return os.path.dirname(self.name)

    def canonical_basename(self, ignore_args = False):
        """Return the base name of the canonical filename."""
        return os.path.basename(self.canonical_filename(ignore_args))

    def canonical_filename(self, ignore_args = False):
        """
        Return the canonical filename for this artifact's output.

        Unless ignore_args is set, a 'canonical-name' arg replaces the file
        name and a 'postfix' arg is appended after the extension; otherwise
        any alias is inserted before the extension.
        """
        fn = os.path.splitext(self.key.split("|")[0])[0]

        if 'canonical-name' in self.args and not ignore_args:
            parent_dir = os.path.dirname(fn)
            return os.path.join(parent_dir, self.args['canonical-name'])
        elif 'postfix' in self.args and not ignore_args:
            return "%s%s%s" % (fn, self.ext, self.args['postfix'])
        elif self.alias():
            return "%s%s%s" % (fn, self.alias(), self.ext)
        else:
            return "%s%s" % (fn, self.ext)

    def long_canonical_filename(self):
        """Return the key with "|" replaced by "-", plus ext when the key has filters."""
        if "|" not in self.key:
            # no separators, so there is nothing to replace or append
            return self.key
        return "%s%s" % (self.key.replace("|", "-"), self.ext)

    def websafe_key(self):
        """Return the long canonical filename with "/" replaced by "--"."""
        return self.long_canonical_filename().replace("/", "--")

    def web_safe_document_key(self):
        # duplicate, remove this alias
        return self.websafe_key()

    def filename(self):
        """
        The filename where artifact content is stored, based on the hashstring.
        """
        if not hasattr(self, 'ext'):
            raise Exception("artifact %s has no ext" % self.key)
        return "%s%s" % (self.hashstring, self.ext)

    def filepath(self):
        """
        Full path (including artifacts dir location) to location where artifact content is stored.
        """
        return os.path.join(self.artifacts_dir, self.filename())

    def abs_filepath(self):
        """Return the absolute form of filepath()."""
        return os.path.abspath(self.filepath())

    def breadcrumbs(self):
        """A list of parent dirs, plus the filename if it's not 'index.html'."""
        parent_dirs = os.path.dirname(self.canonical_filename()).split("/")

        if self.canonical_basename() == "index.html":
            result = parent_dirs
        else:
            # list.append() returns None, so build the extended list explicitly
            result = parent_dirs + [self.canonical_basename()]

        if not result:
            result = []

        return result

    def titleized_name(self):
        """Return a title-cased display name: the last breadcrumb for index.html, else the file stem."""
        if self.canonical_basename() == "index.html":
            return self.breadcrumbs()[-1].replace("-"," ").title()
        else:
            return os.path.splitext(self.canonical_basename())[0].replace("-"," ").title()

    def unique_key(self):
        """Return "batch_id:document_key:key"."""
        return "%s:%s:%s" % (self.batch_id, self.document_key, self.key)

    def websafe_unique_key(self):
        """Return unique_key() with "/" replaced by "--"."""
        return self.unique_key().replace("/", "--")

    def url(self):
        """Return the root-relative URL of the canonical filename."""
        # TODO test for final
        return "/%s" % self.canonical_filename()

    def hyperlink(self, link_text = None):
        """Return an HTML anchor to url(); link text defaults to the canonical basename."""
        # TODO test for final
        if not link_text:
            link_text = self.canonical_basename()

        return """<a href="%s">%s</a>""" % (self.url(), link_text)

    def iframe(self, link_text = None, width = "600px", height = "300px"):
        """Return an HTML iframe showing url(), with a hyperlink as fallback content."""
        # TODO test for final
        args = {
                'url' : self.url(),
                'hyperlink' : self.hyperlink(link_text),
                'width' : width,
                'height' : height
        }
        return """
        <iframe src="%(url)s" width="%(width)s" height="%(height)s" style="border: thin solid gray;">
        %(hyperlink)s
        </iframe>
        """ % args

    def img(self):
        """Return an HTML img tag pointing at the canonical filename."""
        # TODO test for final
        return """<img src="/%s" alt="Image generated by dexy %s" />""" % (self.canonical_filename(), self.key)

    def relpath(self, artifact_key):
        """
        Returns relative path from self to other artifact key, e.g. for linking to CSS relatively
        """
        artifact = self.inputs()[artifact_key]
        return os.path.join(self.relative_path_to_input(artifact), artifact.canonical_basename())

    def has_sections(self):
        """True unless the data dict consists of exactly the single section '1'."""
        return (self.data_dict.keys() != ['1'])

    def relative_path_to_input(self, input_artifact):
        """Return the relative path from this artifact's dir to the input's dir ("" if the same)."""
        my_dir = os.path.dirname(self.name)
        input_dir = os.path.dirname(input_artifact.name)
        self.log.debug("Calculating relative path between %s and %s" % (self.name, input_artifact.name))

        if not my_dir:
            my_dir = "."
        if not input_dir:
            input_dir = "."

        if my_dir == input_dir:
            relpath = ""
        else:
            relpath = os.path.relpath(input_dir, my_dir)
        return relpath

    def relative_key_for_input(self, input_artifact):
        """Return the input's key basename prefixed with the relative path to its dir."""
        relpath = self.relative_path_to_input(input_artifact)
        return os.path.join(relpath, os.path.basename(input_artifact.key))

    def convert_numbered_dict_to_ordered_dict(self, numbered_dict):
        """
        Turn a dict with "NNNNN:key" keys into an OrderedDict of key -> value,
        ordered by the sorted numbered keys.
        """
        ordered_dict = OrderedDict()
        for x in sorted(numbered_dict.keys()):
            k = x.split(":", 1)[1]
            ordered_dict[k] = numbered_dict[x]
        return ordered_dict

    def convert_data_dict_to_numbered_dict(self):
        """
        Return a plain dict whose keys are the data dict keys prefixed with
        a zero-padded position ("00000:key"), so order survives storage in
        an unordered mapping.

        Raises Exception if the data dict has MAX_DATA_DICT_LENGTH or more
        items.
        """
        if len(self.data_dict) >= self.MAX_DATA_DICT_LENGTH:
            exception_msg = ("Your data dict has %s items, which is greater than the arbitrary limit of %s items.\n"
                    "You can increase this limit by changing MAX_DATA_DICT_DECIMALS.")
            raise Exception(exception_msg % (len(self.data_dict), self.MAX_DATA_DICT_LENGTH))

        fmt = "%%0%sd:%%s" % self.MAX_DATA_DICT_DECIMALS
        data_dict = {}
        for i, (k, v) in enumerate(self.data_dict.iteritems()):
            data_dict[fmt % (i, k)] = v
        return data_dict

    def storage(self, reset=False):
        """Return the storage object, setting up key-value storage if absent or reset."""
        if not hasattr(self, "_storage") or reset:
            # Assume we want KV storage
            self.setup_kv_storage()
        return self._storage

    def key_prefixes(self):
        """Return the sorted, distinct storage keys with their last ":"-separated part removed."""
        return sorted(set(":".join(k.split(":")[:-1]) for k in self.storage().keys()))

    def kv_storage(self):
        """Alias for storage()."""
        return self.storage()

    def row_storage(self):
        """Return the storage object, setting up row storage if none exists yet."""
        if not hasattr(self, "_storage"):
            self.setup_row_storage()
        return self._storage

    def setup_kv_storage(self):
        """
        Point _storage at a KeyValueData object for this artifact's file.

        Raises dexy.commands.UserFeedback if KeyValueData raises ValueError.
        """
        try:
            self._storage = dexy.helpers.KeyValueData(self.filepath())
        except ValueError as e:
            # all three values must be passed to % together as one tuple
            raise dexy.commands.UserFeedback("Can't get key-value data from %s for %s: %s" % (self.filepath(), self.key, e.message))

    def setup_row_storage(self):
        """Point _storage at a RowData object for this artifact's file."""
        self._storage = dexy.helpers.RowData(self.filepath())
class ObservationRows:
    """Store index file information.

    The ObservationRows class defines a structure for getting specific
    information about the spectra out of the index file produced by the
    sdfits filler program.

    It is essentially a table of the raw SDFITS file rows, organized with
    a lookup key of scan/feed/window/polarization.  Each call to addRow
    records the FITS extension, the row of the FITS table and the scan
    details; the entry for one scan/feed/window/polarization is retrieved
    with the 'get' method.

    """

    def __init__(self):
        self.Key = namedtuple('key', 'scan, feed, window, polarization')
        self.rows = OrderedDict()

    def __repr__(self):
        template = 'Scans: {0}\nFeeds: {1}\nWindows: {2}\nPols: {3}'
        return template.format(self.scans(), self.feeds(),
                               self.windows(), self.pols())

    def _distinct(self, field):
        # Distinct values of one key field across all stored rows.
        return list(set(getattr(key, field) for key in self.rows))

    def addRow(self, scan, feed, window, polarization,
               fitsExtension, rowOfFitsFile, obsid, procname, procscan, nchans):
        """Add a row to the ObservationRows object.

        The first row seen for a scan/feed/window/polarization creates the
        entry with all of its details; later rows for the same key only
        append their FITS row number to the entry's 'ROW' list.

        """
        key = self.Key(scan, feed, window, polarization)
        entry = self.rows.get(key)

        if entry is None:
            self.rows[key] = {
                'EXTENSION': fitsExtension,
                'ROW': [rowOfFitsFile],
                'OBSID': obsid,
                'PROCNAME': procname,
                'PROCSCAN': procscan,
                'NCHANS': nchans
            }
        else:
            entry['ROW'].append(rowOfFitsFile)

    def get(self, scan, feed, window, polarization):
        """Retrieve the stored entry for scan/feed/win/pol.

        Raises KeyError if nothing was added for that combination.

        """
        return self.rows[(scan, feed, window, polarization)]

    def scans(self):
        """Return a sorted list of scans in the observation.

        """
        return sorted(self._distinct('scan'))

    def feeds(self):
        """Return a list of feeds in the observation.

        """
        return self._distinct('feed')

    def windows(self):
        """Return a list of windows in the observation.

        """
        return self._distinct('window')

    def pols(self):
        """Return a list of polarizations in the observation.

        """
        return self._distinct('polarization')
class Artifact(object):
    """State and behaviour of one step in a document's filter chain.

    An artifact carries its metadata, its content (``data_dict``, an
    OrderedDict of section name -> text), the inputs it depends on, and the
    logic to set itself up, compute its hashstring, run its filter and derive
    the various file names under ``artifacts_dir``.

    Persistence hooks used here (``save_meta``, ``load_meta``, ``load_input``,
    ``load_output``, ``save_output``, ``is_output_cached``) are not defined in
    this class; an instance without ``save_meta`` is treated as abstract
    (see ``is_abstract``).
    """

    HASH_WHITELIST = Constants.ARTIFACT_HASH_WHITELIST

    META_ATTRS = [
        'additional',
        'binary_input',
        'binary_output',
        'created_by',
        'document_key',
        'ext',
        'final',
        'hashfunction',
        'initial',
        'is_last',
        'logstream',
        'key',
        'name',
        'output_hash',
        'state',
        'stdout'
    ]

    BINARY_EXTENSIONS = [
        '.gif',
        '.jpg',
        '.png',
        '.pdf',
        '.zip',
        '.tgz',
        '.gz',
        '.eot',
        '.ttf',
        '.woff',
        '.sqlite',
        '.sqlite3',
        '.swf'
    ]

    def __init__(self):
        # The filter list is looked up once and cached on the class.
        if not hasattr(self.__class__, 'FILTERS'):
            self.__class__.FILTERS = dexy.introspect.filters(Constants.NULL_LOGGER)

        self._inputs = {}
        self.additional = None
        self.args = {}
        self.args['globals'] = {}
        self.artifacts_dir = 'artifacts'  # TODO don't hard code
        self.batch_id = None
        self.batch_order = None
        self.binary_input = None
        self.binary_output = None
        self.controller_args = {}
        self.controller_args['globals'] = {}
        self.created_by = None
        self.ctime = None
        self.data_dict = OrderedDict()
        self.dexy_version = Version.VERSION
        self.dirty = False
        self.document_key = None
        self.elapsed = 0
        self.ext = None
        self.final = None
        self.finish_time = None
        self.hashfunction = 'md5'
        self.initial = None
        self.inode = None
        self.input_data_dict = OrderedDict()
        self.is_last = False
        self.key = None
        self.log = logging.getLogger()
        self.logstream = ""
        self.mtime = None
        self.name = None
        self.source = None
        self.start_time = None
        self.state = 'new'
        self.stdout = None

    def is_complete(self):
        """Return True when the artifact's state is 'complete'."""
        return str(self.state) == 'complete'

    @classmethod
    def retrieve(klass, hashstring):
        """Return the artifact for hashstring, loading it at most once.

        Loaded artifacts are cached on the class in ``retrieved_artifacts``.
        """
        if not hasattr(klass, 'retrieved_artifacts'):
            klass.retrieved_artifacts = {}

        if hashstring in klass.retrieved_artifacts:
            return klass.retrieved_artifacts[hashstring]

        artifact = klass()
        artifact.hashstring = hashstring
        artifact.load()
        klass.retrieved_artifacts[hashstring] = artifact
        return artifact

    def load(self):
        """Load metadata and input; load output if complete and not in memory."""
        self.load_meta()
        self.load_input()
        if self.is_complete() and not self.is_loaded():
            self.load_output()

    def load_inputs(self):
        """Call load() on every input artifact."""
        for a in self.inputs():
            a.load()

    def save(self):
        """Persist metadata and, if complete and not yet cached, the output.

        Does nothing for abstract artifacts.  Raises Exception when the
        hashstring is empty; IOError from saving output is reported on stdout
        and re-raised.
        """
        if self.is_abstract():
            pass  # For testing.
        elif not self.hashstring:
            raise Exception("can't persist an artifact without a hashstring!")
        else:
            self.save_meta()
            if self.is_complete() and not self.is_output_cached():
                try:
                    self.save_output()
                except IOError:
                    print("An error occured while saving %s" % self.key)
                    # Bare raise keeps the original traceback.
                    raise

    def is_abstract(self):
        """Return True when this instance has no ``save_meta`` method."""
        return not hasattr(self, 'save_meta')

    def setup_initial(self):
        """
        Set up an initial artifact (the first artifact in a document's filter chain).
        """
        self._inputs = self.doc.input_artifacts()
        self.binary_input = (self.doc.ext in self.BINARY_EXTENSIONS)
        self.binary_output = self.binary_input
        self.ext = self.doc.ext
        self.initial = True

        if 'final' in self.args:
            self.final = self.args['final']
        elif os.path.basename(self.name).startswith("_"):
            # Files whose basename starts with an underscore are not final.
            self.final = False

        if not self.doc.virtual:
            stat_info = os.stat(self.name)
            self.ctime = stat_info[stat.ST_CTIME]
            self.mtime = stat_info[stat.ST_MTIME]
            self.inode = stat_info[stat.ST_INO]

        self.set_data(self.doc.initial_artifact_data())

        # TODO remove?
        # An empty (or missing) data dict is falsy, so one check covers both
        # the "missing" and the "zero length" case.
        if not self.data_dict:
            raise Exception("no data dict!")

        self.state = 'complete'

    def setup_from_filter_class(self):
        """Copy name, source hash, version and FINAL flag from the filter class.

        The source hash and version are computed once and cached on the
        filter class as ``SOURCE_CODE`` and ``VERSION``.
        """
        # cache filter class source code so it only has to be calculated once
        if not hasattr(self.filter_class, 'SOURCE_CODE'):
            # get source code of this filter class + all parent filter classes.
            source = ""
            klass = self.filter_class

            # get source code from filter class and all parent classes
            while klass != dexy.dexy_filter.DexyFilter:
                source += inspect.getsource(klass)
                klass = klass.__base__

            # and then get source code of DexyFilter class
            source += inspect.getsource(dexy.dexy_filter.DexyFilter)

            self.filter_class.SOURCE_CODE = self.compute_hash(source)

        if not hasattr(self.filter_class, 'VERSION'):
            self.filter_class.VERSION = self.filter_class.version(self.log)

        self.filter_name = self.filter_class.__name__
        self.filter_source = self.filter_class.SOURCE_CODE
        self.filter_version = self.filter_class.VERSION

        if self.final is None:
            self.final = self.filter_class.FINAL

    def setup_from_previous_artifact(self, previous_artifact):
        """Inherit inputs, timestamps and input data from previous_artifact.

        Also determines this artifact's output extension and whether its
        output is binary, then sets state to 'setup'.
        """
        for a in ['final', 'mtime', 'ctime', 'inode']:
            setattr(self, a, getattr(previous_artifact, a))

        self._inputs.update(previous_artifact.inputs())
        # Need to loop over each artifact's inputs in case extra ones have been
        # added anywhere.
        for a in previous_artifact.inputs().values():
            self._inputs.update(a.inputs())

        self.binary_input = previous_artifact.binary_output
        self.input_data_dict = previous_artifact.data_dict
        self.input_ext = previous_artifact.ext
        self.previous_artifact_filename = previous_artifact.filename()
        self.previous_artifact_filepath = previous_artifact.filepath()
        self.previous_canonical_filename = previous_artifact.canonical_filename(True)

        # The JSON output of previous artifact
        if not previous_artifact.binary_output:
            self.previous_cached_output_filepath = previous_artifact.cached_output_filepath()

        # Determine file extension of output
        if hasattr(self, 'next_filter_class'):
            next_inputs = self.next_filter_class.INPUT_EXTENSIONS
        else:
            next_inputs = None

        self.ext = self.filter_class.output_file_extension(
            previous_artifact.ext,
            self.name,
            next_inputs)

        self.binary_output = self.filter_class.BINARY
        if self.binary_output is None:
            self.set_binary_from_ext()

        self.state = 'setup'

    @classmethod
    def setup(klass, doc, artifact_key, filter_class = None, previous_artifact = None):
        """
        Create an Artifact instance and load all information needed to
        calculate its hashstring.
        """
        artifact = klass()
        artifact.key = artifact_key
        artifact.filter_class = filter_class

        # Add references for convenience
        artifact.artifacts_dir = doc.artifacts_dir
        artifact.controller_args = doc.controller.args
        artifact.hashfunction = doc.controller.args['hashfunction']
        artifact.db = doc.db
        artifact.doc = doc
        artifact.log = doc.log

        # These attributes are the same for all artifacts pertaining to a document
        artifact.args = doc.args
        artifact.batch_id = doc.batch_id
        artifact.document_key = doc.key()
        artifact.name = doc.name

        # Set batch order to next in sequence
        artifact.batch_order = artifact.db.next_batch_order(artifact.batch_id)

        next_filter_class = doc.next_filter_class()
        if next_filter_class:
            artifact.next_filter_name = next_filter_class.__name__
            artifact.next_filter_class = next_filter_class

        if previous_artifact:
            artifact.setup_from_previous_artifact(previous_artifact)
            artifact.setup_from_filter_class()
        else:
            artifact.setup_initial()

        artifact.set_hashstring()

        return artifact

    def run(self):
        """Run the filter (or reuse the cached result) and update the db.

        When 'nocache' is set or the artifact is not complete, the filter is
        instantiated and processed, a sha512 ``output_hash`` is computed from
        the in-memory data or from the output file, and the artifact is
        saved.  Otherwise the cached artifact is used and any additional
        artifacts it created are appended to the db.  Elapsed time is
        recorded in both cases.

        Exceptions raised by the filter are reported on stdout and re-raised.
        """
        start = time.time()

        if self.controller_args['nocache'] or not self.is_complete():
            # We have to actually run things...
            if not self.filter_class:
                self.filter_class = dexy.introspect.get_filter_by_name(self.filter_name, self.doc.__class__.filter_list)

            # Set up instance of filter.
            filter_instance = self.filter_class()
            filter_instance.artifact = self
            filter_instance.log = self.log

            try:
                filter_instance.process()
            except Exception:
                print("Error occurred while running %s" % self.key)
                tb = sys.exc_info()[2]
                print("Original traceback:")
                traceback.print_tb(tb)

                pattern = os.path.join(self.artifacts_dir, self.hashstring)
                files_matching = glob.glob(pattern)
                if len(files_matching) > 0:
                    print("Here are working files which might have clues about this error:")
                    for f in files_matching:
                        print(f)
                # Bare raise re-raises with the original traceback intact.
                raise

            h = hashlib.sha512()

            if self.data_dict and len(self.data_dict) > 0:
                h.update(self.output_text().encode("utf-8"))

            # NOTE(review): this tests the attribute itself rather than
            # calling it; if is_canonical_output_cached is a method this
            # branch is always taken and the else below is unreachable.
            # Left unchanged because its definition is not visible here.
            elif self.is_canonical_output_cached:
                self.state = 'complete'
                self.save()
                # Hash the output file in block-sized chunks; the with-block
                # guarantees the handle is closed.
                with open(self.filepath(), "rb") as f:
                    while True:
                        data = f.read(h.block_size)
                        if not data:
                            break
                        h.update(data)
            else:
                raise Exception("data neither in memory nor on disk")

            self.output_hash = h.hexdigest()

            self.logstream = self.doc.logstream.getvalue()
            self.state = 'complete'
            self.source = 'run'
            self.save()
        else:
            self.source = 'cache'
            self.log.debug("using cached artifact for %s" % self.key)

            # make sure additional artifacts are added to db
            for a in self.inputs().values():
                if a.created_by == self.key:
                    if not a.additional:
                        raise Exception("created_by should only apply to additional artifacts")
                    # TODO Should this be done in Artifact.retrieve?
                    a.batch_id = self.batch_id
                    self.db.append_artifact(a)

        self.elapsed = time.time() - start
        self.db.update_artifact(self)

    def add_additional_artifact(self, key_with_ext, ext):
        """create an 'additional' artifact with random hashstring"""
        new_artifact = self.__class__()
        new_artifact.key = key_with_ext
        if ext.startswith("."):
            new_artifact.ext = ext
        else:
            new_artifact.ext = ".%s" % ext
        new_artifact.final = True
        new_artifact.hashfunction = self.hashfunction
        new_artifact.additional = True
        new_artifact.set_binary_from_ext()
        new_artifact.artifacts_dir = self.artifacts_dir
        new_artifact.inode = self.hashstring
        # NOTE(review): run() compares created_by against self.key, while the
        # document key is stored here - confirm which one is intended.
        new_artifact.created_by = self.document_key

        # TODO filter class source?
        # TODO this is duplicated in setup_from_previous_artifact, should reorganize
        for at in ['batch_id', 'document_key', 'mtime', 'ctime']:
            val = getattr(self, at)
            setattr(new_artifact, at, val)

        new_artifact.set_hashstring()
        self.add_input(key_with_ext, new_artifact)
        self.db.append_artifact(new_artifact)  # append to db because not part of doc.artifacts
        return new_artifact

    def add_input(self, key, artifact):
        """Register artifact as an input under key."""
        self._inputs[key] = artifact

    def inputs(self):
        """Return the dict of input artifacts (key -> artifact)."""
        return self._inputs

    def set_binary_from_ext(self):
        """Set binary_output according to whether ext is a known binary extension."""
        # TODO list more binary extensions or find better way to do this
        if self.ext in self.BINARY_EXTENSIONS:
            self.binary_output = True
        else:
            self.binary_output = False

    def set_data(self, data):
        """Store data as the single section '1' of data_dict."""
        self.data_dict['1'] = data

    def set_data_from_artifact(self):
        """Read the artifact's file as UTF-8 into section '1' of data_dict."""
        with codecs.open(self.filepath(), "r", encoding="utf-8") as f:
            self.data_dict['1'] = f.read()

    def is_loaded(self):
        """Return True when data_dict exists and is non-empty."""
        return hasattr(self, 'data_dict') and len(self.data_dict) > 0

    def compute_hash(self, text):
        """Return the hash of text using self.hashfunction.

        Supports 'md5' (hex digest of the UTF-8 encoded text) and 'crc32' /
        'adler32' (decimal string of the unsigned 32-bit checksum).  Raises
        Exception for any other hash function name.
        """
        if self.hashfunction == 'md5':
            if isinstance(text, unicode):
                unicode_text = text
            else:
                unicode_text = unicode(text, encoding="utf-8")
            h = hashlib.md5(unicode_text.encode("utf-8")).hexdigest()
        elif self.hashfunction == 'crc32':
            h = str(zlib.crc32(str(text)) & 0xffffffff)
        elif self.hashfunction == 'adler32':
            h = str(zlib.adler32(str(text)) & 0xffffffff)
        else:
            raise Exception("unexpected hash function %s" % self.hashfunction)
        return h

    def input_hashes(self):
        """
        Returns an OrderedDict of key, hashstring for each input artifact, sorted by key.
        """
        return OrderedDict((k, str(self.inputs()[k].hashstring)) for k in sorted(self.inputs()))

    def hash_dict(self):
        """
        Calculate and cache the elements used to compute the hashstring
        """
        if not hasattr(self.__class__, 'SOURCE_CODE'):
            artifact_class_source = inspect.getsource(self.__class__)
            artifact_py_source = inspect.getsource(Artifact)
            self.__class__.SOURCE_CODE = self.compute_hash(artifact_class_source + artifact_py_source)

        self.artifact_class_source = self.__class__.SOURCE_CODE

        if self.dirty:
            # A dirty artifact gets a time-dependent attribute.
            self.dirty_string = time.gmtime()

        hash_dict = OrderedDict()

        hash_dict['inputs'] = self.input_hashes()

        for k in self.HASH_WHITELIST:
            if k in self.__dict__:
                v = self.__dict__[k]
                if hasattr(v, 'items'):
                    hash_v = OrderedDict()
                    for k1 in sorted(v.keys()):
                        v1 = v[k1]
                        try:
                            if len(str(v1)) > 50:
                                raise Exception()
                            json.dumps(v1)
                            hash_v[str(k1)] = v1
                        except Exception:
                            # Use a hash if we will have problems saving to JSON
                            # or if the data is large (don't want to clutter up the DB,
                            # makes it harder to spot differences)
                            hash_v[str(k1)] = self.compute_hash(v1)
                else:
                    hash_v = str(v)
                hash_dict[str(k)] = hash_v

        return hash_dict

    def set_hashstring(self):
        """Compute and store the hashstring, then try to load cached state.

        Raises Exception if a hashstring was already set.  An AttributeError
        while loading is ignored for abstract artifacts and re-raised
        otherwise; an IOError while loading results in the metadata being
        saved instead.
        """
        if hasattr(self, 'hashstring'):
            raise Exception("setting hashstring twice")

        hash_data = str(self.hash_dict())
        self.hashstring = self.compute_hash(hash_data)

        try:
            # load() may overwrite document_key, so restore it afterwards.
            original_document_key = self.document_key
            if not self.is_loaded():
                self.load()
            self.document_key = original_document_key
        except AttributeError:
            if not self.is_abstract():
                raise
        except IOError:
            self.save_meta()

    def convert_if_not_unicode(self, s):
        """Return s as unicode, decoding byte strings as UTF-8."""
        if isinstance(s, unicode):
            return s
        return unicode(s, encoding="utf-8")

    def input_text(self):
        """Return all input sections joined into one unicode string."""
        return u"".join([self.convert_if_not_unicode(v) for v in self.input_data_dict.values()])

    def output_text(self):
        """Return all output sections joined into one unicode string."""
        return u"".join([self.convert_if_not_unicode(v) for v in self.data_dict.values()])

    def relative_refs(self, relative_to_file):
        """How to refer to this artifact, relative to another."""
        doc_dir = os.path.dirname(relative_to_file)
        return [
            os.path.relpath(self.key, doc_dir),
            os.path.relpath(self.long_canonical_filename(), doc_dir),
            "/%s" % self.key,
            "/%s" % self.long_canonical_filename()
        ]

    def use_canonical_filename(self):
        """Returns the canonical filename after saving contents under this name
        in the artifacts directory."""
        self.write_to_file(os.path.join(self.artifacts_dir, self.canonical_filename()))
        return self.canonical_filename()

    def write_to_file(self, filename):
        """Copy the artifact's file to filename, creating parent dirs as needed."""
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname) and not dirname == '':
            os.makedirs(dirname)
        shutil.copyfile(self.filepath(), filename)

    def work_filename(self):
        """Return '<hashstring>.work<input_ext>'."""
        return "%s.work%s" % (self.hashstring, self.input_ext)

    def generate_workfile(self, work_filename = None):
        """Write the input text as UTF-8 to a work file in artifacts_dir."""
        if not work_filename:
            work_filename = self.work_filename()
        work_path = os.path.join(self.artifacts_dir, work_filename)
        with codecs.open(work_path, "w", encoding="utf-8") as work_file:
            work_file.write(self.input_text())

    def temp_filename(self, ext):
        """Return '<hashstring>.work<ext>'."""
        return "%s.work%s" % (self.hashstring, ext)

    def open_tempfile(self, ext):
        """Create (or truncate) the temp file for ext in artifacts_dir.

        The file is opened for writing and closed again immediately; nothing
        is returned.
        """
        tempfile_path = os.path.join(self.artifacts_dir, self.temp_filename(ext))
        with codecs.open(tempfile_path, "w", encoding="utf-8"):
            pass

    def temp_dir(self):
        """Return the path '<artifacts_dir>/<hashstring>'."""
        return os.path.join(self.artifacts_dir, self.hashstring)

    def create_temp_dir(self, populate=False):
        """Recreate the temp dir; optionally fill it with inputs and workfile.

        Any existing temp dir is removed first.  With populate=True every
        input artifact whose file exists is copied in under its canonical
        name, followed by the previous artifact's file.
        """
        tempdir = self.temp_dir()
        shutil.rmtree(tempdir, ignore_errors=True)
        os.mkdir(tempdir)

        if populate:
            # write all inputs to this directory, under their canonical names
            for input_artifact in self._inputs.values():
                filename = os.path.join(tempdir, input_artifact.canonical_filename())
                if os.path.exists(input_artifact.filepath()):
                    input_artifact.write_to_file(filename)
                    self.log.debug("Populating temp dir for %s with %s" % (self.key, filename))
                else:
                    self.log.warn("Skipping file %s for temp dir for %s, file does not exist (yet)" % (filename, self.key))

            # write the workfile to this directory under its canonical name
            previous = self.previous_artifact_filepath
            workfile = os.path.join(tempdir, self.previous_canonical_filename)
            if not os.path.exists(os.path.dirname(workfile)):
                os.makedirs(os.path.dirname(workfile))
            shutil.copyfile(previous, workfile)

    def canonical_dir(self, ignore_args = False):
        """Return the directory part of self.name (ignore_args is unused)."""
        return os.path.dirname(self.name)

    def canonical_basename(self, ignore_args = False):
        """Return the basename of the canonical filename."""
        return os.path.basename(self.canonical_filename(ignore_args))

    def canonical_filename(self, ignore_args = False):
        """Return the human-readable output filename for this artifact.

        Built from the part of the key before the first '|' with its
        extension replaced by self.ext.  Unless ignore_args is true, a
        'canonical-name' arg replaces the basename and a 'postfix' arg is
        appended after the extension.
        """
        fn = os.path.splitext(self.key.split("|")[0])[0]

        if 'canonical-name' in self.args and not ignore_args:
            parent_dir = os.path.dirname(fn)
            return os.path.join(parent_dir, self.args['canonical-name'])
        elif 'postfix' in self.args and not ignore_args:
            return "%s%s%s" % (fn, self.ext, self.args['postfix'])
        else:
            return "%s%s" % (fn, self.ext)

    def long_canonical_filename(self):
        """Return the key with '|' replaced by '-', plus the extension."""
        return "%s%s" % (self.key.replace("|", "-"), self.ext)

    def filename(self):
        """
        The filename where artifact content is stored, based on the hashstring.
        """
        if not hasattr(self, 'ext'):
            raise Exception("artifact %s has no ext" % self.key)
        return "%s%s" % (self.hashstring, self.ext)

    def filepath(self):
        """
        Full path (including artifacts dir location) to location where artifact content is stored.
        """
        return os.path.join(self.artifacts_dir, self.filename())

    def abs_filepath(self):
        """Return the absolute version of filepath()."""
        return os.path.abspath(self.filepath())

    def breadcrumbs(self):
        """A list of parent dirs, plus the filename if it's not 'index.html'."""
        parent_dirs = os.path.dirname(self.canonical_filename()).split("/")

        if self.canonical_basename() == "index.html":
            result = parent_dirs
        else:
            # list.append() returns None, so build a new list instead of
            # assigning the result of append.
            result = parent_dirs + [self.canonical_basename()]

        if not result:
            result = []

        return result

    def titleized_name(self):
        """Return a title-cased display name with dashes turned into spaces.

        Uses the last breadcrumb for 'index.html', otherwise the basename
        without its extension.
        """
        if self.canonical_basename() == "index.html":
            return self.breadcrumbs()[-1].replace("-", " ").title()
        else:
            return os.path.splitext(self.canonical_basename())[0].replace("-", " ").title()

    def unique_key(self):
        """Return 'batch_id:document_key:key'."""
        return "%s:%s:%s" % (self.batch_id, self.document_key, self.key)

    def web_safe_document_key(self):
        """Return the document key with '/' and '|' replaced by '-'."""
        # TODO this might not be unique
        return self.document_key.replace("/", "-").replace("|", "-")

    def url(self):
        """Return the root-relative URL of the canonical filename."""
        # TODO test for final
        return "/%s" % self.canonical_filename()

    def hyperlink(self, link_text = None):
        """Return an HTML link to url(); link text defaults to the basename."""
        # TODO test for final
        if not link_text:
            link_text = self.canonical_basename()

        return """<a href="%s">%s</a>""" % (self.url(), link_text)

    def iframe(self, link_text = None, width = "600px", height = "300px"):
        """Return an HTML iframe for url() with a hyperlink as fallback content."""
        # TODO test for final
        args = {
            'url' : self.url(),
            'hyperlink' : self.hyperlink(link_text),
            'width' : width,
            'height' : height
        }
        return """ <iframe src="%(url)s" width="%(width)s" height="%(height)s" style="border: thin solid gray;"> %(hyperlink)s </iframe> """ % args

    def img(self):
        """Return an HTML img tag pointing at the canonical filename."""
        # TODO test for final
        return """<img src="/%s" alt="Image generated by dexy %s" />""" % (self.canonical_filename(), self.key)

    def has_sections(self):
        """Return True unless data_dict consists of exactly the section '1'."""
        return list(self.data_dict.keys()) != ['1']

    def relative_path_to_input(self, input_artifact):
        """Return the path from this artifact's dir to the input's dir ('' if same)."""
        my_dir = os.path.dirname(self.name)
        input_dir = os.path.dirname(input_artifact.name)
        if my_dir == input_dir:
            relpath = ""
        else:
            relpath = os.path.relpath(input_dir, my_dir)
        return relpath

    def relative_key_for_input(self, input_artifact):
        """Return the input's key basename prefixed by the relative path to it."""
        relpath = self.relative_path_to_input(input_artifact)
        return os.path.join(relpath, os.path.basename(input_artifact.key))
def keys(self):
    """Return the mapping's keys as a new list, using OrderedDict's own keys()."""
    ordered_keys = OrderedDict.keys(self)
    return list(ordered_keys)
class InMemoryDataStore(Delegate):
    """Data store that keeps all column families in process memory.

    Column families are held in ``self.tables`` (name -> ColumnFamily).
    Inserts and removes are applied immediately unless a batch is open, in
    which case they are queued and applied when the outermost batch commits.
    """

    def __init__(self):
        super(InMemoryDataStore, self).__init__()
        self.tables = OrderedDict()
        self.transactions = []  # queued zero-argument callables for the open batch
        self.batch_count = 0    # nesting depth of start_batch() calls
        self.in_batch = False

    def create(self):
        """No-op: since the data store is in memory, nothing needs to be done."""
        pass

    def drop(self):
        """Discard every column family."""
        self.tables = OrderedDict()

    def truncate(self):
        """Same as drop()."""
        self.drop()

    def get_count(self, type, row, columns=None, column_start=None, super_column=None, column_finish=None):
        """Delegate to get_count() of the column family named type."""
        return self.get_cf(type).get_count(row, columns=columns, column_start=column_start,
                                           column_finish=column_finish, super_column=super_column)

    def get_cf(self, cf_name):
        """Return the column family cf_name, creating it on first use."""
        if cf_name not in self.tables:
            self.tables[cf_name] = self.create_cf(cf_name)
        return self.tables[cf_name]

    def create_cf(self, type, column_type=ASCII, super=False, index_columns=None):
        """Create, register and return a ColumnFamily named type.

        Any existing column family with that name is replaced.  ``super`` and
        ``index_columns`` are accepted but not used here; ``index_columns``
        defaults to None rather than a shared mutable list.
        """
        self.tables[type] = ColumnFamily(type, column_type)
        return self.tables[type]

    def create_secondary_index(self, type, column, column_type=None):
        """No-op: for now we just do complete scans since memory is "fast enough"."""
        pass

    def cf_exists(self, type):
        """Return True when a column family named type is registered."""
        return type in self.tables

    def insert(self, cf, row, columns):
        """Insert columns into row of cf, now or when the batch commits."""
        def execute():
            cf.insert(row, columns)

        if self.in_batch:
            self.transactions.append(execute)
        else:
            execute()

    def remove(self, cf, row, columns=None, super_column=None):
        """Remove from row of cf, now or when the batch commits."""
        def execute():
            cf.remove(row, columns=columns, super_column=super_column)

        if self.in_batch:
            self.transactions.append(execute)
        else:
            execute()

    def start_batch(self, queue_size=0):
        """Open a (possibly nested) batch; queue_size is accepted but unused."""
        self.in_batch = True
        self.batch_count += 1

    def commit_batch(self):
        """Close one batch level; apply queued operations at the outermost level.

        A commit without a matching start_batch() does not push the depth
        below zero, so later batches still flush.  If a queued operation
        raises, the exception propagates and the queue and batch flag are
        reset rather than left in a stale state.
        """
        if self.batch_count > 0:
            self.batch_count -= 1

        if self.batch_count == 0:
            try:
                for item in self.transactions:
                    item()
            finally:
                self.transactions = []
                self.in_batch = False
class InMemoryDataStore(Delegate):
    """Data store that keeps all column families in process memory.

    Column families are held in ``self.tables`` (name -> ColumnFamily).
    Inserts and removes are applied immediately unless a batch is open, in
    which case they are queued and applied when the outermost batch commits.
    """

    def __init__(self):
        super(InMemoryDataStore, self).__init__()
        self.tables = OrderedDict()
        self.transactions = []  # queued zero-argument callables for the open batch
        self.batch_count = 0    # nesting depth of start_batch() calls
        self.in_batch = False

    def create(self):
        """No-op: since the data store is in memory, nothing needs to be done."""
        pass

    def drop(self):
        """Discard every column family."""
        self.tables = OrderedDict()

    def truncate(self):
        """Same as drop()."""
        self.drop()

    def get_count(self, type, row, columns=None, column_start=None, super_column=None, column_finish=None):
        """Delegate to get_count() of the column family named type."""
        return self.get_cf(type).get_count(row, columns=columns, column_start=column_start,
                                           column_finish=column_finish, super_column=super_column)

    def get_cf(self, cf_name):
        """Return the column family cf_name, creating it on first use."""
        if cf_name not in self.tables:
            self.tables[cf_name] = self.create_cf(cf_name)
        return self.tables[cf_name]

    def create_cf(self, type, column_type=ASCII, super=False, index_columns=None):
        """Create, register and return a ColumnFamily named type.

        Any existing column family with that name is replaced.  ``super`` and
        ``index_columns`` are accepted but not used here; ``index_columns``
        defaults to None rather than a shared mutable list.
        """
        self.tables[type] = ColumnFamily(type, column_type)
        return self.tables[type]

    def create_secondary_index(self, type, column, column_type=None):
        """No-op: for now we just do complete scans since memory is "fast enough"."""
        pass

    def cf_exists(self, type):
        """Return True when a column family named type is registered."""
        return type in self.tables

    def insert(self, cf, row, columns):
        """Insert columns into row of cf, now or when the batch commits."""
        def execute():
            cf.insert(row, columns)

        if self.in_batch:
            self.transactions.append(execute)
        else:
            execute()

    def remove(self, cf, row, columns=None, super_column=None):
        """Remove from row of cf, now or when the batch commits."""
        def execute():
            cf.remove(row, columns=columns, super_column=super_column)

        if self.in_batch:
            self.transactions.append(execute)
        else:
            execute()

    def start_batch(self, queue_size=0):
        """Open a (possibly nested) batch; queue_size is accepted but unused."""
        self.in_batch = True
        self.batch_count += 1

    def commit_batch(self):
        """Close one batch level; apply queued operations at the outermost level.

        A commit without a matching start_batch() does not push the depth
        below zero, so later batches still flush.  If a queued operation
        raises, the exception propagates and the queue and batch flag are
        reset rather than left in a stale state.
        """
        if self.batch_count > 0:
            self.batch_count -= 1

        if self.batch_count == 0:
            try:
                for item in self.transactions:
                    item()
            finally:
                self.transactions = []
                self.in_batch = False