def handle(self, *args, **options):
    """
    Main command handler.

    Dispatches on the mutually exclusive job_* options: list all jobs,
    dump one job's data, age jobs, or flush every job.
    """
    if options['job_list']:
        # Tabulate every job's metadata instead of processing anything.
        headers = ['ID', 'Table', 'Created', 'Touched', 'Sts', 'Refs',
                   'Progress', 'Data file']
        rows = []
        for job in Job.objects.all():
            path = job.datafile()
            if not os.path.exists(path):
                # Flag jobs whose backing data file has disappeared.
                path = path + " (missing)"
            rows.append([job.id, job.table.name, job.created, job.touched,
                         job.status, job.refcount, job.progress, path])
        Formatter.print_table(rows, headers)
    elif options['job_data']:
        # Dump the stored values of a single job, but only if complete.
        job = Job.objects.get(id=options['job_data'])
        headers = [col.name for col in job.table.get_columns()]
        if job.status == job.COMPLETE:
            Formatter.print_table(job.values(), headers)
    elif options['job_age']:
        Job.age_jobs(force=True)
    elif options['job_flush']:
        Job.objects.all().delete()
def post(self, request, namespace, report_slug, widget_id, format=None):
    """Create and start a table Job for one widget of a report.

    Reads the JSON 'criteria' payload from the POST, validates it
    against the widget's criteria form, then creates a Job plus a
    WidgetJob record and returns the URL for polling the job.

    Raises Http404 if the report or widget does not exist, and
    ValueError if the criteria form fails validation.  On a failure to
    start the job, returns an HTTP 400 with the exception text.
    """
    logger.debug("Received POST for report %s, widget %s: %s" %
                 (report_slug, widget_id, request.POST))

    try:
        report = Report.objects.get(namespace=namespace, slug=report_slug)
        widget = Widget.objects.get(id=widget_id)
    except (Report.DoesNotExist, Widget.DoesNotExist):
        # Narrowed from a bare except: only a missing report/widget
        # should map to a 404; other errors must propagate.
        raise Http404

    req_json = json.loads(request.POST['criteria'])

    fields = widget.collect_fields()
    form = TableFieldForm(fields, use_widgets=False,
                          hidden_fields=report.hidden_fields,
                          include_hidden=True,
                          data=req_json, files=request.FILES)

    if not form.is_valid():
        # Previously an unreachable else-branch dropped into an IPython
        # debugger; invalid criteria are reported via this exception.
        raise ValueError("Widget internal criteria form is invalid:\n%s" %
                         (form.errors.as_text()))

    logger.debug('Form passed validation: %s' % form)
    formdata = form.cleaned_data
    logger.debug('Form cleaned data: %s' % formdata)

    # parse time and localize to user profile timezone
    profile = request.user.userprofile
    timezone = pytz.timezone(profile.timezone)
    form.apply_timezone(timezone)

    form_criteria = form.criteria()
    logger.debug('Form_criteria: %s' % form_criteria)

    try:
        job = Job.create(table=widget.table(), criteria=form_criteria)
        job.start()

        wjob = WidgetJob(widget=widget, job=job)
        wjob.save()

        logger.debug("Created WidgetJob %s for report %s (handle %s)" %
                     (str(wjob), report_slug, job.handle))

        return Response({"joburl": reverse('report-job-detail',
                                           args=[namespace, report_slug,
                                                 widget_id, wjob.id])})
    except Exception as e:
        logger.exception("Failed to start job, an exception occurred")
        return HttpResponse(str(e), status=400)
def post(self, request, namespace, report_slug, widget_id, format=None):
    """Create and start a table Job for one widget of a report.

    Reads the JSON 'criteria' payload from the POST, validates it
    against the widget's criteria form, then creates a Job plus a
    WidgetJob record and returns the URL for polling the job.

    Raises Http404 if the report or widget does not exist, and
    ValueError if the criteria form fails validation.  On a failure to
    start the job, returns an HTTP 400 with the exception text.
    """
    logger.debug("Received POST for report %s, widget %s: %s" %
                 (report_slug, widget_id, request.POST))

    try:
        report = Report.objects.get(namespace=namespace, slug=report_slug)
        widget = Widget.objects.get(id=widget_id)
    except (Report.DoesNotExist, Widget.DoesNotExist):
        # Narrowed from a bare except: only a missing report/widget
        # should map to a 404; other errors must propagate.
        raise Http404

    req_json = json.loads(request.POST['criteria'])

    fields = widget.collect_fields()
    form = TableFieldForm(fields, use_widgets=False,
                          hidden_fields=report.hidden_fields,
                          include_hidden=True,
                          data=req_json, files=request.FILES)

    if not form.is_valid():
        # Previously an unreachable else-branch dropped into an IPython
        # debugger; invalid criteria are reported via this exception.
        raise ValueError("Widget internal criteria form is invalid:\n%s" %
                         (form.errors.as_text()))

    logger.debug('Form passed validation: %s' % form)
    formdata = form.cleaned_data
    logger.debug('Form cleaned data: %s' % formdata)

    # parse time and localize to user profile timezone
    profile = request.user.userprofile
    timezone = pytz.timezone(profile.timezone)
    form.apply_timezone(timezone)

    try:
        job = Job.create(table=widget.table(), criteria=form.criteria())
        job.start()

        wjob = WidgetJob(widget=widget, job=job)
        wjob.save()

        logger.debug("Created WidgetJob %s for report %s (handle %s)" %
                     (str(wjob), report_slug, job.handle))

        return Response({"joburl": reverse('report-job-detail',
                                           args=[namespace, report_slug,
                                                 widget_id, wjob.id])})
    except Exception as e:
        logger.exception("Failed to start job, an exception occurred")
        return HttpResponse(str(e), status=400)
def handle(self, *args, **options):
    """
    Main command handler.

    Dispatches on the mutually exclusive job_* options: list all jobs
    (with parent-job IDs), dump one job's data, age jobs, or flush
    every job in bounded batches.
    """
    if options['job_list']:
        # Tabulate every job's metadata instead of processing anything.
        headers = ['ID', 'PID', 'Table', 'Created', 'Touched', 'Sts',
                   'Refs', 'Progress', 'Data file']
        rows = []
        for job in Job.objects.all().order_by('id'):
            path = job.datafile()
            if not os.path.exists(path):
                # Flag jobs whose backing data file has disappeared.
                path = path + " (missing)"
            pid = job.parent.id if job.parent else '--'
            rows.append([job.id, pid, job.table.name, job.created,
                         job.touched, job.status, job.refcount,
                         job.progress, path])
        Formatter.print_table(rows, headers)
    elif options['job_data']:
        # Dump the stored values of a single job, but only if complete.
        job = Job.objects.get(id=options['job_data'])
        headers = [col.name for col in job.table.get_columns()]
        if job.status == job.COMPLETE:
            Formatter.print_table(job.values(), headers)
    elif options['job_age']:
        logger.debug('Aging all jobs.')
        Job.age_jobs(force=True)
    elif options['job_flush']:
        logger.debug('Flushing all jobs.')
        # Delete in batches of 100 so each SQL statement stays bounded.
        while Job.objects.count():
            ids = Job.objects.values_list('pk', flat=True)[:100]
            Job.objects.filter(pk__in=ids).delete()
# MIT License set forth at: # https://github.com/riverbed/flyscript-portal/blob/master/LICENSE ("License"). # This software is distributed "AS IS" as set forth in the License. from django.conf.urls import patterns, include, url from django.http import HttpResponseRedirect # Uncomment the next two lines to enable the admin: from django.contrib import admin admin.autodiscover() from django.conf import settings from rvbd_portal.apps.datasource.models import Job Job.flush_incomplete() urlpatterns = patterns('', (r'^favicon\.ico$', lambda x: HttpResponseRedirect('/static/images/favicon.ico')), url(r'^$', lambda x: HttpResponseRedirect('/report')), url(r'^report/', include('rvbd_portal.apps.report.urls')), url(r'^devices/', include('rvbd_portal.apps.devices.urls')), url(r'^data/', include('rvbd_portal.apps.datasource.urls')), url(r'^geolocation/', include('rvbd_portal.apps.geolocation.urls')), url(r'^help/', include('rvbd_portal.apps.help.urls')), url(r'^console/', include('rvbd_portal.apps.console.urls')), url(r'^preferences/', include('rvbd_portal.apps.preferences.urls')), url(r'^plugins/', include('rvbd_portal.apps.plugins.urls')), # third party packages url(r'^announcements/', include('announcements.urls')),
def run(self):
    """Run every dependent-table job, then apply the analysis function.

    Returns True on success with the (optionally sorted/truncated)
    result dataframe in self.data; returns False after marking the job
    as errored when a dependent job or the analysis function fails.
    """
    # Collect all dependent tables
    options = self.table.options

    # Create dataframes for all tables, keyed by dependent-table name.
    dfs = {}

    deptables = options.tables
    if deptables:  # truthiness covers both None and an empty mapping
        logger.debug("%s: dependent tables: %s" % (self, deptables))
        depjobids = {}
        batch = BatchJobRunner(self.job, max_progress=70)
        for (name, id) in deptables.items():
            id = int(id)
            deptable = Table.objects.get(id=id)
            job = Job.create(
                table=deptable,
                criteria=self.job.criteria.build_for_table(deptable)
            )
            batch.add_job(job)
            logger.debug("%s: starting dependent job %s" % (self, job))
            depjobids[name] = job.id

        batch.run()

        logger.debug("%s: All dependent jobs complete, collecting data"
                     % str(self))

        failed = False
        for (name, id) in depjobids.items():
            job = Job.objects.get(id=id)
            if job.status == job.ERROR:
                self.job.mark_error("Dependent Job failed: %s"
                                    % job.message)
                failed = True
                break
            f = job.data()
            dfs[name] = f
            logger.debug("%s: Table[%s] - %d rows" %
                         (self, name, len(f) if f is not None else 0))
        if failed:
            return False

    logger.debug("%s: Calling analysis function %s"
                 % (self, str(options.func)))
    try:
        df = options.func(self, dfs, self.job.criteria,
                          params=options.params)
    except AnalysisException as e:
        self.job.mark_error("Analysis function %s failed: %s" %
                            (options.func, e.message))
        logger.exception("%s raised an exception" % self)
        return False
    except Exception as e:
        self.job.mark_error("Analysis function %s failed: %s" %
                            (options.func, str(e)))
        logger.exception("%s: Analysis function %s raised an exception" %
                         (self, options.func))
        return False

    # Sort according to the defined sort columns
    if df is not None:
        if self.table.sortcol:
            n = self.table.sortcol.name
            # Renamed from 'sorted' to avoid shadowing the builtin.
            sorted_df = df.sort(n, ascending=False)
            # Move NaN rows to the end
            df = (sorted_df[sorted_df[n].notnull()]
                  .append(sorted_df[sorted_df[n].isnull()]))
        if self.table.rows > 0:
            self.data = df[:self.table.rows]
        else:
            self.data = df
    else:
        self.data = None

    logger.debug("%s: completed successfully" % (self))
    return True
def handle(self, *args, **options):
    """Main command handler.

    Lists tables (flat or grouped by report/widget), lists a table's
    criteria fields, or runs a table: builds its criteria form from
    --criteria options, creates and starts a Job, waits for it to
    finish, and prints (or writes) the results.
    """
    self.options = options

    if options['table_list']:
        # print out the id's instead of processing anything
        tables = Table.objects.all()
        for t in tables:
            self.console('%5d - %s' % (t.id, t))

    elif options['table_list_by_report']:
        # or print them out organized by report/widget/table
        output = []
        reports = Report.objects.all()
        for report in reports:
            for section in report.section_set.all():
                for widget in section.widget_set.all():
                    for table in widget.tables.all():
                        line = [table.id, report.title,
                                widget.title, table]
                        output.append(line)
        Formatter.print_table(output, ['ID', 'Report', 'Widget', 'Table'])

    elif options['criteria_list']:
        # Resolve the target table by id or by name.
        if 'table_id' in options and options['table_id'] is not None:
            table = Table.objects.get(id=options['table_id'])
        elif 'table_name' in options:
            table = Table.objects.get(name=options['table_name'])
        else:
            raise ValueError("Must specify either --table-id or "
                             "--table-name to run a table")

        # Show the keyword/label of every criteria field on the table.
        form = self.get_form(table)
        output = [[c.keyword, c.label]
                  for c in form._tablefields.values()]
        Formatter.print_table(output, ['Keyword', 'Label'])

    else:
        # Run a table.  Resolve it by id or by name.
        if 'table_id' in options and options['table_id'] is not None:
            table = Table.objects.get(id=options['table_id'])
        elif 'table_name' in options:
            table = Table.objects.get(name=options['table_name'])
        else:
            raise ValueError("Must specify either --table-id or "
                             "--table-name to run a table")

        # Django gives us a nice error if we can't find the table
        self.console('Table %s found.' % table)

        # Parse criteria options of the form "keyword:value".
        criteria_options = {}
        if 'criteria' in options and options['criteria'] is not None:
            for s in options['criteria']:
                (k, v) = s.split(':', 1)
                criteria_options[k] = v

        form = self.get_form(table, data=criteria_options)

        if not form.is_valid(check_unknown=True):
            # Report each invalid criterion, then bail out.
            self.console('Invalid criteria:')
            logger.error('Invalid criteria: %s' %
                         ','.join('%s:%s' % (k, v)
                                  for k, v in form.errors.iteritems()))
            for k, v in form.errors.iteritems():
                self.console(' %s: %s' % (k, ','.join(v)))
            sys.exit(1)

        criteria = form.criteria()

        columns = [c.name for c in table.get_columns()]

        if options['only_columns']:
            # Only show the column names; don't run the table.
            print columns
            return

        job = Job.create(table=table, criteria=criteria)
        job.save()

        self.console('Job created: %s' % job)
        self.console('Criteria: %s' % criteria.print_details())

        start_time = datetime.datetime.now()
        job.start()
        self.console('Job running . . ', ending='')

        # wait for results
        while not job.done():
            #self.console('. ', ending='')
            #self.stdout.flush()
            time.sleep(1)

        end_time = datetime.datetime.now()
        delta = end_time - start_time
        # Total elapsed seconds including the sub-second component.
        seconds = float(delta.microseconds +
                        (delta.seconds + delta.days * 24 * 3600) *
                        10**6) / 10**6

        self.console('Done!! (elapsed time: %.2f seconds)' % seconds)
        self.console('')

        # Need to refresh the column list in case the job changed them
        # (ephemeral cols)
        columns = [c.name for c in table.get_columns()]

        if job.status == job.COMPLETE:
            if options['as_csv']:
                if options['output_file']:
                    # Write CSV rows to the requested output file.
                    with open(options['output_file'], 'w') as f:
                        for line in Formatter.get_csv(
                                job.values(), columns):
                            f.write(line)
                            f.write('\n')
                else:
                    Formatter.print_csv(job.values(), columns)
            else:
                Formatter.print_table(job.values(), columns)
        else:
            self.console("Job completed with an error:")
            self.console(job.message)
            sys.exit(1)
def run(self):
    """Run every dependent-table job, then apply the analysis function.

    Returns True on success with the (optionally sorted/truncated)
    result dataframe in self.data; returns False after marking the job
    as errored when a dependent job or the analysis function fails.
    """
    # Collect all dependent tables
    options = self.table.options

    logger.debug("%s: dependent tables: %s" % (self, options.tables))
    deptables = options.tables
    depjobids = {}
    batch = BatchJobRunner(self.job, max_progress=70)
    for name, id in deptables.items():
        id = int(id)
        deptable = Table.objects.get(id=id)
        job = Job.create(
            table=deptable,
            criteria=self.job.criteria.build_for_table(deptable))
        batch.add_job(job)
        logger.debug("%s: starting dependent job %s" % (self, job))
        depjobids[name] = job.id

    batch.run()

    logger.debug("%s: All dependent jobs complete, collecting data"
                 % str(self))

    # Create dataframes for all tables, keyed by dependent-table name.
    dfs = {}
    failed = False
    for name, id in depjobids.items():
        job = Job.objects.get(id=id)
        if job.status == job.ERROR:
            self.job.mark_error("Dependent Job failed: %s" % job.message)
            failed = True
            break
        f = job.data()
        dfs[name] = f
        logger.debug("%s: Table[%s] - %d rows" %
                     (self, name, len(f) if f is not None else 0))
    if failed:
        return False

    logger.debug("%s: Calling analysis function %s"
                 % (self, str(options.func)))
    try:
        df = options.func(self, dfs, self.job.criteria,
                          params=options.params)
    except AnalysisException as e:
        self.job.mark_error("Analysis function %s failed: %s" %
                            (options.func, e.message))
        logger.exception("%s raised an exception" % self)
        return False
    except Exception as e:
        self.job.mark_error("Analysis function %s failed: %s" %
                            (options.func, str(e)))
        logger.exception("%s: Analysis function %s raised an exception" %
                         (self, options.func))
        return False

    # Sort according to the defined sort columns
    if df is not None:
        if self.table.sortcol:
            n = self.table.sortcol.name
            # Renamed from 'sorted' to avoid shadowing the builtin.
            sorted_df = df.sort(n, ascending=False)
            # Move NaN rows to the end
            df = (sorted_df[sorted_df[n].notnull()].append(
                sorted_df[sorted_df[n].isnull()]))
        if self.table.rows > 0:
            self.data = df[:self.table.rows]
        else:
            self.data = df
    else:
        self.data = None

    logger.debug("%s: completed successfully" % (self))
    return True
def report_business_hours(query, tables, criteria, params):
    """Run params['table'] once per interval in tables['times'] and
    aggregate the sub-results weighted by each interval's duration.

    Returns the aggregated dataframe, or None when there are no
    intervals or no data.  Raises AnalysisException if any sub-job
    fails.
    """
    times = tables['times']
    if times is None or len(times) == 0:
        return None

    deptable = Table.objects.get(id=params['table'])

    # Create all the jobs; start/end times come in as epoch ms.
    batch = BatchJobRunner(query)
    for _, row in times.iterrows():
        t0 = row['starttime'] / 1000
        t1 = row['endtime'] / 1000

        sub_criteria = copy.copy(criteria)
        sub_criteria.starttime = (datetime.datetime.utcfromtimestamp(t0)
                                  .replace(tzinfo=pytz.utc))
        sub_criteria.endtime = (datetime.datetime.utcfromtimestamp(t1)
                                .replace(tzinfo=pytz.utc))

        job = Job.create(table=deptable, criteria=sub_criteria)
        logger.debug("Created %s: %s - %s" % (job, t0, t1))
        batch.add_job(job)

    if len(batch.jobs) == 0:
        return None

    # Run all the Jobs
    batch.run()

    # Now collect the data, tagging each sub-result with its duration
    # so the aggregation can compute time-weighted averages.
    total_secs = 0
    df = None
    for job in batch.jobs:
        if job.status == Job.ERROR:
            raise AnalysisException("%s for %s-%s failed: %s" %
                                    (job, job.criteria.starttime,
                                     job.criteria.endtime, job.message))
        subdf = job.data()
        logger.debug("%s: returned %d rows" %
                     (job, len(subdf) if subdf is not None else 0))
        if subdf is None:
            continue
        logger.debug("%s: actual_criteria %s" % (job, job.actual_criteria))
        t0 = job.actual_criteria.starttime
        t1 = job.actual_criteria.endtime

        # Compute the interval length once (was computed twice before);
        # also dropped an unused 'idx' counter.
        secs = timedelta_total_seconds(t1 - t0)
        subdf['__secs__'] = secs
        total_secs += secs

        if df is None:
            df = subdf
        else:
            df = df.append(subdf)

    if df is None:
        return None

    keynames = [key.name for key in deptable.get_columns(iskey=True)]

    if 'aggregate' in params:
        # Default any value column not explicitly mentioned to 'sum'.
        ops = params['aggregate']
        for col in deptable.get_columns(iskey=False):
            if col.name not in ops:
                ops[col.name] = 'sum'
    else:
        ops = 'sum'

    df = avg_groupby_aggregate(df, keynames, ops, '__secs__', total_secs)

    return df
def report_business_hours(query, tables, criteria, params):
    """Run params['table'] once per interval in tables['times'] and
    aggregate the sub-results weighted by each interval's duration.

    Returns the aggregated dataframe, or None when there are no
    intervals or no data.  Raises AnalysisException if any sub-job
    fails.
    """
    times = tables['times']
    if times is None or len(times) == 0:
        return None

    deptable = Table.objects.get(id=params['table'])

    # Create all the jobs; start/end times come in as epoch ms.
    batch = BatchJobRunner(query)
    for _, row in times.iterrows():
        t0 = row['starttime'] / 1000
        t1 = row['endtime'] / 1000

        sub_criteria = copy.copy(criteria)
        sub_criteria.starttime = datetime.datetime.utcfromtimestamp(
            t0).replace(tzinfo=pytz.utc)
        sub_criteria.endtime = datetime.datetime.utcfromtimestamp(t1).replace(
            tzinfo=pytz.utc)

        job = Job.create(table=deptable, criteria=sub_criteria)
        logger.debug("Created %s: %s - %s" % (job, t0, t1))
        batch.add_job(job)

    if len(batch.jobs) == 0:
        return None

    # Run all the Jobs
    batch.run()

    # Now collect the data, tagging each sub-result with its duration
    # so the aggregation can compute time-weighted averages.
    total_secs = 0
    df = None
    for job in batch.jobs:
        if job.status == Job.ERROR:
            raise AnalysisException("%s for %s-%s failed: %s" %
                                    (job, job.criteria.starttime,
                                     job.criteria.endtime, job.message))
        subdf = job.data()
        logger.debug("%s: returned %d rows" %
                     (job, len(subdf) if subdf is not None else 0))
        if subdf is None:
            continue
        logger.debug("%s: actual_criteria %s" % (job, job.actual_criteria))
        t0 = job.actual_criteria.starttime
        t1 = job.actual_criteria.endtime

        # Compute the interval length once (was computed twice before);
        # also dropped an unused 'idx' counter.
        secs = timedelta_total_seconds(t1 - t0)
        subdf['__secs__'] = secs
        total_secs += secs

        if df is None:
            df = subdf
        else:
            df = df.append(subdf)

    if df is None:
        return None

    keynames = [key.name for key in deptable.get_columns(iskey=True)]

    if 'aggregate' in params:
        # Default any value column not explicitly mentioned to 'sum'.
        ops = params['aggregate']
        for col in deptable.get_columns(iskey=False):
            if col.name not in ops:
                ops[col.name] = 'sum'
    else:
        ops = 'sum'

    df = avg_groupby_aggregate(df, keynames, ops, '__secs__', total_secs)

    return df
# This software is licensed under the terms and conditions of the # MIT License set forth at: # https://github.com/riverbed/flyscript-portal/blob/master/LICENSE ("License"). # This software is distributed "AS IS" as set forth in the License. from django.conf.urls import patterns, include, url from django.http import HttpResponseRedirect # Uncomment the next two lines to enable the admin: from django.contrib import admin admin.autodiscover() from django.conf import settings from rvbd_portal.apps.datasource.models import Job Job.flush_incomplete() urlpatterns = patterns( '', (r'^favicon\.ico$', lambda x: HttpResponseRedirect('/static/images/favicon.ico')), url(r'^$', lambda x: HttpResponseRedirect('/report')), url(r'^report/', include('rvbd_portal.apps.report.urls')), url(r'^devices/', include('rvbd_portal.apps.devices.urls')), url(r'^data/', include('rvbd_portal.apps.datasource.urls')), url(r'^geolocation/', include('rvbd_portal.apps.geolocation.urls')), url(r'^help/', include('rvbd_portal.apps.help.urls')), url(r'^console/', include('rvbd_portal.apps.console.urls')), url(r'^preferences/', include('rvbd_portal.apps.preferences.urls')), url(r'^plugins/', include('rvbd_portal.apps.plugins.urls')),
def handle(self, *args, **options):
    """Main command handler.

    Lists tables (flat or grouped by report/widget), lists a table's
    criteria fields, or runs a table: builds its criteria form from
    --criteria options, creates and starts a Job, waits for it to
    finish, and prints (or writes) the results.
    """
    self.options = options

    if options['table_list']:
        # print out the id's instead of processing anything
        tables = Table.objects.all()
        for t in tables:
            self.console('%5d - %s' % (t.id, t))

    elif options['table_list_by_report']:
        # or print them out organized by report/widget/table
        output = []
        reports = Report.objects.all()
        for report in reports:
            for section in report.section_set.all():
                for widget in section.widget_set.all():
                    for table in widget.tables.all():
                        line = [table.id, report.title,
                                widget.title, table]
                        output.append(line)
        Formatter.print_table(output, ['ID', 'Report', 'Widget', 'Table'])

    elif options['criteria_list']:
        # Resolve the target table by id or by name.
        if 'table_id' in options and options['table_id'] is not None:
            table = Table.objects.get(id=options['table_id'])
        elif 'table_name' in options:
            table = Table.objects.get(name=options['table_name'])
        else:
            raise ValueError("Must specify either --table-id or "
                             "--table-name to run a table")

        # Show the keyword/label of every criteria field on the table.
        form = self.get_form(table)
        output = [[c.keyword, c.label]
                  for c in form._tablefields.values()]
        Formatter.print_table(output, ['Keyword', 'Label'])

    else:
        # Run a table.  Resolve it by id or by name.
        if 'table_id' in options and options['table_id'] is not None:
            table = Table.objects.get(id=options['table_id'])
        elif 'table_name' in options:
            table = Table.objects.get(name=options['table_name'])
        else:
            raise ValueError("Must specify either --table-id or "
                             "--table-name to run a table")

        # Django gives us a nice error if we can't find the table
        self.console('Table %s found.' % table)

        # Parse criteria options of the form "keyword:value".
        criteria_options = {}
        if 'criteria' in options and options['criteria'] is not None:
            for s in options['criteria']:
                (k, v) = s.split(':', 1)
                criteria_options[k] = v

        form = self.get_form(table, data=criteria_options)

        if not form.is_valid(check_unknown=True):
            # Report each invalid criterion, then bail out.
            self.console('Invalid criteria:')
            logger.error('Invalid criteria: %s' %
                         ','.join('%s:%s' % (k, v)
                                  for k, v in form.errors.iteritems()))
            for k, v in form.errors.iteritems():
                self.console(' %s: %s' % (k, ','.join(v)))
            sys.exit(1)

        criteria = form.criteria()

        columns = [c.name for c in table.get_columns()]

        if options['only_columns']:
            # Only show the column names; don't run the table.
            print columns
            return

        job = Job.create(table=table, criteria=criteria)
        job.save()

        self.console('Job created: %s' % job)
        self.console('Criteria: %s' % criteria.print_details())

        start_time = datetime.datetime.now()
        job.start()
        self.console('Job running . . ', ending='')

        # wait for results
        while not job.done():
            #self.console('. ', ending='')
            #self.stdout.flush()
            time.sleep(1)

        end_time = datetime.datetime.now()
        delta = end_time - start_time
        # Total elapsed seconds including the sub-second component.
        seconds = float(delta.microseconds +
                        (delta.seconds + delta.days * 24 * 3600) *
                        10**6) / 10**6

        self.console('Done!! (elapsed time: %.2f seconds)' % seconds)
        self.console('')

        # Need to refresh the column list in case the job changed them
        # (ephemeral cols)
        columns = [c.name for c in table.get_columns()]

        if job.status == job.COMPLETE:
            if options['as_csv']:
                if options['output_file']:
                    # Write CSV rows to the requested output file.
                    with open(options['output_file'], 'w') as f:
                        for line in Formatter.get_csv(job.values(),
                                                      columns):
                            f.write(line)
                            f.write('\n')
                else:
                    Formatter.print_csv(job.values(), columns)
            else:
                Formatter.print_table(job.values(), columns)
        else:
            self.console("Job completed with an error:")
            self.console(job.message)
            sys.exit(1)