Exemplo n.º 1
0
    def post(self, request):
        """Return running, waiting and recently finished job ids as JSON.

        The POST field ``jobinfo_jobsfrom`` is converted with ``int`` and
        ``datetime.fromtimestamp``; every ``rms_job_run`` whose ``end_time``
        lies after that moment is reported under ``jobs_finished``.

        :param request: request object providing the ``POST`` mapping
        :returns: ``HttpResponse`` with an ``application/json`` body holding
            the keys ``jobs_running``, ``jobs_waiting`` and ``jobs_finished``,
            each a sorted list of ``[job id, task id]`` pairs
        """
        form = request.POST
        sge_info = get_sge_info()
        sge_info.update()
        _salt_addons(request)
        rms_info = _fetch_rms_info(request)

        # lower bound (exclusive) for the end time of finished runs
        finished_after = cluster_timezone.localize(
            datetime.datetime.fromtimestamp(int(form["jobinfo_jobsfrom"])))
        finished_runs = rms_job_run.objects.all().filter(
            Q(end_time__gt=finished_after)).select_related("rms_job")

        def _id_pair(job_node):
            # first <job_id> as integer, first <task_id> as raw text
            job_id = int(job_node.findall("job_id")[0].text)
            task_id = job_node.findall("task_id")[0].text
            return [job_id, task_id]

        running = sorted(_id_pair(node) for node in rms_info.run_job_list)
        waiting = sorted(_id_pair(node) for node in rms_info.wait_job_list)
        # a falsy task id is reported as empty string
        finished = sorted(
            [run.rms_job.jobid, run.rms_job.taskid or ""]
            for run in finished_runs
        )
        payload = {
            "jobs_running": running,
            "jobs_waiting": waiting,
            "jobs_finished": finished,
        }
        return HttpResponse(json.dumps(payload),
                            content_type="application/json")
Exemplo n.º 2
0
 def close_job_run(self):
     """Stamp the latest job run with the current cluster-local time.

     :returns: whatever ``get_latest_job_run()`` produced; when that value
         is falsy nothing is modified or saved
     """
     run = self.get_latest_job_run()
     if not run:
         return run
     run.end_time_py = cluster_timezone.localize(datetime.datetime.now())
     # only the changed column is written back
     run.save(update_fields=["end_time_py"])
     return run
Exemplo n.º 3
0
 def _get_missing_dict(self):
     """Map job id strings to the ``idx`` values of runs without qacct data.

     Keys are ``"<jobid>.<taskid>"`` when the task id is truthy and
     ``"<jobid>"`` otherwise; each value is the list of ``rms_job_run.idx``
     entries with ``qacct_called=False`` belonging to that key.

     :returns: dict of job id string -> list of run indices
     """
     # runs started more than 31 days ago that have neither start/end time
     # nor a qacct call
     # NOTE(review): these runs are only counted and logged here, nothing
     # is removed in this method -- confirm whether that is intended
     cutoff = cluster_timezone.localize(
         datetime.datetime.now()
     ) - datetime.timedelta(days=31)
     stale_runs = rms_job_run.objects.filter(
         Q(qacct_called=False) &
         Q(end_time=None) &
         Q(start_time=None) &
         Q(start_time_py__lt=cutoff)
     )
     self.log("invalid runs found: {:d}".format(stale_runs.count()))

     pending = rms_job_run.objects.filter(Q(qacct_called=False)).values_list(
         "idx", "rms_job__jobid", "rms_job__taskid"
     )
     by_job_id = {}
     for run_idx, job_id, task_id in pending:
         if task_id:
             key = "{:d}.{:d}".format(job_id, task_id)
         else:
             key = "{:d}".format(job_id)
         by_job_id.setdefault(key, []).append(run_idx)
     return by_job_id
Exemplo n.º 4
0
 def add_job_run(self, _dev_name, _dev):
     """Build a new ``rms_job_run`` for this job, started now.

     The instance is only constructed here; this method does not call
     ``save()`` on it.

     :param _dev_name: value stored as ``hostname``
     :param _dev: value stored as ``device``
     :returns: the new ``rms_job_run`` instance
     """
     started = cluster_timezone.localize(datetime.datetime.now())
     return rms_job_run(
         rms_job=self,
         device=_dev,
         hostname=_dev_name,
         start_time_py=started,
     )
Exemplo n.º 5
0
    def retrieve(self, request, *args, **kwargs):
        """Report which aggregated timespan(s) can serve the request.

        The response dict always carries ``status``:

        * ``'found'`` -- a timespan matched directly (``partial`` False), or
          the current time lies inside the requested span and shorter
          timespans were collected instead (``partial`` True)
        * ``'found earlier'`` -- the newest stored timespan of the requested
          duration type ends before the requested date
        * ``'not found'`` -- none of the above applied

        :param request: request whose ``GET["date"]`` is parsed when no
            direct match exists
        :returns: ``Response`` wrapping the dict described above
        """
        timespans = _device_status_history_util.get_timespans_db_from_request(
            request)
        if len(timespans):
            exact = timespans[0]
            return Response({
                'status': 'found',
                'start': exact.start_date,
                'end': exact.end_date,
                "db_ids": [exact.idx],
                # not assembled from shorter timespans
                "partial": False,
            })

        data = {'status': 'not found'}
        start, end, duration_type = _device_status_history_util.get_timespan_tuple_from_request(
            request)
        timespan_model = mon_icinga_log_aggregated_timespan
        # fall back to the most recent data if this type is not yet finished
        try:
            newest = timespan_model.objects.filter(
                duration_type=duration_type.ID).latest('start_date')
        except timespan_model.DoesNotExist:
            # no data at all, can't do anything useful
            return Response(data)

        date = duration_utils.parse_date(request.GET["date"])
        _now = cluster_timezone.localize(datetime.datetime.now())
        if start < _now < end:
            # the requested span is still running: collect the timespans of
            # the next shorter duration type that start inside it
            finer = duration_type.get_shorter_duration()
            last_start = end - datetime.timedelta(seconds=1)
            pieces = list(
                timespan_model.objects.filter(
                    duration_type=finer.ID,
                    start_date__range=(start, last_start),
                ).order_by("start_date")
            )
            if pieces:
                data = {
                    "start": pieces[0].start_date,
                    "end": pieces[-1].end_date,
                    "status": "found",
                    "db_ids": [piece.idx for piece in pieces],
                    "partial": True,
                }
        elif newest.end_date < date:
            # only earlier data exists, offer the newest stored timespan
            data = {
                'status': 'found earlier',
                'start': newest.start_date,
                "db_ids": [newest.idx],
                'end': newest.end_date,
                "partial": False,
            }
        return Response(data)
Exemplo n.º 6
0
 def close(self):
     """Deactivate this lock and store how long it was open.

     ``run_time`` is the span between ``self.date`` and now in whole
     milliseconds. The object is saved afterwards.

     :returns: list with a single ``(text, log_level)`` tuple describing
         the closed lock
     """
     _elapsed = cluster_timezone.localize(
         datetime.datetime.now()) - cluster_timezone.normalize(self.date)
     # timedelta.seconds only holds the part below one day, so the days
     # must be added explicitly or long-lived locks report a too small value;
     # integer division keeps the result an int on every Python version
     _run_time = (
         _elapsed.days * 24 * 3600 * 1000 +
         _elapsed.seconds * 1000 +
         _elapsed.microseconds // 1000
     )
     self.active = False
     self.run_time = _run_time
     self.save()
     # close current lock and return a list of (what, level) lines
     return [("closed {}".format(unicode(self)), logging_tools.LOG_LEVEL_OK)
             ]
Exemplo n.º 7
0
 def set_value(self, value):
     """Store ``value`` in the slot matching its type and set ``var_type``.

     * exactly ``datetime.datetime`` -> ``var_type`` ``"d"``, localized value
       in ``val_date``
     * exactly ``int`` / ``long``, or a string consisting of digits ->
       ``var_type`` ``"i"``, ``int(value)`` in ``val_int``
     * anything else -> ``var_type`` ``"s"``, value unchanged in ``val_str``

     ``self._clear()`` is called afterwards in every case.

     :param value: the value to store
     """
     # exact type comparison on purpose: subclasses are not treated as dates
     if type(value) == datetime.datetime:
         self.var_type = "d"
         self.val_date = cluster_timezone.localize(value)
         self._clear()
         return
     is_integral = type(value) in [int, long] or (
         isinstance(value, basestring) and value.isdigit())
     if is_integral:
         self.var_type = "i"
         self.val_int = int(value)
     else:
         self.var_type = "s"
         self.val_str = value
     self._clear()
Exemplo n.º 8
0
 def _interpret_qacct(self, cur_out, needed):
     _found, _matched = (0, 0)
     _dict_list = []
     _dict = {}
     for _line in cur_out.split("\n"):
         if _line.startswith("==="):
             if "jobnumber" in _dict:
                 _found += 1
                 _matched += self._feed_qacct(_dict)
                 _dict_list.append(_dict)
             _dict = {}
         else:
             if _line.strip():
                 _parts = _line.strip().split(None, 1)
                 if len(_parts) > 1:
                     # simple cleanup
                     _key, _value = _parts
                     if _value.isdigit():
                         _value = int(_value)
                     elif _value in ["NONE", "undefined", "-/-"]:
                         _value = None
                     elif _key.endswith("time") and len(_value.split()) > 4:
                         _value = cluster_timezone.localize(datetime.datetime.strptime(_value, "%a %b %d %H:%M:%S %Y"))
                     _dict[_key] = _value
         if self["exit_requested"]:
             self.log("exiting accounting loop due to exit requst", logging_tools.LOG_LEVEL_WARN)
             break
     if "jobnumber" in _dict:
         _found += 1
         _matched += self._feed_qacct(_dict)
         _dict_list.append(_dict)
     if needed == _found and not _matched:
         # print _dict_list[0]
         _to_del = rms_job_run.objects.filter(
             Q(rms_job__jobid=_dict_list[0]["jobnumber"]) &
             Q(rms_job__taskid=_dict_list[0]["taskid"])
         )
         self.log(
             "    all matches found, removing old rms_job_run entries ({:d})".format(
                 _to_del.count()
             )
         )
         _to_del.delete()
         _matched = 0
         for _dict in _dict_list:
             _matched += self._feed_qacct(_dict, force=True)
     return _found, _matched
Exemplo n.º 9
0
def create_bg_job(server_pk, user_obj, cmd, cause, obj, **kwargs):
    """Build a ``background_job`` for ``cmd`` and set its initial state.

    :param server_pk: primary key of the device used as ``initiator``
    :param user_obj: stored as ``user`` of the job
    :param cmd: command name, used for the server command and the job
    :param cause: text describing the cause; extended with the object (or
        the number of objects) and cut to 255 characters
    :param obj: ``None``, a single object or a list of objects; every
        object is embedded in the command XML with model, app label and pk
    :param kwargs: optional ``timeout`` (seconds the job stays valid,
        default 5 minutes), ``options`` (default ``""``) and ``state``
        (default ``BackgroundJobState.pre_init``)
    :returns: the new ``background_job`` instance
    """
    # late import to break import loop
    from initat.cluster.backbone.models import background_job, device, BackgroundJobState
    srv_com = server_command.srv_command(command=cmd, )
    timeout = kwargs.get("timeout", 60 * 5)
    _bld = srv_com.builder()

    # normalise obj to a list (or None) and extend the cause text
    if isinstance(obj, list):
        targets = obj
        cause = "{} of {}".format(
            cause, logging_tools.get_plural("object", len(targets)))
    elif obj is not None:
        targets = [obj]
        cause = "{} of {}".format(cause, str(obj))
    else:
        targets = None

    if targets is not None:
        xml_nodes = [
            _bld.object(
                str(_target),
                model=_target._meta.model_name,
                app=_target._meta.app_label,
                pk="{:d}".format(_target.pk),
            )
            for _target in targets
        ]
        srv_com[None] = _bld.objects(*xml_nodes)

    _new_job = background_job(
        command=cmd,
        cause=cause[:255],
        options=kwargs.get("options", ""),
        initiator=device.objects.get(Q(pk=server_pk)),
        user=user_obj,
        command_xml=str(srv_com),
        num_objects=len(targets) if targets else 0,
        # valid for ``timeout`` seconds from now
        valid_until=cluster_timezone.localize(
            datetime.datetime.now() + datetime.timedelta(seconds=timeout)
        ),
    )
    _new_job.set_state(kwargs.get("state", BackgroundJobState.pre_init))
    return _new_job
Exemplo n.º 10
0
 def consume(self, request):
     """Try to satisfy ``request`` from the egg cradle of this consumer.

     Lock requests never consume anything: weight becomes 0 and the request
     is marked invalid. For other requests the target weight is
     ``self.multiplier``; an invalid request needs the full target, a valid
     one only the difference to its current weight. The request becomes
     valid only if the cradle holds strictly more than the amount needed.
     On success ``valid_until`` is set ``timeframe_secs`` seconds ahead, or
     to ``None`` when ``timeframe_secs`` is falsy.

     The request is always saved (``weight``, ``valid``, ``valid_until``).

     :param request: the request object to update
     :returns: the resulting ``request.valid``
     """
     if request.is_lock:
         # is a lock, we dont consume anything
         request.weight = 0
         request.valid = False
     else:
         target = self.multiplier
         if request.valid:
             # already valid: only the difference to the target is needed
             needed = target - request.weight
         else:
             needed = target
         # with nothing to consume the cradle is not consulted; 1 makes
         # the comparison below succeed for needed == 0
         in_stock = self.egg_cradle.available if needed else 1
         if in_stock > needed:
             if needed:
                 self.egg_cradle.available -= needed
                 self.consumed += needed
                 self.save(update_fields=["consumed"])
                 self.egg_cradle.save(update_fields=["available"])
             if self.timeframe_secs:
                 lifetime = datetime.timedelta(seconds=self.timeframe_secs)
                 request.valid_until = cluster_timezone.localize(
                     datetime.datetime.now() + lifetime)
             else:
                 request.valid_until = None
             request.valid = True
         else:
             request.valid = False
         request.weight = target
     request.save(update_fields=["weight", "valid", "valid_until"])
     return request.valid
Exemplo n.º 11
0
def create_bg_job(server_pk, user_obj, cmd, cause, obj, **kwargs):
    """Create and store a ``background_job`` for ``cmd`` in state ``pre-init``.

    :param server_pk: primary key of the device used as ``initiator``
    :param user_obj: stored as ``user`` of the job
    :param cmd: command name, used for the server command and the job
    :param cause: text describing the cause; the affected object(s) are
        appended and the result is cut to 255 characters
    :param obj: a single object or a list of objects; every object is
        embedded in the command XML with model, app label and pk
    :param kwargs: optional ``options`` (default ``""``) and ``timeout``
        (seconds the job stays valid, default 5 minutes)
    :returns: the created ``background_job`` instance
    """
    # late import to break import loop
    from initat.cluster.backbone.models import background_job, device
    srv_com = server_command.srv_command(
        command=cmd,
    )
    timeout = kwargs.get("timeout", 60 * 5)
    _bld = srv_com.builder()
    if isinstance(obj, list):
        obj_list = obj
    else:
        obj_list = [obj]
    # name every affected object; for a single object this equals
    # unicode(obj). The loop variables below deliberately do not reuse the
    # name ``obj``: in Python 2 a list comprehension variable leaks into the
    # function scope, which made the cause text show only the last object
    _obj_names = u", ".join([unicode(_cur_obj) for _cur_obj in obj_list])
    srv_com[None] = _bld.objects(
        *[
            _bld.object(
                unicode(_cur_obj),
                model=_cur_obj._meta.model_name,
                app=_cur_obj._meta.app_label,
                pk="{:d}".format(_cur_obj.pk),
            ) for _cur_obj in obj_list
        ]
    )
    _new_job = background_job.objects.create(
        command=cmd,
        cause=u"{} of '{}'".format(cause, _obj_names)[:255],
        state="pre-init",
        options=kwargs.get("options", ""),
        initiator=device.objects.get(Q(pk=server_pk)),
        user=user_obj,
        command_xml=unicode(srv_com),
        num_objects=len(obj_list),
        # valid for ``timeout`` seconds from now
        valid_until=cluster_timezone.localize(
            datetime.datetime.now() + datetime.timedelta(seconds=timeout)
        ),
    )
    return _new_job
Exemplo n.º 12
0
 def ok(self):
     """Record a successful end: set ``end`` to now and ``success`` to True.

     Only the two changed fields are written by the ``save`` call.
     """
     finished_at = cluster_timezone.localize(datetime.datetime.now())
     self.end, self.success = finished_at, True
     self.save(update_fields=["end", "success"])