Example No. 1
def main():
    """watch a specific directory, logging changes and
    running python scripts when they are written to disk"""
    home_dir = Path(environ.get('HOME'))
    run_logfile = home_dir / 'pyrun.log'
    watchdog_logfile = home_dir / 'pydir.log'
    run_log = FileHandler(str(run_logfile), level='NOTICE', bubble=True, mode='w', delay=True)
    file_log = FileHandler(str(watchdog_logfile), level='INFO', bubble=True)
    with run_log.applicationbound():
        with file_log.applicationbound():
            watched_dir = home_dir / 'code' / 'pyrep' / 'coderunner' / 'snippets'
            handler = MyEventHandler(run_logfile, run_log)
            obs = InotifyObserver()
            obs.schedule(handler, str(watched_dir), False)
            obs.start()
            try:
                while True:
                    sleep(1)
            except: #  pylint: disable=bare-except
                obs.stop()
            obs.join()
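The snippet above omits its imports and the MyEventHandler class it schedules. A minimal sketch of what they might look like, assuming watchdog for the observer and logbook for the handlers (MyEventHandler, its behaviour, and the use of subprocess are assumptions, not the original implementation):

from os import environ
from pathlib import Path
from subprocess import run  # assumption: changed scripts are executed via subprocess
from time import sleep

from logbook import FileHandler, Logger
from watchdog.events import FileSystemEventHandler
from watchdog.observers.inotify import InotifyObserver

log = Logger('pyrun')


class MyEventHandler(FileSystemEventHandler):
    """Hypothetical handler: log each write and run the script that changed."""

    def __init__(self, logfile, run_log):
        super().__init__()
        self.logfile = logfile
        self.run_log = run_log

    def on_modified(self, event):
        # ignore directories and anything that is not a Python script
        if event.is_directory or not event.src_path.endswith('.py'):
            return
        log.notice('running {}'.format(event.src_path))
        run(['python', event.src_path], check=False)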
def main():
    """
    The main routine which kicks everything off
    :return:
    """

    # Setup the command line arguments
    flags = argparse.ArgumentParser(description="Tool to validate and fix errors in CSV files for TADC imports")
    flags.add_argument('csv_file', type=str, help="Path to a CSV file to validate")
    flags.add_argument('header_rows', type=str, help="Number of header rows")
    flags.add_argument('--fix-missing', '-f', action='store_true', help="Fix missing fields by inserting the value 'unknown'")
    flags.add_argument('--output-dir', '-o', type=str, help='Where to put output files', default=os.getcwd())
    flags.add_argument('--log-dir', '-l', type=str, help='Where to put log files', default='/tmp')
    flags.add_argument('--log-level', type=str, help='Choose a log level', default='INFO')
    flags.add_argument('--old-date-format', type=str, help="the format of dates that will be fixed", default='%d/%m/%Y')
    args = flags.parse_args()

    log_filename = os.path.join(
            args.log_dir,
            'tadc_import_validator_{}.log'.format(os.path.basename(time.strftime('%Y%m%d-%H%M%S')))
        )

    # register some logging handlers
    log_handler = FileHandler(
        log_filename,
        mode='w',
        level=args.log_level,
        bubble=True
    )
    stdout_handler = StreamHandler(sys.stdout, level=args.log_level, bubble=True)

    with stdout_handler.applicationbound():
        with log_handler.applicationbound():
            log.info("Arguments: {}".format(args))
            start = time.time()
            log.info("starting at {}".format(time.strftime('%l:%M%p %Z on %b %d, %Y')))

            with CSVFileValidator(
                    csv_file=args.csv_file,
                    header_rows=args.header_rows,
                    output_dir=args.output_dir,
                    old_date_format=args.old_date_format,
                    fix_missing=args.fix_missing) as validator:
                validator.validate_file()
                log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3))))))
                log.info("Log written to {}:".format(log_filename))
                log.info("Fixed data is in: {}".format(validator.get_fixed_filename()))
Example No. 3
class Fibratus():

    """Fibratus entrypoint.

    Set up the core components, including the kernel
    event stream collector and the tracing controller.
    At this point the system handles are also being
    enumerated.

    """
    def __init__(self, filament):

        self.logger = Logger(Fibratus.__name__)
        self.file_handler = FileHandler(os.path.join(os.path.abspath(__file__), '..', '..', '..', 'fibratus.log'),
                                        mode='w+')
        self.kevt_streamc = KEventStreamCollector(etw.KERNEL_LOGGER_NAME.encode())
        self.kcontroller = KTraceController()
        self.ktrace_props = KTraceProps()
        self.ktrace_props.enable_kflags()
        self.ktrace_props.logger_name = etw.KERNEL_LOGGER_NAME

        self.handle_repository = HandleRepository()
        self._handles = []
        # query for handles on the
        # start of kernel trace
        with self.file_handler.applicationbound():
            self.logger.info('Starting fibratus...')
            self.logger.info('Enumerating system handles...')
            self._handles = self.handle_repository.query_handles()
            self.logger.info('%s handles found' % len(self._handles))
            self.handle_repository.free_buffers()
        self.thread_registry = ThreadRegistry(self.handle_repository, self._handles)

        self.kevent = KEvent(self.thread_registry)

        self._filament = filament

        self.fsio = FsIO(self.kevent, self._handles)
        self.hive_parser = HiveParser(self.kevent, self.thread_registry)
        self.tcpip_parser = TcpIpParser(self.kevent)
        self.dll_repository = DllRepository(self.kevent)

        self.requires_render = {}
        self.filters_count = 0

    def run(self):

        @atexit.register
        def _exit():
            self.stop_ktrace()

        self.kcontroller.start_ktrace(etw.KERNEL_LOGGER_NAME, self.ktrace_props)

        def on_kstream_open():
            if self._filament is None:
                IO.write_console('Done!                               ')
        self.kevt_streamc.set_kstream_open_callback(on_kstream_open)
        self._open_kstream()

    def _open_kstream(self):
        try:
            self.kevt_streamc.open_kstream(self._on_next_kevent)
        except Exception as e:
            with self.file_handler.applicationbound():
                self.logger.error(e)
        except KeyboardInterrupt:
            self.stop_ktrace()

    def stop_ktrace(self):
        IO.write_console('Stopping fibratus...')
        if self._filament:
            self._filament.close()
        self.kcontroller.stop_ktrace(self.ktrace_props)
        self.kevt_streamc.close_kstream()

    def add_filters(self, kevent_filters):
        if len(kevent_filters) > 0:
            self.filters_count = len(kevent_filters)
            # include the basic filters
            # that are essential to the
            # rest of kernel events
            self.kevt_streamc.add_kevent_filter(ENUM_PROCESS)
            self.kevt_streamc.add_kevent_filter(ENUM_THREAD)
            self.kevt_streamc.add_kevent_filter(ENUM_IMAGE)
            self.kevt_streamc.add_kevent_filter(REG_CREATE_KCB)
            self.kevt_streamc.add_kevent_filter(REG_DELETE_KCB)

            # these kevents are necessary for consistent state
            # of the trace. If the user doesn't include them
            # in a filter list, then we do the job but set the
            # kernel event type as not eligible for rendering
            if not KEvents.CREATE_PROCESS in kevent_filters:
                self.kevt_streamc.add_kevent_filter(CREATE_PROCESS)
                self.requires_render[CREATE_PROCESS] = False
            else:
                self.requires_render[CREATE_PROCESS] = True

            if not KEvents.CREATE_THREAD in kevent_filters:
                self.kevt_streamc.add_kevent_filter(CREATE_THREAD)
                self.requires_render[CREATE_THREAD] = False
            else:
                self.requires_render[CREATE_THREAD] = True

            if not KEvents.CREATE_FILE in kevent_filters:
                self.kevt_streamc.add_kevent_filter(CREATE_FILE)
                self.requires_render[CREATE_FILE] = False
            else:
                self.requires_render[CREATE_FILE] = True

            for kevent_filter in kevent_filters:
                ktuple = kname_to_tuple(kevent_filter)
                if isinstance(ktuple, list):
                    for kt in ktuple:
                        self.kevt_streamc.add_kevent_filter(kt)
                        if not kt in self.requires_render:
                            self.requires_render[kt] = True
                else:
                    self.kevt_streamc.add_kevent_filter(ktuple)
                    if not ktuple in self.requires_render:
                        self.requires_render[ktuple] = True

    def _on_next_kevent(self, ktype, cpuid, ts, kparams):
        """Callback which fires when new kernel event arrives.

        This callback is invoked for every new kernel event
        forwarded from the kernel stream collector.

        Parameters
        ----------

        ktype: tuple
            Kernel event type.
        cpuid: int
            Identifies the CPU core where the event
            has been captured.
        ts: str
            Temporal reference of the kernel event.
        kparams: dict
            Kernel event's parameters.
        """

        # initialize kernel event properties
        self.kevent.ts = ts
        self.kevent.cpuid = cpuid
        self.kevent.name = ktuple_to_name(ktype)
        kparams = ddict(kparams)
        # thread / process kernel events
        if ktype in [CREATE_PROCESS,
                     CREATE_THREAD,
                     ENUM_PROCESS,
                     ENUM_THREAD]:
            self.thread_registry.add_thread(ktype, kparams)
            if ktype in [CREATE_PROCESS, CREATE_THREAD]:
                self.thread_registry.init_thread_kevent(self.kevent,
                                                        ktype,
                                                        kparams)
                self._render(ktype)
        elif ktype in [TERMINATE_PROCESS, TERMINATE_THREAD]:
            self.thread_registry.init_thread_kevent(self.kevent,
                                                    ktype,
                                                    kparams)
            self._render(ktype)
            self.thread_registry.remove_thread(ktype, kparams)

        # file system/disk kernel events
        elif ktype in [CREATE_FILE,
                       DELETE_FILE,
                       CLOSE_FILE,
                       READ_FILE,
                       WRITE_FILE]:
            self.fsio.parse_fsio(ktype, kparams)
            self._render(ktype)

        # dll kernel events
        elif ktype in [LOAD_IMAGE, ENUM_IMAGE]:
            self.dll_repository.register_dll(kparams)
            if ktype == LOAD_IMAGE:
                self._render(ktype)
        elif ktype == UNLOAD_IMAGE:
            self.dll_repository.unregister_dll(kparams)
            self._render(ktype)

        # registry kernel events
        elif ktype == REG_CREATE_KCB:
            self.hive_parser.add_kcb(kparams)
        elif ktype == REG_DELETE_KCB:
            self.hive_parser.remove_kcb(kparams.key_handle)

        elif ktype in [REG_CREATE_KEY,
                       REG_DELETE_KEY,
                       REG_OPEN_KEY,
                       REG_QUERY_KEY,
                       REG_SET_VALUE,
                       REG_DELETE_VALUE,
                       REG_QUERY_VALUE]:
            self.hive_parser.parse_hive(ktype, kparams)
            self._render(ktype)

        # network kernel events
        elif ktype in [SEND_SOCKET_TCPV4,
                       SEND_SOCKET_UDPV4,
                       RECV_SOCKET_TCPV4,
                       RECV_SOCKET_UDPV4,
                       ACCEPT_SOCKET_TCPV4,
                       CONNECT_SOCKET_TCPV4,
                       DISCONNECT_SOCKET_TCPV4,
                       RECONNECT_SOCKET_TCPV4]:
            self.tcpip_parser.parse_tcpip(ktype, kparams)
            self._render(ktype)

        if self._filament:
            # call filament method
            # to process the next
            # kernel event from the stream
            if ktype not in [ENUM_PROCESS,
                             ENUM_THREAD, ENUM_IMAGE]:
                if self.kevent.name:
                    self._filament.process(self.kevent)

    def _render(self, ktype):
        """Renders the kevent to the standard output stream.

        Parameters
        ----------

        ktype: tuple
            Identifier of the kernel event
        """
        if not self._filament:
            if ktype in self.requires_render:
                rr = self.requires_render[ktype]
                if rr:
                    self.kevent.render()
            elif self.filters_count == 0:
                self.kevent.render()
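The add_filters/_render bookkeeping above reduces to a simple rule: the mandatory kernel events are always collected, but only rendered if the user explicitly asked for them, and with no filters at all everything is rendered. A standalone distillation of that rule (the event tuples below are illustrative stand-ins, not Fibratus internals):

# Illustrative stand-ins for kernel event type tuples.
CREATE_PROCESS = ('process', 'create')
CREATE_THREAD = ('thread', 'create')
CREATE_FILE = ('file', 'create')

MANDATORY = (CREATE_PROCESS, CREATE_THREAD, CREATE_FILE)


def build_requires_render(user_filters):
    """Mandatory events are always captured; render them only if requested."""
    return {ktype: ktype in user_filters for ktype in MANDATORY}


def should_render(ktype, requires_render, filters_count):
    """Mirrors Fibratus._render: render if eligible, or if no filters are set."""
    if ktype in requires_render:
        return requires_render[ktype]
    return filters_count == 0


requires_render = build_requires_render({CREATE_PROCESS})
assert should_render(CREATE_PROCESS, requires_render, filters_count=1)
assert not should_render(CREATE_THREAD, requires_render, filters_count=1)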
Example No. 4
    elif soort == 'event':
        p.events.append((dt.datetime.today().isoformat(' ')[:19], data))
    elif soort == 'statuscode':
        p.status = data
    elif soort == 'arch':
        p.set_arch(data)
    list(p)
    if update:
        p.write()
    return p


if __name__ == "__main__":
    fnm = "afrift"
    log_handler = FileHandler('get_acties_sql_1.log', mode='w')
    with log_handler.applicationbound():
        test_get_acties(fnm, {}, "")
        test_get_acties(fnm, {"idlt": "2010"}, "")
        test_get_acties(fnm, {
            "idlt": "2010",
            "id": "and",
            "idgt": "2007-0003"
        }, "")
        test_get_acties(fnm, {
            "idgt": "2010",
            "id": "or",
            "idlt": "2007-0003"
        }, "")
        test_get_acties(fnm, {"idgt": "2007-0003"}, "")
        test_get_acties(fnm, {"status": ["1"]}, "")
        test_get_acties(fnm, {"status": ["1", "2"]}, "")
Example No. 5
    from logbook import FileHandler
    from logbook import Logger
    from argparse import ArgumentParser
    import sys
    parser = ArgumentParser()
    logpath = './log/'
    parser.add_argument('--log', nargs=1, help='log path')
    parser.add_argument('--version', nargs=1, help='maintain version')
    args = parser.parse_args(sys.argv[1:])
    logfilepath = logpath + args.log[0]
    maintain_version = args.version[0]
    log_handler = FileHandler(logfilepath)
    logbk = Logger('Token Maintain')

    with log_handler.applicationbound():
        logbk.info('maintain prepare')

        at_least = AT_LEAST_TOKEN_COUNT
        max_tokens_redis_limit = MAX_TOKENS_IN_REDIS

        logbk.info('maintain begin')

        # Authenticate new users and add their access_tokens to MongoDB;
        # Redis then imports the new tokens from MongoDB without resetting
        # the req_count of existing tokens.
        if maintain_version == 'addatoken':
            print('generate new token, write to mongo, push to redis without reset request count')
            generate_api_access_token(logbk)
            add_without_reset_req_count(max_tokens_redis_limit, logbk)

        # Add every access_token from MongoDB to Redis and reset the
        # req_count of existing tokens.
        if maintain_version == 'addalltoken':
Example No. 6
"""

import os
import sys
from logbook import Processor, StreamHandler, DEBUG, Logger, FileHandler

my_handler = FileHandler("test.log", encoding="utf-8", level=DEBUG)
# my_handler = StreamHandler(sys.stdout, level=DEBUG)


def log_other_info(record):
    """
    a) 通过 with.processor可以让在其中的日志拥有共同的逻辑,相当于一个切面注入
    比如这里的例子是 在每条日志中记录一些额外的信息(额外的信息是通过在日志对象(logRecord)的extra(字典对象)属性中添加
    一些其他的信息),这样每条日志都会有这里添加的额外的信息。
    b) 有个疑问就是,这些额外的信息怎么运用呢,比如这些信息如何能和日志一块记录在文件中呢
    c) 关于日志的属性,见 logrecord.py
    """
    record.extra['myname'] = 'kute'
    record.extra['mycwd'] = os.getcwd()
    # update the myname property
    record.extra.update(myname="lisa")
    print(record.to_dict())


if __name__ == "__main__":
    with my_handler.applicationbound():
        with Processor(log_other_info).applicationbound():
            mylog = Logger("processor")
            mylog.notice("notice msg.")
def main():
    """Shows basic usage of the Google Drive API.

    Creates a Google Drive API service object and outputs the names and IDs
    for up to 10 files.
    """

    log_filename = os.path.join(
        args.log_dir,
        'google-drive-to-s3-{}.log'.format(os.path.basename(time.strftime('%Y%m%d-%H%M%S')))
    )

    # register some logging handlers
    log_handler = FileHandler(
        log_filename,
        mode='w',
        level=args.log_level,
        bubble=True
    )
    stdout_handler = StreamHandler(sys.stdout, level=args.log_level, bubble=True)

    with stdout_handler.applicationbound():
        with log_handler.applicationbound():
            log.info("Arguments: {}".format(args))
            start = time.time()
            log.info("starting at {}".format(time.strftime('%l:%M%p %Z on %b %d, %Y')))

            credentials = get_credentials()
            http = credentials.authorize(httplib2.Http())
            drive_service = discovery.build('drive', 'v3', http=http)

            s3 = boto3.resource('s3')

            # load up a match file if we have one.
            if args.match_file:
                with open(args.match_file, 'r') as f:
                    match_filenames = f.read().splitlines()
            else:
                match_filenames = None

            # get the files in the specified folder.
            files = drive_service.files()
            request = files.list(
                pageSize=args.page_size,
                q="'{}' in parents".format(args.folder_id),
                fields="nextPageToken, files(id, name)"
            )

            # make sure our S3 Key prefix has a trailing slash
            key_prefix = ensure_trailing_slash(args.key_prefix)

            page_counter = 0
            file_counter = 0
            while request is not None:
                file_page = request.execute(http=http)
                page_counter += 1
                page_file_counter = 0  # reset the paging file counter

                # determine the page at which to start processing.
                if page_counter >= args.start_page:
                    log.info(u"######## Page {} ########".format(page_counter))

                    for this_file in file_page['files']:
                        file_counter += 1
                        page_file_counter += 1
                        if we_should_process_this_file(this_file['name'], match_filenames):
                            log.info(u"#== Processing {} file number {} on page {}. {} files processed.".format(
                                this_file['name'],
                                page_file_counter,
                                page_counter,
                                file_counter
                            ))

                            # download the file
                            download_request = drive_service.files().get_media(fileId=this_file['id'])
                            fh = io.BytesIO()  # Using an in memory stream location
                            downloader = MediaIoBaseDownload(fh, download_request)
                            done = False
                            pbar = InitBar(this_file['name'])
                            while done is False:
                                status, done = downloader.next_chunk()
                                pbar(int(status.progress()*100))
                                # print("\rDownload {}%".format(int(status.progress() * 100)))
                            del pbar

                            # upload to bucket
                            log.info(u"Uploading to S3")
                            s3.Bucket(args.bucket).put_object(
                                Key="{}{}".format(key_prefix, this_file['name']),
                                Body=fh.getvalue(),
                                ACL='public-read'
                            )
                            log.info(u"Uploaded to S3")
                            fh.close()  # close the file handle to release memory
                        else:
                            log.info(u"Do not need to process {}".format(this_file['name']))

                # stop if we have come to the last user specified page
                if args.end_page and page_counter == args.end_page:
                    log.info(u"Finished paging at page {}".format(page_counter))
                    break
                # request the next page of files
                request = files.list_next(request, file_page)

            log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3))))))
            log.info("Log written to {}:".format(log_filename))
Example No. 8
class Fibratus():
    """Fibratus entrypoint.

    Set up the core components, including the kernel
    event stream collector and the tracing controller.
    At this point the system handles are also being
    enumerated.

    """
    def __init__(self, filament):

        self.logger = Logger(Fibratus.__name__)
        self.file_handler = FileHandler(os.path.join(os.path.abspath(__file__),
                                                     '..', '..', '..',
                                                     'fibratus.log'),
                                        mode='w+')
        self.kevt_streamc = KEventStreamCollector(
            etw.KERNEL_LOGGER_NAME.encode())
        self.kcontroller = KTraceController()
        self.ktrace_props = KTraceProps()
        self.ktrace_props.enable_kflags()
        self.ktrace_props.logger_name = etw.KERNEL_LOGGER_NAME

        self.handle_repository = HandleRepository()
        self._handles = []
        # query for handles on the
        # start of kernel trace
        with self.file_handler.applicationbound():
            self.logger.info('Starting fibratus...')
            self.logger.info('Enumerating system handles...')
            self._handles = self.handle_repository.query_handles()
            self.logger.info('%s handles found' % len(self._handles))
            self.handle_repository.free_buffers()
        self.thread_registry = ThreadRegistry(self.handle_repository,
                                              self._handles)

        self.kevent = KEvent(self.thread_registry)

        self._filament = filament

        self.fsio = FsIO(self.kevent, self._handles)
        self.hive_parser = HiveParser(self.kevent, self.thread_registry)
        self.tcpip_parser = TcpIpParser(self.kevent)
        self.dll_repository = DllRepository(self.kevent)

        self.requires_render = {}
        self.filters_count = 0

    def run(self):
        @atexit.register
        def _exit():
            self.stop_ktrace()

        self.kcontroller.start_ktrace(etw.KERNEL_LOGGER_NAME,
                                      self.ktrace_props)

        def on_kstream_open():
            if self._filament is None:
                IO.write_console('Done!                               ')

        self.kevt_streamc.set_kstream_open_callback(on_kstream_open)
        self._open_kstream()

    def _open_kstream(self):
        try:
            self.kevt_streamc.open_kstream(self._on_next_kevent)
        except Exception as e:
            with self.file_handler.applicationbound():
                self.logger.error(e)
        except KeyboardInterrupt:
            self.stop_ktrace()

    def stop_ktrace(self):
        IO.write_console('Stopping fibratus...')
        if self._filament:
            self._filament.close()
        self.kcontroller.stop_ktrace(self.ktrace_props)
        self.kevt_streamc.close_kstream()

    def add_filters(self, kevent_filters):
        if len(kevent_filters) > 0:
            self.filters_count = len(kevent_filters)
            # include the basic filters
            # that are essential to the
            # rest of kernel events
            self.kevt_streamc.add_kevent_filter(ENUM_PROCESS)
            self.kevt_streamc.add_kevent_filter(ENUM_THREAD)
            self.kevt_streamc.add_kevent_filter(ENUM_IMAGE)
            self.kevt_streamc.add_kevent_filter(REG_CREATE_KCB)
            self.kevt_streamc.add_kevent_filter(REG_DELETE_KCB)

            # these kevents are necessary for consistent state
            # of the trace. If the user doesn't include them
            # in a filter list, then we do the job but set the
            # kernel event type as not eligible for rendering
            if not KEvents.CREATE_PROCESS in kevent_filters:
                self.kevt_streamc.add_kevent_filter(CREATE_PROCESS)
                self.requires_render[CREATE_PROCESS] = False
            else:
                self.requires_render[CREATE_PROCESS] = True

            if not KEvents.CREATE_THREAD in kevent_filters:
                self.kevt_streamc.add_kevent_filter(CREATE_THREAD)
                self.requires_render[CREATE_THREAD] = False
            else:
                self.requires_render[CREATE_THREAD] = True

            if not KEvents.CREATE_FILE in kevent_filters:
                self.kevt_streamc.add_kevent_filter(CREATE_FILE)
                self.requires_render[CREATE_FILE] = False
            else:
                self.requires_render[CREATE_FILE] = True

            for kevent_filter in kevent_filters:
                ktuple = kname_to_tuple(kevent_filter)
                if isinstance(ktuple, list):
                    for kt in ktuple:
                        self.kevt_streamc.add_kevent_filter(kt)
                        if not kt in self.requires_render:
                            self.requires_render[kt] = True
                else:
                    self.kevt_streamc.add_kevent_filter(ktuple)
                    if not ktuple in self.requires_render:
                        self.requires_render[ktuple] = True

    def _on_next_kevent(self, ktype, cpuid, ts, kparams):
        """Callback which fires when new kernel event arrives.

        This callback is invoked for every new kernel event
        forwarded from the kernel stream collector.

        Parameters
        ----------

        ktype: tuple
            Kernel event type.
        cpuid: int
            Identifies the CPU core where the event
            has been captured.
        ts: str
            Temporal reference of the kernel event.
        kparams: dict
            Kernel event's parameters.
        """

        # initialize kernel event properties
        self.kevent.ts = ts
        self.kevent.cpuid = cpuid
        self.kevent.name = ktuple_to_name(ktype)
        kparams = ddict(kparams)
        # thread / process kernel events
        if ktype in [CREATE_PROCESS, CREATE_THREAD, ENUM_PROCESS, ENUM_THREAD]:
            self.thread_registry.add_thread(ktype, kparams)
            if ktype in [CREATE_PROCESS, CREATE_THREAD]:
                self.thread_registry.init_thread_kevent(
                    self.kevent, ktype, kparams)
                self._render(ktype)
        elif ktype in [TERMINATE_PROCESS, TERMINATE_THREAD]:
            self.thread_registry.init_thread_kevent(self.kevent, ktype,
                                                    kparams)
            self._render(ktype)
            self.thread_registry.remove_thread(ktype, kparams)

        # file system/disk kernel events
        elif ktype in [
                CREATE_FILE, DELETE_FILE, CLOSE_FILE, READ_FILE, WRITE_FILE
        ]:
            self.fsio.parse_fsio(ktype, kparams)
            self._render(ktype)

        # dll kernel events
        elif ktype in [LOAD_IMAGE, ENUM_IMAGE]:
            self.dll_repository.register_dll(kparams)
            if ktype == LOAD_IMAGE:
                self._render(ktype)
        elif ktype == UNLOAD_IMAGE:
            self.dll_repository.unregister_dll(kparams)
            self._render(ktype)

        # registry kernel events
        elif ktype == REG_CREATE_KCB:
            self.hive_parser.add_kcb(kparams)
        elif ktype == REG_DELETE_KCB:
            self.hive_parser.remove_kcb(kparams.key_handle)

        elif ktype in [
                REG_CREATE_KEY, REG_DELETE_KEY, REG_OPEN_KEY, REG_QUERY_KEY,
                REG_SET_VALUE, REG_DELETE_VALUE, REG_QUERY_VALUE
        ]:
            self.hive_parser.parse_hive(ktype, kparams)
            self._render(ktype)

        # network kernel events
        elif ktype in [
                SEND_SOCKET_TCPV4, SEND_SOCKET_UDPV4, RECV_SOCKET_TCPV4,
                RECV_SOCKET_UDPV4, ACCEPT_SOCKET_TCPV4, CONNECT_SOCKET_TCPV4,
                DISCONNECT_SOCKET_TCPV4, RECONNECT_SOCKET_TCPV4
        ]:
            self.tcpip_parser.parse_tcpip(ktype, kparams)
            self._render(ktype)

        if self._filament:
            # call filament method
            # to process the next
            # kernel event from the stream
            if ktype not in [ENUM_PROCESS, ENUM_THREAD, ENUM_IMAGE]:
                if self.kevent.name:
                    self._filament.process(self.kevent)

    def _render(self, ktype):
        """Renders the kevent to the standard output stream.

        Parameters
        ----------

        ktype: tuple
            Identifier of the kernel event
        """
        if not self._filament:
            if ktype in self.requires_render:
                rr = self.requires_render[ktype]
                if rr:
                    self.kevent.render()
            elif self.filters_count == 0:
                self.kevent.render()
def main():
    """
    Copy a folder from Source to Target

    """

    log_filename = os.path.join(
        args.log_dir, 'copy-google-drive-folder-{}.log'.format(
            os.path.basename(time.strftime('%Y%m%d-%H%M%S'))))

    # register some logging handlers
    log_handler = FileHandler(log_filename,
                              mode='w',
                              level=args.log_level,
                              bubble=True)
    stdout_handler = StreamHandler(sys.stdout,
                                   level=args.log_level,
                                   bubble=True)

    with stdout_handler.applicationbound():
        with log_handler.applicationbound():
            log.info("Arguments: {}".format(args))
            start = time.time()
            log.info("starting at {}".format(
                time.strftime('%l:%M%p %Z on %b %d, %Y')))

            credentials = get_credentials()
            http = credentials.authorize(httplib2.Http())
            drive_service = discovery.build('drive', 'v3', http=http)

            # get the files in the specified folder.
            files = drive_service.files()
            request = files.list(
                pageSize=args.page_size,
                q="'{}' in parents".format(args.source_folder_id),
                fields="nextPageToken, files(id, name, mimeType)")

            page_counter = 0
            file_counter = 0
            while request is not None:
                file_page = request.execute(http=http)
                page_counter += 1
                page_file_counter = 0  # reset the paging file counter

                # determine the page at which to start processing.
                if page_counter >= args.start_page:
                    log.info(u"######## Page {} ########".format(page_counter))

                    for this_file in file_page['files']:
                        file_counter += 1
                        page_file_counter += 1
                        log.info(
                            u"#== Processing {} {} file number {} on page {}. {} files processed."
                            .format(this_file['mimeType'], this_file['name'],
                                    page_file_counter, page_counter,
                                    file_counter))

                        # if not a folder
                        if this_file[
                                'mimeType'] != 'application/vnd.google-apps.folder':
                            # Copy the file
                            new_file = {'title': this_file['name']}
                            copied_file = drive_service.files().copy(
                                fileId=this_file['id'],
                                body=new_file).execute()
                            # move it to its new location
                            drive_service.files().update(
                                fileId=copied_file['id'],
                                addParents=args.target_folder_id,
                                removeParents=args.source_folder_id).execute()
                        else:
                            log.info(u"Skipped Folder")

                else:
                    log.info(u"Skipping Page {}".format(page_counter))

                # stop if we have come to the last user specified page
                if args.end_page and page_counter == args.end_page:
                    log.info(
                        u"Finished paging at page {}".format(page_counter))
                    break

                # request the next page of files
                request = files.list_next(request, file_page)

            log.info("Running time: {}".format(
                str(datetime.timedelta(seconds=(round(time.time() -
                                                      start, 3))))))
            log.info("Log written to {}:".format(log_filename))
def main():
    """
    Copy a folder from Source to Target

    """

    log_filename = os.path.join(
        args.log_dir,
        'copy-google-drive-folder-{}.log'.format(os.path.basename(time.strftime('%Y%m%d-%H%M%S')))
    )

    # register some logging handlers
    log_handler = FileHandler(
        log_filename,
        mode='w',
        level=args.log_level,
        bubble=True
    )
    stdout_handler = StreamHandler(sys.stdout, level=args.log_level, bubble=True)

    with stdout_handler.applicationbound():
        with log_handler.applicationbound():
            log.info("Arguments: {}".format(args))
            start = time.time()
            log.info("starting at {}".format(time.strftime('%l:%M%p %Z on %b %d, %Y')))

            credentials = get_credentials()
            http = credentials.authorize(httplib2.Http())
            drive_service = discovery.build('drive', 'v3', http=http)

            # get the files in the specified folder.
            files = drive_service.files()
            request = files.list(
                pageSize=args.page_size,
                q="'{}' in parents".format(args.source_folder_id),
                fields="nextPageToken, files(id, name, mimeType)"
            )

            page_counter = 0
            file_counter = 0
            while request is not None:
                file_page = request.execute(http=http)
                page_counter += 1
                page_file_counter = 0  # reset the paging file counter

                # determine the page at which to start processing.
                if page_counter >= args.start_page:
                    log.info(u"######## Page {} ########".format(page_counter))

                    for this_file in file_page['files']:
                        file_counter += 1
                        page_file_counter += 1
                        log.info(u"#== Processing {} {} file number {} on page {}. {} files processed.".format(
                            this_file['mimeType'],
                            this_file['name'],
                            page_file_counter,
                            page_counter,
                            file_counter
                        ))

                        # if not a folder
                        if this_file['mimeType'] != 'application/vnd.google-apps.folder':
                            # Copy the file
                            new_file = {'title': this_file['name']}
                            copied_file = drive_service.files().copy(fileId=this_file['id'], body=new_file).execute()
                            # move it to its new location
                            drive_service.files().update(
                                fileId=copied_file['id'],
                                addParents=args.target_folder_id,
                                removeParents=args.source_folder_id
                            ).execute()
                        else:
                            log.info(u"Skipped Folder")

                else:
                    log.info(u"Skipping Page {}".format(page_counter))

                # stop if we have come to the last user specified page
                if args.end_page and page_counter == args.end_page:
                    log.info(u"Finished paging at page {}".format(page_counter))
                    break

                # request the next page of files
                request = files.list_next(request, file_page)

            log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3))))))
            log.info("Log written to {}:".format(log_filename))
        for k,v in headers.iteritems():
            self.req.add_header(k,v)

    def set_req(self):
        self.req = urllib2.Request(self.url, urllib.urlencode(self.data))
        #self.req = urllib2.Request(self.url)

    def send(self):
        self.set_req()
        return urllib2.urlopen(self.req)



if __name__ == "__main__":

    logger = Logger("TicketchangeToInfluxdb")
    logfile = "ticketchangetoinfluxdb.log"
    fh = FileHandler(logfile,"a")
    fh.applicationbound()
    fh.push_application()

    client = Client()
    client.test()
    adapter = Adapter()
    client.set_adapter(adapter)
    a = client.get_adapter()
    a.test()

    print("This is just a test.")
    logger.info("Testing logging.")
Example No. 12
#!/bin/env python

import os, sys, urllib, urllib2, time
from logbook import Logger, FileHandler

user = "******"
token = "password"

message = bytes(user).encode('utf-8')
secret = bytes(token).encode('utf-8')
logger = Logger("Cache Purge")
logfile = "cache-purge.log"

fh = FileHandler(logfile, "a")
fh.applicationbound()
fh.push_application()

api_root = "https://api.ccu.akamai.com"
get_call = "/ccu/v2/queues/default"
#data = {}

try:
    req = None
    url = api_root + get_call
    mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    mgr.add_password(None, api_root, user, token)
    handler = urllib2.HTTPBasicAuthHandler(mgr)
    opener = urllib2.build_opener(handler)
    urllib2.install_opener(opener)
    req = urllib2.Request(url)
    #req = urllib2.Request(api_root,urllib.urlencode(data))
Example No. 13
class Pipeline(object):
    """Represents the abstraction of a pipeline of jobs to be run
    distributed over machines
    """

    def __init__(self, workdir, jobs, total_cores, scheduler=None, queue=None,
                 local=False, retries=None):
        """Initialize a pipeline.

        :param workdir: Name of a directory to use for scratch space
        and results. This needs to be visible to all nodes over NFS or
        similar.

        :param jobs: A list of jobs, which are just dicts. The only
        required key for now is "description", which will be used for
        the directory that holds all this job's output.

        :param total_cores: The total number of cores you want to use
        for processing.

        :returns: A Pipeline object, which has methods that invoke
        various kinds of distributed work.

        """

        # validate things
        for job in jobs:
            if type(job) is not dict:
                raise ValueError("job is not a dict: {}".format(job))
            if not job.get("description"):
                raise ValueError("job {} has not description".format(job))
        workdir = os.path.abspath(os.path.expanduser(workdir))
        if not os.path.exists(workdir):
            raise ValueError(
                "workdir: {} appears not to exist".format(workdir))
        self.workdir = workdir
        self.jobs = jobs
        self.total_cores = total_cores
        self.scheduler = scheduler
        self.queue = queue
        self.local = local
        self.retries = retries
        # setup default cluster_view
        self._cluster_view = cluster_view

    def start(self):
        """Initialize workdir, logging, etc. in preparation for running jobs.
        """

        # make a working directory for each job
        for job in self.jobs:
            job["workdir"] = os.path.join(self.workdir, job["description"])
            fs.maybe_mkdir(job["workdir"])
        # temporary ipython profile directory
        self.ipythondir = os.path.join(self.workdir, ".ipython")
        fs.maybe_mkdir(self.ipythondir)
        # log dir
        self.logdir = os.path.join(self.workdir, "log")
        fs.maybe_mkdir(self.logdir)

        # determine which IP we are going to listen on for logging
        try:
            self.listen_ip = localinterfaces.public_ips()[0]
        except:
            raise ValueError("This machine appears not to have"
                             " any publicly visible IP addresses")

        # setup ZMQ logging
        self.handler = FileHandler(os.path.join(self.logdir, "dish.log"))
        self.listen_port = str(randint(5000, 10000))
        self.subscriber = ZeroMQPullSubscriber("tcp://" + self.listen_ip +
                                               ":" + self.listen_port)
        self.controller = self.subscriber.dispatch_in_background(self.handler)
        self.logger = Logger("dish_master")

    def stop(self):
        """Gracefully shutdown the Pipeline, cleaning up threads, sockets,
        etc.  Leaves working directory intact so everything can in
        principle be picked up again where we left off.

        """
        self.controller.stop()
        self.subscriber.close()

    def _compute_resources(self, cores_per_engine, mem_per_engine,
                           max_engines):
        if cores_per_engine > self.total_cores:
            raise ValueError("A job requested {0} but only {1}"
                             " are available.".format(cores_per_engine,
                                                      self.total_cores))
        num_engines = self.total_cores // cores_per_engine
        if len(self.jobs) < num_engines:
            # we don't even need this many engines
            num_engines = len(self.jobs)
        if max_engines:
            num_engines = min(num_engines, max_engines)
        # TODO in the future, should maybe validate that requested
        # cores and memory are actually going to be available. This
        # would unfortunately have to be specialized for each
        # scheduler probably.
        return num_engines, cores_per_engine, mem_per_engine

    @contextmanager
    def group(self, cores=1, mem="0.1", max=None):
        """Context manager for "grouping" a set of pipeline operations. A
        group of operations is run on the same ipython cluster and has
        its resources specified in the group as opposed to in each
        individual job. This is useful if there is some small amount
        of setup work that isn't worth spinning up a new cluster for
        but which needs to be done before a resource intensive task.

        For example::

            with p.group(cores=8, mem=12):
               p.run("setup.sh . . .")  # do some data munging or other setup
               p.run("main_work -n 8 . . .")  # call an expensive program

        """
        # TODO this duplicates some code from p.map and is a bit
        # clunky, there is probably a better abstraction here
        engines, cores, mem = self._compute_resources(cores, mem, max)
        extra_params = {"run_local": self.local,
                        "mem": mem}
        old_view_factory = self._cluster_view
        cm = self._cluster_view(self.scheduler, self.queue,
                                engines, profile=self.ipythondir,
                                cores_per_job=cores,
                                extra_params=extra_params,
                                retries=self.retries)
        view = cm.gen.next()

        @contextmanager
        def reuse_view(*args, **kwargs):
            yield view

        # everything done in the block will use the view we just made
        self._cluster_view = reuse_view
        try:
            yield
        finally:
            # restore the normal cluster_view context manager on exit
            self._cluster_view = old_view_factory
            try:
                cm.gen.next()  # clean up the view we've been using
            except StopIteration:
                pass

    def _transaction_filter(self, targets):
        """Filter the `jobs` appropriately based on whether `targets` is a
        function, str, or list of str"""
        # TODO there has got to be a better way to do this -____-
        to_run = []
        dont_run = []
        if callable(targets):
            f = targets
            for job in self.jobs:
                if f(job):
                    dont_run.append(job)
                else:
                    to_run.append(job)
            return to_run, dont_run
        elif isinstance(targets, str):
            targets = [targets]
        elif not isinstance(targets, list):
            TypeError("transaction targets must be list, str, or callable")
        for job in self.jobs:
            canonical_targets = fs.canonicalize(job, targets)
            if all((os.path.exists(target)
                    for target in canonical_targets)):
                info = ("Skipping transaction for job {} targets {} "
                        "already present")
                with self.handler.applicationbound():
                    self.logger.info(info.format(job["description"],
                                                 canonical_targets))
                dont_run.append(job)
            else:
                # targets not present for this job
                to_run.append(job)
        return to_run, dont_run

    @contextmanager
    def transaction(self, targets):
        """Do some work "transacationally", in the sense that nothing done
        inside a ``transaction`` block will be "commited" to the
        workdir unless it all succeeds without error. The work done
        inside a transaction is also idempotent in that you must
        specify a ``target`` file or files for the tranasaction and it
        will not be run if the target exists already. This is perhaps
        best illustrated by a simple example::

            with p.transaction("{workdir}/example.txt"):
                p.run("{tmpdir}/touch example.txt")

        This will result in a file ``B.txt`` in each job's
        ``workdir``. The creation of this file will be skipped if the
        code is run again and the file already exists. This is
        obviously a silly example, but the code inside the `with`
        block can be any arbitrarily complex series of operations
        which produces a set of target output files at the end. This
        is a powerful feature in that it allows pipelines to be
        restartable: if a pipeline crashes for some reason but you
        have its major sections wrapped in ``transaction`` blocks,
        you can simply run it again and pick up where you left off
        without redoing any work. The transaction blocks guarantee
        that the ``workdir`` for each job is never in an inconsistent
        state and that work that's already been completed isn't
        redone.

        Inside a transaction, each job has a special ``tmpdir`` key,
        whose value is the path to a unique temporary directory for
        the job. You can do work that produces files inside the
        ``tmpdir`` and expect everything in it to be moved to the
        job's ``workdir`` if the transaction completes without error.
        The ``tmpdir`` will be removed at the end of the transaction
        regardless of whether or not it succeeds. We change
        directories to the ``tmpdir`` before doing anything else and
        implicitly consider targets to be relative to a job's
        ``workdir``, so the above example could also be written::

            with p.transaction("example.txt"):
                p.run("touch example.txt")

        which sacrifices explicitness for brevity.

        :param targets: a string or list of strings describing files
        that must exist in order for the transaction to be skipped.

        """
        to_run, dont_run = self._transaction_filter(targets)
        for job in to_run:
            job["tmpdir"] = tempfile.mkdtemp(dir=job["workdir"])
        self.jobs = to_run
        try:
            yield
        finally:
            for job in self.jobs:
                if not os.path.exists(os.path.join(job["tmpdir"], ".error")):
                    fs.liftdir(job["tmpdir"], job["workdir"])
                shutil.rmtree(job["tmpdir"])
                del job["tmpdir"]
            self.jobs = dont_run + self.jobs

    def localmap(self, f):
        """Just like ``map``, but work locally rather than launching an ipython
        cluster.  This is useful for tasks where the cluster launch
        overhead would swamp the cost of the actual work to be done.

        :params f: function of ``(job, logger)`` to be mapped over all jobs.

        """
        self.jobs = map(logging_wrapper, self.jobs,
                        (f for j in self.jobs),
                        (self.listen_ip for j in self.jobs),
                        (self.listen_port for j in self.jobs))

    def map(self, f, cores=1, mem="0.1", max=None):
        """Map the function ``f`` over all of the ``jobs`` in this
        pipeline. ``f`` must be a function of two arguments, the job
        and a logger. It should modify the job it is passed, which
        will then be returned over the wire. A silly example::

            def f(job, logger):
                job["capitalized_description"] = job["description"].toupper()
            p.map(f)

        Will give each ``job`` in the pipeline a ``capitalized_description``
        attribute, which can then be used in future pipeline operations.

        ``cores`` and ``mem`` are used to specify the cores and memory
        required by this step; they will be passed to the underlying
        scheduler. ``max`` can be used as a hard limit on the number of
        jobs to run. This is useful if, for example, a particular task
        puts pressure on some sort of storage system (a distributed
        file system, object store, etc.) that you know will fail under
        too much load.

        :param f: function of ``(job, logger)`` to be mapped over all jobs.
        :param cores: cores required by this call.
        :param mem: memory required by this call.
        :param max: maximum number of jobs to submit.

        """
        if not self.jobs:
            # this looks very odd, it's necessary because sometimes
            # being in a transaction causes self.jobs to be empty, and
            # IPython throws errors if you try to map over the empty
            # list. It might be cleaner to catch the error after
            # letting IPython do the map; will have to think about it.
            return
        engines, cores, mem = self._compute_resources(cores, mem, max)
        extra_params = {"run_local": self.local,
                        "mem": mem}
        with self._cluster_view(self.scheduler, self.queue,
                                engines, profile=self.ipythondir,
                                cores_per_job=cores,
                                extra_params=extra_params,
                                retries=self.retries) as view:
            # using cloudpickle allows us to serialize all sorts of things
            # we wouldn't otherwise be able to
            dview = view.client.direct_view()
            use_cloudpickle()
            dview.apply(use_cloudpickle)
            self.jobs = view.map_sync(logging_wrapper, self.jobs,
                                      (f for j in self.jobs),
                                      (self.listen_ip for j in self.jobs),
                                      (self.listen_port for j in self.jobs))

    def run(self, template, capture_in=None, **kwargs):
        """Run the ``template`` formatted with the contents of each
        job. Example::

            p.run("touch {workdir}/example.txt")

        will make an example.txt file in each job's workdir.

        ``cores`` and ``mem`` mean the same thing they do in the
        ``map`` method.

        If a string is passed for ``capture_in``, the stdout of the command
        will be captured in ``job[capture_in]`` for each job.

        """
        runner = cmdrunner(template, capture_in)
        self.map(runner, **kwargs)
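The docstrings above describe the intended workflow; a short end-to-end sketch of how a caller might wire the Pipeline together (the workdir, job descriptions, scheduler, and command are illustrative assumptions, and the workdir must already exist):

# Hypothetical driver for the Pipeline class above.
jobs = [{"description": "sample-1"}, {"description": "sample-2"}]
p = Pipeline(workdir="/shared/scratch/demo", jobs=jobs, total_cores=8,
             scheduler="slurm", queue="normal")
p.start()
try:
    # skipped entirely on a re-run once example.txt exists in each job's workdir
    with p.transaction("example.txt"):
        p.run("touch example.txt", cores=1)
finally:
    p.stop()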