Example #1
    def start(self):
        logger.debug('client pid:%s' % os.getpid())
        import argparse
        from apps.openport_api import open_port
        from common.session import Session

        parser = argparse.ArgumentParser()
        self.add_default_arguments(parser)
        args = parser.parse_args()
        if args.verbose:
            from logging import DEBUG
            set_log_level(DEBUG)

        if args.no_gui:
            args.hide_message = True
            args.no_clipboard = True
        self.args = args

        if not args.hide_message:
            import wx
            self.app = wx.App(redirect=False)

        def show_message_box(session):
            wx.MessageBox('Your local port %s is now reachable on %s' % (session.local_port, session.get_link()),
                          'Info', wx.OK | wx.ICON_INFORMATION)

        self.first_time = True

        def callback(ignore):
            if not self.first_time:
                return
            self.first_time = False

            session.restart_command = self.get_restart_command(session)
            if args.tray_port > 0:
                self.inform_tray_app_new(session, args.tray_port, start_tray=(not args.no_tray))

            session.error_observers.append(self.error_callback)
            session.success_observers.append(self.success_callback)

            if not args.no_clipboard:
                self.copy_share_to_clipboard(session)
            if not args.hide_message:
                show_message_box(session)

        session = Session()
        session.local_port = int(args.local_port)
        session.server_port = args.request_port
        session.server_session_token = args.request_token

#        app.MainLoop()

        def show_error(error_msg):
            import wx
            wx.MessageBox(error_msg, 'Error', wx.OK | wx.ICON_ERROR)
        self.session = session

        open_port(session, callback, show_error)
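
The callback above registers `self.error_callback` and `self.success_callback` on the session's observer lists before `open_port` starts forwarding. A minimal sketch of the observer mechanism the `Session` object appears to provide is shown below; the `notify_*` helper names are assumptions made for illustration, not taken from `common.session.Session` itself.

# Sketch of the observer lists the example relies on (assumed helpers,
# not the actual common.session.Session implementation).
class Session(object):
    def __init__(self):
        self.error_observers = []    # callables invoked when the tunnel fails
        self.success_observers = []  # callables invoked when the tunnel is up
        self.local_port = None
        self.restart_command = None

    def notify_success(self):
        for observer in self.success_observers:
            observer(self)

    def notify_error(self, error):
        for observer in self.error_observers:
            observer(self, error)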
Example #2
    def startOpenportProcess(self, port):
        session = Session()
        app_dir = self.os_interaction.get_application_dir()
        if self.os_interaction.is_compiled():
            session.restart_command = [os.path.join(app_dir, 'openport_app.exe'), '--local-port', '%s' % port]
        else:
            session.restart_command = ['python', os.path.join(app_dir, 'apps/openport_app.py'), '--local-port', '%s' % port]
        logger.debug(session.restart_command)

        self.os_interaction.start_openport_process(session, hide_message=False, no_clipboard=False)
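
Which restart command gets stored depends on `os_interaction.is_compiled()`, i.e. whether the app is running as a frozen executable or as a plain script. A common way to make that distinction (an assumption here, not the project's actual helper) is to check the `sys.frozen` flag set by PyInstaller/py2exe:

import sys

def is_compiled():
    # Frozen builds (PyInstaller, py2exe) set sys.frozen on the sys module;
    # a regular interpreter run does not. Assumed equivalent of
    # os_interaction.is_compiled(), shown for illustration only.
    return getattr(sys, 'frozen', False)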
Example #3
    def get_share_from_row(self, row):
        share = Session()
        share.server = row[0]
        share.server_port = row[1]
        share.server_session_token = row[2]
        share.local_port = row[3]
        share.pid = row[4]
        share.active = row[5]
        # restart_command may be stored either pickled or as a plain
        # space-separated string: try to unpickle, fall back to split().
        share.restart_command = row[6].split()
        try:
            share.restart_command = pickle.loads(row[6].encode('ascii', 'ignore'))
        except Exception:
            pass

        share.id = row[7]
        return share
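
The double parse of `row[6]` suggests that `restart_command` has been written to the database in two formats: a plain space-separated string and a pickled list. A minimal sketch of the matching write side, under that assumption (the helper name is hypothetical), could look like this:

import pickle

def restart_command_to_column(restart_command):
    # Hypothetical serializer for the write side: store the command list
    # pickled with the ASCII protocol so get_share_from_row() can
    # pickle.loads() it, while older rows saved as plain strings still
    # parse via split().
    return pickle.dumps(restart_command, protocol=0).decode('ascii')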
Example #4
def main():
    try:
        args = cmd_args.init()
        cmd = args.command
        # Variables
        data_dir = validate.directory(glbs.ES_BULK_DATA)
        es_url = glbs.ES_URL if 'DOCKER' not in os.environ else glbs.ES_URL_DOCKER
        kibana_url = glbs.KIBANA_URL if 'DOCKER' not in os.environ else glbs.KIBANA_URL_DOCKER
        # Command-line Options
        # Initializing Kibana and ES with mappings and visualizations
        if cmd == 'init':
            # Need to wait for Kibana and ES to start up whilst using docker
            # Choosing to write this here instead of bash script + docker-compose for maintainability
            if 'DOCKER' in os.environ:
                waiting = True
                timeout_counter = 0
                # Wait to establish connection, and timeout if it takes too long
                while waiting:
                    try:
                        response = requests.get(kibana_url).status_code
                    # Catching the error if ES/Kibana isn't accepting connections yet
                    except Exception:
                        response = 400
                    if response == 200:
                        waiting = False
                    else:
                        if timeout_counter >= 5:
                            raise Exception(
                                f"Unable to ping Kibana instance located at '{kibana_url}'"
                            )
                        print(
                            "ElasticSearch and Kibana services are not ready yet, trying again in 60 seconds"
                        )
                        sleep(60)
                        timeout_counter += 1

            # Preparing Elasticsearch and Kibana for data consumption
            for index in glbs.ES_INDEX_NAME:
                common.create_index(es_url,
                                    index,
                                    validate.file(
                                        os.path.join(glbs.ES_MAPPINGS,
                                                     f"{index}_mapping.json")),
                                    silent=args.silent,
                                    force=args.force)
                common.create_index_pattern(kibana_url,
                                            index,
                                            silent=args.silent,
                                            force=args.force)
            # Importing visualizations
            common.import_kibana(kibana_url,
                                 ndjson=common.get_files(
                                     glbs.ES_DIR,
                                     "visualizations.ndjson").pop(),
                                 silent=args.silent)
        # Uploading user data into ES
        if cmd == 'update' or cmd == 'demo':
            # Loop through all climbing logs, normalize and add additional information
            if not args.silent:
                print("[1/5] Retreiving climbing logs...")
            if cmd == 'demo':
                session_logs = get_session_yamls(glbs.SAMPLE_DATA_DIR)
            else:
                session_logs = get_session_yamls(glbs.INPUT_DIR)
            sessions = []
            session_data = []
            project_data = []
            counter_data = []
            project_list = {}
            if not args.silent:
                print("[2/5] Enhancing and normalizing data...")
            for log in session_logs:
                try:
                    # Creating Session class from logs
                    climbing_session = Session(log)
                    # Create and maintain a running list of projects and a total counter across all Sessions
                    if climbing_session.Projects:
                        for project in climbing_session.Projects:
                            if project.name in project_list.keys():
                                updated_total = [
                                    x + y for x, y in zip(
                                        project_list[project.name].
                                        get_counters(), project.get_counters())
                                ]
                                # Remove is_last from the previous project instance and assign the new value to the current project
                                project_list[project.name].set_is_last(False)
                                project.set_is_last(True)
                                # Increase the running counters and update the project with the current running counter
                                project.set_total_counter(
                                    updated_total[0], updated_total[1],
                                    updated_total[3], updated_total[4],
                                    updated_total[5], updated_total[6])
                                # del project_list[project.name]
                                project_list[project.name] = project
                            # If the project isn't in the running list, add it.
                            # Total counter is default the same as counter, so don't change the values
                            else:
                                project.set_is_last(True)
                                project_list[project.name] = project
                    sessions.append(climbing_session)

                except Exception as ex:
                    raise Exception(f"Unable to update '{log}'. {ex}")
            # Loop through the list of Sessions and update the output lists
            for session in sessions:
                session_data.append(session.toDict())
                counter_data.extend(session.getCounters())
                project_data.extend(session.getProjects())
            if not args.silent:
                print("[3/5] Writing climbing data to json...")
            common.write_bulk_api(
                session_data,
                os.path.join(
                    data_dir, "sessions.json"
                    if cmd == 'update' else "sessions_demo.json"), 'sessions')
            common.write_bulk_api(
                counter_data,
                os.path.join(
                    data_dir, "counters.json"
                    if cmd == 'update' else "counters_demo.json"), 'counters')
            common.write_bulk_api(
                project_data,
                os.path.join(
                    data_dir, "projects.json"
                    if cmd == 'update' else "projects_demo.json"), 'projects')
            # Importing all data into elasticSearch
            if not args.silent:
                print("[4/5] Uploading data into ElasticSearch...")
            common.upload_to_es(es_url, data_dir, silent=True)
            if not args.silent:
                print(
                    f"[5/5] Visualizations and stats are ready at {kibana_url}/app/dashboard"
                )
        # Exporting Kibana and ES Objects
        elif cmd == 'export':
            # Default export name used (ex. climbr_2020-09-19_02-55-15) unless overwritten by -o option
            timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
            filename = args.export_name if args.export_name else f"climbr_{timestamp}"
            # Default export location used (output directory) unless overwritten by the -d/--dest option
            destination = os.path.join(
                args.export_dest,
                filename) if args.export_dest else os.path.join(
                    glbs.OUTPUT_DIR, f"{filename}.ndjson")
            common.export_kibana(kibana_url, destination, silent=args.silent)
        # Import Kibana and ES Objects from a ndjson file
        elif cmd == 'import':
            for path in args.import_path:
                if os.path.isfile(path):
                    if '.ndjson' in os.path.splitext(path)[1]:
                        common.import_kibana(kibana_url,
                                             path,
                                             silent=args.silent)
                    else:
                        raise TypeError(
                            f"Unable to import '{path}'. Invlaid file extension, must be '.ndjson'."
                        )
                # If the given path is a directory, then gather all .ndjson files
                elif os.path.isdir(path):
                    files = common.get_files(path, ".*\.ndjson$")
                    for file in files:
                        common.import_kibana(kibana_url,
                                             file,
                                             silent=args.silent)
                else:
                    raise Exception(
                        f"Unable to import '{path}'. File path or directory does not exist."
                    )
    except Exception as ex:
        raise ex
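
The Docker branch at the top of `main()` open-codes a poll-until-ready loop against Kibana. The same logic can be factored into a small helper built on `requests`; the function below is a sketch of that refactoring, not part of the original module.

import requests
from time import sleep

def wait_for_service(service_url, retries=5, delay=60):
    # Poll the URL until it answers 200 OK, sleeping between attempts;
    # raise once the retry budget is exhausted (mirrors the loop in main()).
    for _ in range(retries):
        try:
            if requests.get(service_url).status_code == 200:
                return
        except requests.exceptions.RequestException:
            pass  # service not accepting connections yet
        print(f"Service at '{service_url}' is not ready yet, retrying in {delay} seconds")
        sleep(delay)
    raise Exception(f"Unable to ping service located at '{service_url}'")

With this helper, the `while waiting:` block under the `'DOCKER'` check would reduce to a single `wait_for_service(kibana_url)` call.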
Example #5
    def __init__(self):
        self.session = Session()
        self.logger = Log.create_logger('spider')
Example #6
class Spider:
    def __init__(self):
        self.session = Session()
        self.logger = Log.create_logger('spider')

    def get_id(self, redis: RedisSet):
        start_page = config.Config.spider.start_page
        params = {
            'area': config.Config.spider.area,
            'type': 0,
            'year': 0,
            'initial': '全部',
            'pIndex': start_page
        }

        res = self.session.get(url=url('/Mdata/getMdata_movie'), params=params)
        data = json.loads(res.text)
        self.logger.info('Total: {0} pages, {1} items'.format(
            data['tPage'], data['tCount']))
        end_page = data['tPage']
        for item in data['pData']:
            redis.add(item['ID'])
        self.logger.info('Page {}'.format(start_page))
        time.sleep(10)
        for i in range(start_page + 1, end_page + 1):
            params['pIndex'] = i
            res = self.session.get(url=url('/Mdata/getMdata_movie'),
                                   params=params)
            data = json.loads(res.text)
            for item in data['pData']:
                redis.add(item['ID'])
            self.logger.info('Page {}'.format(i))
            time.sleep(10)

    def start_crawl(self, extractor: Extractor, redis: RedisSet,
                    mongo: MongoDB):
        while not redis.empty():
            movie_id = redis.pop()
            self.logger.info('Movie ID: {}'.format(movie_id))
            info = self._crawl(movie_id, extractor)
            if info is not None:
                if mongo.count({'id': movie_id}) <= 0:
                    mongo.insert(info)
                else:
                    self.logger.info('Duplicate record')
            else:
                self.logger.info('Failed to download')
                redis.add(movie_id)
            time.sleep(10)

    def _crawl(self, movie_id, extractor: Extractor):
        retry = MAX_RETRY
        while retry:
            try:
                res = self.session.get(url=url('/m/{}'.format(movie_id)))
                info = extractor.extract_info(res.text)
                res = self.session.get(url=url(
                    '/Mdata/getMovieEventAll?movieid={}'.format(movie_id)))
                info['event'] = extractor.extract_events(res.text)
                info['id'] = movie_id
                return info
            except (NetworkException, AttributeError) as e:
                self.logger.error(str(e))
                retry -= 1
        return None
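
A driver that wires the pieces of Example #6 together might look like the sketch below. The constructors for `RedisSet`, `Extractor`, and `MongoDB` are assumptions made for illustration; only `Spider`, `get_id`, and `start_crawl` come from the example itself.

# Hypothetical driver; constructor arguments are assumed, not taken
# from the project's actual RedisSet/Extractor/MongoDB classes.
def run():
    redis = RedisSet('movie_ids')      # queue of movie IDs still to crawl
    mongo = MongoDB('movies')          # destination collection
    extractor = Extractor()            # HTML -> dict parser
    spider = Spider()

    spider.get_id(redis)               # seed the queue with IDs, page by page
    spider.start_crawl(extractor, redis, mongo)

if __name__ == '__main__':
    run()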