Example #1
    def __init__(self,
                 input_shapes=None,
                 params=None,
                 logger_path=None,
                 root_path=ROOT_PATH):
        """create instance of AbstractModel

        :type logger_path: str
        :param logger_path: path for log file
        if logger_path is None, log ony stdout
        """
        self.root_path = root_path

        if logger_path is None:
            self.log = Logger(self.__class__.__name__, LOG_PATH)
        else:
            self.log = Logger(self.__class__.__name__, logger_path)

        self.sess = None
        self.saver = None
        self.summary_writer = None
        self.is_built = False

        # gen instance id
        self.input_shapes = input_shapes
        self.params = params

        self.id = "_".join([self.__str__(), time_stamp()])
        self.instance_path = os.path.join(INSTANCE_PATH, self.id)
        self.instance_visual_result_folder_path = os.path.join(
            self.instance_path, VISUAL_RESULT_FOLDER)
        self.instance_source_folder_path = os.path.join(
            self.instance_path, 'src_code')
        self.instance_summary_folder_path = os.path.join(
            self.instance_path, 'summary')
        self.instance_class_name = self.__class__.__name__
        self.instance_source_path = os.path.join(
            self.instance_source_folder_path, self.id + '.py')
        self.metadata_path = os.path.join(self.instance_path, 'instance.meta')
        self.save_folder_path = os.path.join(self.instance_path, 'check_point')
        self.check_point_path = os.path.join(self.save_folder_path,
                                             'instance.ckpt')

        self.metadata = {
            MODEL_METADATA_KEY_INSTANCE_ID: self.id,
            MODEL_METADATA_KEY_INSTANCE_PATH: self.instance_path,
            MODEL_METADATA_KEY_INSTANCE_VISUAL_RESULT_FOLDER_PATH:
            self.instance_visual_result_folder_path,
            MODEL_METADATA_KEY_INSTANCE_SOURCE_FOLDER_PATH:
            self.instance_source_folder_path,
            MODEL_METADATA_KEY_INSTANCE_SOURCE_PATH: self.instance_source_path,
            MODEL_METADATA_KEY_INSTANCE_SUMMARY_FOLDER_PATH:
            self.instance_summary_folder_path,
            MODEL_METADATA_KEY_INSTANCE_CLASS_NAME: self.instance_class_name,
            MODEL_METADATA_KEY_METADATA_PATH: self.metadata_path,
            MODEL_METADATA_KEY_CHECK_POINT_PATH: self.check_point_path,
            MODEL_METADATA_KEY_SAVE_FOLDER_PATH: self.save_folder_path,
            MODEL_METADATA_KEY_PARAMS: self.params,
            MODEL_METADATA_KEY_INPUT_SHAPES: self.input_shapes,
        }
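
A minimal usage sketch for the constructor above (MyModel is a hypothetical concrete subclass; Logger, time_stamp, and the INSTANCE_PATH/ROOT_PATH constants are assumed from the surrounding module):

    model = MyModel(input_shapes={'x': [None, 32, 32, 3]},
                    params={'learning_rate': 1e-4})
    print(model.id)             # "<MyModel.__str__()>_<time_stamp()>"
    print(model.instance_path)  # os.path.join(INSTANCE_PATH, model.id)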
Example #2
    def __init__(self, env, serviceName):
        self.log = Logger("debug")

        opera = OperationIni()

        chrome_driver = findPath.data_dir(fileName='chromedriver.exe',
                                          pathName='driver')
        base_url = opera.read_ini(section='CONFIG', key='base_url')
        url = base_url + opera.read_ini(section=env, key='url')

        self.userName = opera.read_ini(section='CONFIG', key='userName')
        self.passWord = opera.read_ini(section='CONFIG', key='passWord')
        self.ServiceName = opera.read_ini(section='CONFIG', key=serviceName)

        chrome_options = Options()
        # run the Chrome browser in headless (no UI) mode
        chrome_options.add_argument('--headless')

        self.log.info("开始调用webdriver,当前模式为Chrome无界面模式")
        self.d = webdriver.Chrome(executable_path=chrome_driver,
                                  chrome_options=chrome_options)
        self.d.maximize_window()
        self.log.info('Opened Chrome successfully')
        self.d.get(url)
        self.d.implicitly_wait(30)
        print('Opened URL successfully: {0}'.format(url))
        self.log.info('Opened URL successfully: {0}'.format(url))
Example #3
    def __init__(self,
                 func=None,
                 n_parallel=4,
                 initializer=None,
                 initargs=(),
                 child_timeout=30):
        self.logger = Logger(self.__class__.__name__)
        self.log = self.logger.get_log()

        self.func = func

        self.n_parallel = n_parallel

        if initializer is None:
            self.initializer = init_worker
        else:
            self.initializer = initializer
        self.initargs = initargs
        self.child_timeout = child_timeout

        self.pools = [
            # use the resolved initializer; the original always passed init_worker,
            # silently ignoring a custom initializer argument
            Pool(1, initializer=self.initializer, initargs=initargs)
            for _ in range(n_parallel)
        ]
        self.queues = [Queue() for _ in range(n_parallel)]
        self.pbar = None
        self.fail_list = []
Example #4
    def __init__(self,
                 preprocess=None,
                 batch_after_task=None,
                 before_load_task=None):
        """
        init dataset attrs

        *** bellow attrs must initiate other value ***
        self._SOURCE_URL: (str) url for download dataset
        self._SOURCE_FILE: (str) file name of zipped dataset
        self._data_files = (str) files name in dataset
        self.batch_keys = (str) feature label of dataset,
            managing batch keys in dict_keys.dataset_batch_keys recommend

        :param preprocess: injected function for preprocess dataset
        :param batch_after_task: injected function for after iter mini_batch
        :param before_load_task: hookable function for AbstractDataset.before_load
        """
        self._SOURCE_URL = None
        self._SOURCE_FILE = None
        self._data_files = None
        self.batch_keys = None
        self.logger = Logger(self.__class__.__name__, stdout_only=True)
        self.log = self.logger.get_log()
        self.preprocess = preprocess
        self.batch_after_task = batch_after_task
        self.data = {}
        self.cursor = {}
        self.data_size = 0
        self.before_load_task = before_load_task
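
A sketch of a concrete subclass that overrides the attributes documented above (the base class name AbstractDataset comes from the docstring; the URL and file names are hypothetical):

    class MyDataset(AbstractDataset):
        def __init__(self, **kwargs):
            super().__init__(**kwargs)
            # replace the None placeholders with real values
            self._SOURCE_URL = 'http://example.com/my_dataset.zip'  # hypothetical URL
            self._SOURCE_FILE = 'my_dataset.zip'
            self._data_files = ['train.bin', 'test.bin']
            self.batch_keys = ['x', 'label']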
Example #5
 def __init__(self,
              creator,
              server,
              name=None,
              description=None,
              openTime=None,
              closeTime=None,
              absoluteThreshold=None,
              percentThreshold=None,
              percentThresholdMinimum=None,
              thresholdTime=None,
              keepUpdated=True,
              pollid=None):
     self.log = Logger()
     self.base = CabbageBase()
     self.creator = creator
     self.server = server
     self.name = name
     self.description = description
     self.openTime = openTime
     self.closeTime = closeTime
     self.absoluteThreshold = absoluteThreshold
     self.percentThreshold = percentThreshold
     self.percentThresholdMinimum = percentThresholdMinimum
     self.thresholdTime = thresholdTime
     self.options = {'short': [], 'long': [], 'emoji': []}
     self.keepUpdated = keepUpdated
     if pollid:
         self.pollid = pollid
     else:
         self.genPollid()
     self.update()
Example #6
def calculate_average_bow_size(res_folder):
    """
    Calculate average bow size for the URLBow database

    :param res_folder:
    :return:
    """

    total_bow_sizes={"right":0,"wrong":0,"swing":0}
    bow_count={"right":0,"wrong":0,"swing":0}

    Logger.info("Average bow size, on right bow size")
    for right_res in RightResultsIter(res_folder):
        total_bow_sizes["right"]+=len(URLBow.objects.get(index=right_res.ref_id).bow)
        bow_count["right"]+=1

    Logger.info("Average bow size, on wrong bow size")
    for wrong_res in WrongResultsIter(res_folder):
        if wrong_res.is_swing_sample():
            label="swing"
        else:
            label="wrong"

        bow_count[label]+=1
        total_bow_sizes[label]+=len(URLBow.objects.get(index=wrong_res.ref_id).bow)

    print([(label,total/bow_count[label] if bow_count[label] != 0 else 1,bow_count[label]) for label,total in total_bow_sizes.items()])
Example #7
    def get_genre_refs(*genres):

        genre_objs = []
        for agenre_obj in genres:
            # find all matching genres
            if isinstance(agenre_obj, str):
                genre_models = Genre.objects(genre=agenre_obj)
            else:
                genre_models = Genre.objects(genre=agenre_obj['genre'])

            if len(genre_models) == 0:
                if not isinstance(agenre_obj, str):
                    genre_model = Genre()

                    for (k, v) in agenre_obj.items():
                        genre_model[k] = v
                else:
                    genre_model = Genre(genre=agenre_obj)

                try:
                    genre_model.save()
                except Exception:
                    Logger.error("Error saving: " + str(agenre_obj))

                genre_objs.append(genre_model)
            else:
                genre_objs.extend(genre_models.all())

        return genre_objs
Example #8
    def __init__(self):
        self.logger = Logger()
        self.service = {
            "ObjectTracking": {
                "Performance": {
                    "DetectionSpeed": "ProcessingTime",
                    "DetectionAccuracy": "DetectionRate"
                },
                "Reliability": {
                    "VideoContinuity": "FPS"
                },
                "Security": {
                    "VideoComposition": "NumberOfComposedVideos"
                }
            },
            "ObjectCounting": {
                "Performance": {
                    "DetectionSpeed": "ProcessingTime",
                    "DetectionAccuracy": "DetectionRate"
                },
                "Reliability": {
                    "VideoContinuity": "FPS"
                },
                "Security": {
                    "VideoComposition": "NumberOfComposedVideos"
                }
            }
        }

        self.logger.debug("Get Service Knowledge")
Example #9
 def set_binary_model(self, model_file_path: str):
     timer: Timer = Timer()
     Logger().start_analyzing("Loading binary Word2VecModel")
     self.model = KeyedVectors.load_word2vec_format(model_file_path,
                                                    binary=True)
     Logger().finish_analyzing(timer.get_duration(),
                               "Loading binary Word2VecModel")
Example #10
    def __init__(self, net_type, model_out_dir, frequency, electrodes, learning_rate=0.002, batch_size=32,
                 epochs=30):
        """
        initializes the basic class variables
        Args:
            learning_rate: the chosen learning rate
            batch_size: the amount of items per batch
            epochs: the amount of epochs
        """
        if frequency is None:
            self.input_shape = (len(electrodes), 101)
        else:
            self.input_shape = (len(electrodes), 5)
        self.frequency = frequency
        self.net_type = net_type
        self.model_out_dir = model_out_dir
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epochs = epochs

        self.model = None
        self.history = None

        self.logger = Logger(model_out_dir, self.net_type)

        if not os.path.exists(os.path.join(model_out_dir, self.net_type)):
            os.makedirs(os.path.join(model_out_dir, self.net_type))
Example #11
    def __init__(self, env='QA'):
        self.log = Logger("debug")
        self.opera = OperationIni(fileName='config.ini', pathName='config')
        path = '/website/saas/account/api2/user/login'
        self.key = env.lower() + '_token'
        d = get_env_authorization(env=env)
        self.url = d[0] + path
        self.cookie = d[1]
        self.userName = d[2]
        self.passWord = d[3]

        # if env == 'QA':
        #     self.url = self.opera.read_ini(section='Authorization', key='qa_url') + path
        #     self.cookie = self.opera.read_ini(section='Authorization', key='qa_cookie')
        #     self.userName = self.opera.read_ini(section='Authorization', key='qa_username')
        #     self.passWord = self.opera.read_ini(section='Authorization', key='qa_password')
        # if env == 'DEV':
        #     self.url = self.opera.read_ini(section='Authorization', key='dev_url') + path
        #     self.cookie = self.opera.read_ini(section='Authorization', key='dev_cookie')
        #     self.userName = self.opera.read_ini(section='Authorization', key='dev_username')
        #     self.passWord = self.opera.read_ini(section='Authorization', key='dev_password')

        self.headers = {
            'Cookie': self.cookie,
            'Content-Type': 'application/x-www-form-urlencoded'
        }
Example #12
 def __init__(self, root_path):
     self.root_path = root_path
     self.logger = Logger(self.__class__.__name__, self.root_path)
     self.log = self.logger.get_log()
     self.model = None
     self.visualizers = []
     self.sub_process = {}
Example #13
    def __init__(self, path=None, execute_interval=None, name=None):
        """create Visualizer

        :type path: str
        :type execute_interval: int
        :type name: str
        :param path: path for saving visualized result
        :param execute_interval: interval for execute
        :param name: naming for visualizer
        """

        self.execute_interval = execute_interval
        self.name = name
        self.visualizer_path = os.path.join(path, self.__str__())

        if not os.path.exists(path):
            os.mkdir(path)

        if not os.path.exists(self.visualizer_path):
            os.mkdir(self.visualizer_path)

        files = glob(os.path.join(self.visualizer_path, '*'))
        self.output_count = len(files)

        self.logger = Logger(self.__class__.__name__, self.visualizer_path)
        self.log = self.logger.get_log()
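
A minimal usage sketch (HistVisualizer is a hypothetical subclass; the constructor only needs a save path, an interval, and a name):

    viz = HistVisualizer(path='./visual_results', execute_interval=10, name='hist')
    print(viz.output_count)  # number of files already in viz.visualizer_path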
Example #14
def apiMoveUserVision(userId):
    if request.method == 'POST':
        if SessionManager.userLoggedIn():

            userInfo = SessionManager.getUser()
            if userInfo['id'] != userId:
                abort(406)

            parameters = request.json
            if 'visionId' not in parameters or \
               'srcIndex' not in parameters or \
               'destIndex' not in parameters:
                abort(406)
            visionId = parameters['visionId']
            srcIndex = parameters['srcIndex']
            destIndex = parameters['destIndex']

            Logger.debug("V:%s src: %s dest: %s" % (visionId, srcIndex, destIndex))

            user = User.getById(userInfo['id'])
            result = user.moveVision(visionId, srcIndex, destIndex)

            if result:
                data = { 'result' : "success" }
            else:
                data = { 'result' : "error" }
            return jsonify(data)
        abort(403)
    abort(405)
Example #15
 def __init__(self, root_path=ROOT_PATH):
     """create DatasetManager
     todo
     """
     self.root_path = root_path
     self.logger = Logger(self.__class__.__name__, self.root_path)
     self.log = self.logger.get_log()
     self.datasets = {}
Example #16
def create():

    '''
    Debugging Tip:
    if you see: 

        Bad Request
        The browser (or proxy) sent a request that this server could not understand.

    (a 400 error)

    Make sure all of the form fields are given correctly

    http://stackoverflow.com/questions/8552675/form-sending-error-flask
    '''
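    # As the tip above notes, request.form[key] makes Flask abort with a 400
    # when a field is missing, so the None checks below never see a missing
    # field. A hedged alternative is request.form.get(key), which returns None
    # instead and lets this function return "Invalid Vision Parameters":
    #   mediaUrl = request.form.get(Constant.BOOKMARKLET_POST_MEDIA_URL)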

    mediaUrl = request.form[Constant.BOOKMARKLET_POST_MEDIA_URL]
    text = request.form[Constant.BOOKMARKLET_POST_TEXT]
    pageUrl = request.form[Constant.BOOKMARKLET_POST_PAGE_URL]
    pageTitle = request.form[Constant.BOOKMARKLET_POST_PAGE_TITLE]

    #Vision Privacy
    private = False
    if Constant.BOOKMARKLET_POST_IS_PRIVATE in request.form:
        private = True

    #Format for saving
    visionIsPublic = not private

    #Validate Parameters
    if mediaUrl is None \
        or text is None \
        or pageUrl is None \
        or pageTitle is None:
        return "Invalid Vision Parameters"

    Logger.debug("URL: " + mediaUrl)

    #Question: Do we really need to check the login again here?
    #Check Login
    if not SessionManager.userLoggedIn():
        return redirect(url_for('login'))

    #Get the user id
    userId = SessionManager.getUser()['id']

    #Add
    user = User.getById(userId)
    message = "Could not find user"  # assumed default; keeps the error template below from hitting an unbound name
    if user:
        # TODO: should we save pageUrl and pageTitle also?
        vision, message = user.addVision(mediaUrl, text, False, visionIsPublic)

        if vision:
            #Successful Create!
            return render_template('successCreatingVision.html',
                                   visionId=vision.id(), userId=userId)
    #Error
    return render_template('errorCreatingVision.html', message=message)
Example #17
def apiAddUserVision(userId):
    if request.method == 'POST':
        if SessionManager.userLoggedIn():
            userInfo = SessionManager.getUser()
            if userInfo['id'] != userId:
                abort(406)

            parameters = request.json
            if 'useImage' not in parameters or \
               'text' not in parameters or \
               'privacy' not in parameters:
                abort(406)
            useImage = parameters['useImage']
            text = parameters['text'].strip()
            isPublic = parameters['privacy']

            Logger.debug("IsPublic: " + str(isPublic))

            # Make sure input OK to create a new vision
            if useImage == False:
                # TODO: should we allow text w/o image?
                # if useImage == False and len(text) == 0:
                abort(406)

            # Make sure image link OK
            url = ""
            if useImage == True:
                url = SessionManager.getPreviewUrl()

            # Create a new vision with the photo
            user = User.getById(userId)
            
            # Make sure we have a valid user
            if not user:
                data = {'result' : "error"}

            else:
                vision, errorMsg = user.addVision(url, text, True, isPublic)

                if vision:
                    objList = VisionList.createFromVision(vision)
                    if len(objList.visions()) == 1:
                        data = { 'result'    : "success",
                                 'newVision' : objList.toDictionary(
                                        options=[Vision.Options.PICTURE,
                                                 Vision.Options.USER,
                                                 Vision.Options.PARENT_USER,
                                                 Vision.Options.COMMENT_PICTURES,
                                                 Vision.Options.COMMENTS])[0] }
                    else:
                        data = { 'result' : "error" }

                else:
                    data = { 'result' : "error" }

            return jsonify(data)
        abort(403)
    abort(405)
Example #18
def main(argv):
    parser = argparse.ArgumentParser(
        description='Upload a hub to display on Apollo.')
    parser.add_argument('-j',
                        '--data_json',
                        help='JSON file containing the metadata of the inputs')
    parser.add_argument('-o', '--output', help='HTML output')

    #parser.add_argument('-e', '--extra_file_path', help='Extra file path for generated jbrowse hub')
    #parser.add_argument('-d', '--jbrowsehub', help='Name of the HTML summarizing the content of the JBrowse Hub Archive')

    # Get the args passed in parameter
    args = parser.parse_args()
    json_inputs_data = args.data_json
    outputFile = args.output
    #outputFile = args.jbrowsehub

    ##Parse JSON file with Reader
    reader = Reader(json_inputs_data)

    # Begin init variables
    extra_files_path = reader.getExtFilesPath()
    #user_email = reader.getUserEmail()
    species_name = reader.getSpeciesName()
    #apollo_host = reader.getApolloHost()
    apollo_port = reader.getPortNum()
    apollo_host = "http://localhost:" + apollo_port + "/apollo"
    #apollo_host = "http://localhost:8080/apollo"
    #apollo_user = reader.getApolloUser()
    apollo_admin_user = reader.getAdminUser()
    toolDirectory = reader.getToolDir()
    #jbrowse_hub = reader.getJBrowseHubDir()
    debug_mode = reader.getDebugMode()

    #### Logging management ####
    # If we are in Debug mode, also print in stdout the debug dump
    log = Logger(tool_directory=toolDirectory,
                 debug=debug_mode,
                 extra_files_path=extra_files_path)
    log.setup_logging()

    logging.info(
        "#### JBrowseArchiveCreator: Start to upload JBrowse Hub to Apollo instance: %s #### ",
        apollo_host)
    logging.debug('JSON parameters: %s\n\n', json.dumps(reader.args))

    # Set up apollo
    apollo = ApolloInstance(apollo_host, apollo_admin_user, toolDirectory)
    jbrowse_hub_dir = _getHubDir(extra_files_path)
    apollo.loadHubToApollo(apollo_admin_user,
                           species_name,
                           jbrowse_hub_dir,
                           admin=True)
    outHtml(outputFile, apollo_host, species_name)

    logging.info(
        '#### JBrowseArchiveCreator: Congratulation! JBrowse Hub is uploaded! ####\n'
    )
Example #19
 def __init__(self, logLevel, action):
     self.actionToRun = action
     self.logger = Logger(name="a2d2 thread",
                          logFile=conf.APPLICATION_LOG_FILE,
                          level=logLevel)
     threading.Thread.__init__(self)
     self.__stopFlag = False
     self.__bypass = False  # if True, actions are skipped in periodic check
     self.logger.info("Initialised.")
Example #20
    def __init__(self, config):
        threading.Thread.__init__(self)
        self.logger = Logger()
        self.serviceList = []

        self.config = config

        self.ip = config['MQTT']['ip']
        self.port = int(config['MQTT']['port'])
Example #21
def query_sql_keep_connection(db, sql):
    try:
        Logger.logDebug("query_db: " + sql)
        rs = db.query(sql)
        return rs
    except:
        traceback.print_exc(file=sys.stdout)
        db.close()
        raise
Example #22
class DBManager(object):
    def __init__(self, mongo, collection):
        self.logger = Logger()
        self.logger.debug("INTO DBManager!")
        client = MongoClient(mongo["ip"], username=mongo["username"],
                             password=mongo["password"], authSource=mongo["database"],
                             authMechanism='SCRAM-SHA-1')
        database = client.get_database(mongo["database"])
        self.collection = database.get_collection(collection)

    def getCollection(self):
        return self.collection
Example #23
def execute_sql_keep_connection(db, sql):
    try:
        Logger.logDebug("excute_db: " + sql)
        affect = db.execute(sql)
        db.commit()
        return affect
    except:
        traceback.print_exc(file=sys.stdout)
        db.close()
        raise
Example #24
    def save_url(**kwargs):
        kwargs['url'] = replace_dot_url(kwargs['url'])

        url_model = URLToGenre(**kwargs)

        save_obj = None  # stays None if the save fails, avoiding an unbound name below
        try:
            save_obj = url_model.save()
        except Exception:
            Logger.error("Error saving: " + str(kwargs['url']))

        return save_obj
Example #25
    def scrape_links_from_position(self, pos):
        MongoDB.connect(settings.HOST_NAME, settings.PORT)
        links = self.__get_next_urls(pos)

        Logger.info(links)
        for link in links:
            self.scrape_link_and_child(link)

        Logger.debug('Process job completed')
        return 0
Example #26
    def __init__(self, logger_path=None):
        """create instance of AbstractModel

        :type logger_path: str
        :param logger_path: path for log file
        if logger_path is None, log ony stdout
        """
        if logger_path is None:
            self.logger = Logger(self.__class__.__name__, with_file=True)
        else:
            self.logger = Logger(self.__class__.__name__, logger_path)
        self.log = self.logger.get_log()
Example #27
    def __init__(self, root_path=ROOT_PATH):
        """ create a 'InstanceManager' at env_path

        :type root_path: str
        :param root_path: env path for manager
        """
        self.root_path = root_path
        self.logger = Logger(self.__class__.__name__, self.root_path)
        self.log = self.logger.get_log()
        self.instance = None
        self.visualizers = {}
        self.subprocess = {}
Example #28
    def post(self, command):
        xml = request.data
        Logger().debug(xml)
        result = json.dumps(xmltodict.parse(xml)['service'])
        Logger().debug(result)

        if command == "start":
            self.serviceManager.receiveService(ServiceInstance(result))
        elif command == "stop":
            self.serviceManager.stopService(ServiceInstance(result))
        else:
            pass
Example #29
def execute_sql(sql, dbcfg, dbtype = "oracle"):
    try:
        db = connect_db(dbcfg, dbtype)
        Logger.logDebug("execute_sql: " + sql)
        affect = db.execute(sql)
        db.commit()
        return affect
    except:
        traceback.print_exc(file=sys.stdout)
        raise 
    finally:
        db.close()
Example #30
def startService(name):
    logger = Logger()
    logger.debug("Start Service!")
    client = docker.from_env()
    service = client.services.create(
        "face_detection",
        name=name,
        networks=["swarm_net"],
        mounts=["/home/pi/video/face_detection/container:/data:rw"],
        mode="replicated",
        constraints=["node.labels.name==node03"])
    #container = client.containers.run("face_detection:latest", detach=True)
    return service
Example #31
    def __init__(self, pack_keys=None):
        super().__init__()
        self.log = Logger(self.__class__.__name__)
        if pack_keys is None:
            pack_keys = self.class_pack.keys()

        self.pack = {}
        for key in pack_keys:
            self.pack[key] = self.class_pack[key]()

        self.optimize_result = {}

        self.params_save_path = SKLEARN_PARAMS_SAVE_PATH
Example #32
    def push_to_queue(number, url_doc):
        try:
            if URLQueue.objects(number=number):
                return None

            URLQueue(number=number, document=url_doc).save()
        except Exception:
            try:
                Logger.error('Failed to save with url: {}'.format(url_doc['url']))
            except Exception:
                Logger.error('Complete error saving number: {}'.format(number))
Example #33
    def __init__(self, ip, port, duration, name):
        self.logger = Logger()
        self.logger.debug("INTO DeviceAbstractor!")
        self.capabilityList = []

        self.ip = ip
        self.port = port
        self.duration = duration
        self.name = name

        self.profiler = Profiler(self.duration)

        self.doProfiling()
Example #34
    def __init__(self, pid, env='QA'):
        self.log = Logger("debug")
        opera = OperationIni(fileName='config.ini', pathName='config')
        self.get_skuId = GetGoodsDetail(env=env, pid=pid)
        self.get_access_token = GetAccessToken(env=env, pid=pid)

        # convert the env string to lowercase
        env = env.lower()
        key = env + '_url'

        self.base_url = opera.read_ini(section='goods', key=key)
        self.path = opera.read_ini(section='goods', key='wholeUpdateStock')
        self.access_token = self.get_access_token.get_ini_access_token()
Example #35
    def __init__(self, env='QA'):
        self.log = Logger("debug")
        opera = OperationIni(fileName='config.ini', pathName='config')
        self.env = env
        self.get_access_token = GetAccessToken(env=env)

        # convert the env string to lowercase
        env = env.lower()
        key = env + '_url'
        self.url = opera.read_ini(section='goods', key=key)
        self.path = opera.read_ini(section='goods', key='queryGoodsDetail')

        self.access_token = self.get_access_token.get_ini_access_token()
Example #36
 def parse(self):
     Logger().start_analyzing(self.relative_path)
     self.identifier_list_model = LanguageParser().parse_file(
         self.extension, self.content)
     self.identifier_dictionary_model = IdentifierDictionaryModel(
         self.identifier_list_model)
     self.word_dictionary_model = WordDictionaryModel(
         self.identifier_dictionary_model)
     if Word2VecModel.instance.exists():
         self.calculate_semantic_metrics()
     self.identifier_dictionary_model.set_word_metrics(
         self.word_dictionary_model.get_dictionary())
     Logger().finish_analyzing(self.timer.get_duration(),
                               self.relative_path)
Example #37
class ServiceManager(object):
    def __init__(self, config):
        threading.Thread.__init__(self)
        self.logger = Logger()
        self.serviceList = []
        self.config = config

    def receiveService(self, serviceInstance):
        # pass the callable and its args; calling publishService(...) inline would
        # run it synchronously and hand Thread its return value instead
        t = threading.Thread(target=self.publishService, args=(serviceInstance,))
        self.serviceList.append([t, serviceInstance])
        t.start()

    def stopService(self, serviceInstance):
        ClusterManager.stopService(serviceInstance)

    #
    # Service Management
    #
    '''
    Name: publishService
    Parameter: ServiceInstance
    Action:
            let requirementInterpreter interpret the service's requirements in terms of device capabilities
            -->
            let resourceSelector select suitable nodes which satisfy the service's requirements
            -->
            let clusterManager make the selected nodes start the service
    '''

    def publishService(self, serviceInstance):
        self.logger.debug("PublishService starts!")

        # INTERPRET
        interpretedRequirement = RequirementInterpreter.interpret(
            serviceInstance)

        # SELECT
        serviceInstance.setInterpretedRequirement(interpretedRequirement)
        serviceCapabilityManager = ServiceCapabilityManager(
            self.config, serviceInstance)
        serviceCapabilityManager.start()
        selectedNodes = ResourceSelector.selectNodes(serviceInstance,
                                                     serviceCapabilityManager)

        print("selected nodes: " + ", ".join(selectedNodes))
        self.logger.debug("selected nodes: " + ", ".join(selectedNodes))

        # START
        serviceInstance.setSeledtedNodes(selectedNodes)
        ClusterManager.startService(serviceInstance)
Example #38
    def scrape_link_and_child(self, parent_url):
        parent_url = base_util.replace_dot_url(parent_url)
        webpage_body, parent_url = self.scrape(base_util.unreplace_dot_url(parent_url), None)

        # exit if we failed to scrape the website
        if webpage_body is None:
            return

        MongoDB.save_page(url=parent_url, page=webpage_body)
        Logger.info('Completed page: ' + parent_url)

        # now grab the children of this webpage
        all_ahref = [base_util.combine_parent_rel_link(parent_url, a.attrs['href'])
                     for a in BeautifulSoup(webpage_body, 'html.parser', from_encoding="utf-8").find_all('a')
                     if 'href' in a.attrs]

        child_urls = random.sample(all_ahref, settings.GET_X_CHILD) if len(all_ahref) >= settings.GET_X_CHILD else all_ahref

        # get rid of bad normalization
        if not re.match('^www[.].*$', parent_url):
            Logger.info('Updating bad url for {}'.format(parent_url))
            MongoDB.update_url(base_util.normalize_url(parent_url), parent_url)

        if len(child_urls) > 0:
            parent_genres = MongoDB.get_genre(parent_url)

            # get the children
            for child_url in child_urls:
                child_page = self.scrape(child_url, parent_url)

                if child_page is None:
                    exploredset = set()
                    tries = 0
                    for url in set(all_ahref) ^ exploredset:
                        if tries == settings.MAX_RETRIES:
                            Logger.info('Max retry count exceeded')
                            break

                        Logger.info("trying new url: " + url)

                        child_page = self.scrape(url, parent_url)

                        if child_page is not None:
                            break
                        exploredset.add(url)

                        tries += 1

                if child_page is not None:
                    MongoDB.save_modify_url(url=base_util.replace_dot_url(child_url),
                                            parent=[MongoDB.get_url_object(parent_url)],
                                            genre=parent_genres, page=child_page)
                    Logger.info('Completed page: ' + child_url)
Example #39
def main():
    script_name: str = PathExtractor().get_file_name(sys.argv[0])

    if len(sys.argv) != 2:
        Logger().usage(f'python {script_name} <wiki.en.raw.txt>')
        return

    file_path = sys.argv[1]

    if PathValidator().is_valid_files([file_path]):
        Logger().info(f'Input file: "{file_path}"')
        Logger().info("Starting to remove stopwords")
        timer = Timer()
        remove_stopwords(file_path)
        Logger().finish_script(timer.get_duration(), script_name)
Example #40
def calculate_genres_per_instance(res_folder, classifiers=""):
    current_classifier = classifiers

    right_genresize_counter = collections.Counter()
    wrong_genresize_counter = collections.Counter()
    swing_genresize_counter = collections.Counter()

    Logger.info("Current on rights")

    # iterate over the right samples first; we don't write them to file because right files are the same
    for right_res_obj in {x.ref_id: x for x in RightResultsIter(res_folder, classifiers)}.values():
        assert isinstance(right_res_obj, ClassificationResultInstance)
        if right_res_obj.classifier != current_classifier:
            current_classifier = right_res_obj.classifier

        # now find the size of its genre
        right_genresize_counter.update([len(URLBow.objects.get(index=right_res_obj.ref_id).short_genres)])

    Logger.info("Current on wrongs")

    swing_file = res_folder + "/{}swing.txt".format(classifiers + "_" if classifiers.strip() != "" else classifiers)
    wrong_file = res_folder + "/{}wrong_true.txt".format(classifiers + "_" if classifiers.strip() != "" else classifiers)

    with open(swing_file, mode="w") as swing_handle, open(wrong_file, mode="w") as wrong_handle:
        # iterate over the wrong samples
        for wrong_res_obj in {x.ref_id: x for x in WrongResultsIter(res_folder, classifiers)}.values():
            assert isinstance(wrong_res_obj, ClassificationResultInstance)
            if wrong_res_obj.classifier != current_classifier:
                current_classifier = wrong_res_obj.classifier

            if wrong_res_obj.is_swing_sample():
                swing_handle.write(str(wrong_res_obj) + "\n")

                swing_genresize_counter.update([len(URLBow.objects.get(index=wrong_res_obj.ref_id).short_genres)])

            else:
                wrong_handle.write(str(wrong_res_obj) + "\n")

                # now find the size of its genre
                wrong_genresize_counter.update([len(URLBow.objects.get(index=wrong_res_obj.ref_id).short_genres)])

    print("Wrong predicted sample distribution: {}".format(sorted(wrong_genresize_counter.items(), key=operator.itemgetter(0))))
    print("Right predicted sample distribution: {}".format(sorted(right_genresize_counter.items(), key=operator.itemgetter(0))))
    print("Swing sample distribution: {}".format(sorted(swing_genresize_counter.items(), key=operator.itemgetter(0))))
Example #41
    def scrape_urls_multiproc(cls):
        # current position
        pos = MongoDB.get(MetaData, 'position', type='queue')
        # current cap
        cap = pos

        process_queue = queue.Queue(maxsize=settings.NUM_PROCESSES)

        # create all the necessary processes
        for p_num in range(0, settings.NUM_PROCESSES):
            p = mp.Process(target=WebScraper().scrape_links_from_position, args=[cap])
            # get corresponding objects
            process_queue.put(p)

            cap += settings.NUM_URLS_PER_PROCESS

            # now start
            p.start()

        head = process_queue.get()
        # wait and create new processes as needed
        while pos < MongoDB.count(URLQueue):
            head.join()

            if head.exitcode != 0:
                Logger.error('Error with Process, terminating')
                return

            # update counter
            MongoDB.increment_url_counter(settings.NUM_URLS_PER_PROCESS)

            p = mp.Process(target=WebScraper().scrape_links_from_position, args=[cap])
            process_queue.put(p)
            p.start()

            # increase both cap and current position
            cap += settings.NUM_URLS_PER_PROCESS
            pos += settings.NUM_URLS_PER_PROCESS
            head = process_queue.get()

        print(p.exitcode)

        return cls
Example #42
    def get(self, url):
        self._randomized_wait()
        response = None
        try:
            response = self.http.request('GET', url, timeout=settings.TIME_OUT)
            self.bad_count = 0
        except Exception:
            self.bad_count += 1

            # wait and sleep until we get an answer
            if self.bad_count >= settings.REQUEST_EXCEPTION_UNTIL_TEST_CONNECTION:
                while not self.testInternet():
                    Logger.info('Waiting for internet')
                    time.sleep(2)

                response = self.http.request('GET', url, timeout=settings.TIME_OUT)
                self.bad_count = 0

        return response
Example #43
    def scrape(self):
        home = self.http.get(dmoz_home)

        home_page_links = self._scrapeHomeAndGetLinks(home.data)

        # visit each link in the homepage and dig down
        # for url in home_page_links:
        i = 0
        while i < settings.NUM_RANDOM_WEBPAGE:
            result = self._scrapPage(home_page_links[random.randint(0, len(home_page_links) - 1)])

            if result is not None and MongoDB.get_url_object(result['url']) is None:
                i += 1
                try:
                    page = utf_8_safe_decode(self.http.get(result['url']).data)

                    MongoDB.save_modify_url(page=page, **result)

                    Logger.info("Completed: " + result['url'])
                except Exception as ex:
                    Logger.error(ex)
Example #44
    def scrape(self, url, parent):
        Logger.debug('Starting url scrape for {}'.format(url))
        config.last_url_and_parent = url + ', {}'.format('' if parent is None else parent)

        new_url = base_util.unreplace_dot_url(url)

        response = self.http.get(new_url)
        Logger.debug('Got URL')
        if not hasattr(response, 'data') and new_url.startswith('www.'):
            new_url = new_url.replace('www.', 'http://')

            response = self.http.get(new_url)

            if not hasattr(response, 'data'):
                new_url = new_url.replace('http://', 'http://www.')
                response = self.http.get(new_url)

        if hasattr(response, 'data'):
            body = base_util.utf_8_safe_decode(response.data)
        else:
            Logger.error('No data associated with ' + new_url)
            raise AttributeError(new_url + ':::No data')

        return body, new_url
Example #45
def calculate_similarity():
    q = DBQueue("similarity_queue")
    genre_meta_data = GenreMetaData.objects.order_by("url")[q.get() :]

    # init the Analytics
    analytics_coll = col.Analytics()

    if analytics_coll.select(name=ANALYTICS_NAME).find_one() is None:
        analytics_coll.create(
            alexa_total=0,
            edit_distance_count=0,
            total_edit_distance=0,
            alexa_match=0,
            name=ANALYTICS_NAME,
            alexa_genre_length=0,
        )

    urls = set()
    # calculate the similar on a document to document basis
    for genre_meta in genre_meta_data:

        if genre_meta["url"] not in urls:
            urls.add(genre_meta["url"])

            Logger.info("Doing genre for url: {}".format(genre_meta["url"]))

            similarity_res = _calculate_similarity_document(genre_meta)

            analytics_obj = analytics_coll.select(name=ANALYTICS_NAME).find_one()

            for k in similarity_res.keys():
                similarity_res[k] += analytics_obj[k]

            analytics_coll.select(name=ANALYTICS_NAME).update(**similarity_res)
            q.increment()

    print("URL has a unique percent of {}".format(len(urls) / len(genre_meta_data) * 100))
Example #46
                headers[key] = self.headers[key]
            response_tuple = conn.connect(str(self.command), str(self.path), headers, body)
            self.send_response(response_tuple[1])
            try:
                for key in response_tuple[3]:
                    self.send_header(key, response_tuple[3][key])
            except:
                import traceback
                traceback.print_exc()
            self.end_headers()
            self.wfile.write(response_tuple[4])
        except:
            self.send_error(500)


class ThreadServer(ThreadingMixIn, HTTPServer):
    pass
        
if __name__ == '__main__':
    # server = ProxyServer()
    # server.setDaemon(True)
    # server.start()
    # while True:
    #     pass

    server = ThreadServer(('localhost', 7890), HttpHandler)
    Logger.log('Starting server at 7890')
    server.serve_forever()

    # h = HttpConnection.get_single_instance()
    # h.connect('GET', 'http://www.zhihu.com', None)
Example #47
def lda(lda, train_set, n_top_words):
    """
    Conduct LDA with the train_set
    """

    lda.fit(train_set.X)
    # NOTE: vocab must be populated with the training vocabulary (e.g. from the
    # vectorizer that produced train_set.X); left as None, np.array(vocab) below fails
    vocab = None

    topic_word = lda.topic_word_
    for i, topic_dist in enumerate(topic_word):
        topic_words = np.array(vocab)[np.argsort(topic_dist)][:-n_top_words:-1]
        print('Topic {}: {}'.format(i, ' '.join(topic_words)))
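
A hedged usage sketch (assuming the lda PyPI package, whose LDA model exposes topic_word_ as used above; train_set and the vectorizer feeding vocab are assumptions):

    import lda as lda_pkg
    model = lda_pkg.LDA(n_topics=20, n_iter=500, random_state=1)
    lda(model, train_set, n_top_words=8)  # vocab inside lda() must first be populated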

if __name__=="__main__":
    clustering_logger=Logger()
    """
    Unsupervised Clustering bootstrap
    """

    mapping={"short_genres":"short_genre","index":"ref_index","bow":"attr_map"}

    #s=SourceMapper(URLBow.objects(),mapping)
    X_pickle_path=os.path.join(PICKLE_DIR,"X_summary_pickle")
    y_pickle_path=os.path.join(PICKLE_DIR,"y_summary_pickle")
    ref_index_pickle_path=os.path.join(PICKLE_DIR,"refIndex_summary_pickle")

    mapping={"short_genres":"short_genre","index":"ref_index","bow":"attr_map"}

    #SETTING UP LABEL
    settings=LearningSettings(type="unsupervised",dim_reduction="chi",feature_selection="summary",num_attributes=10000)
Example #48
    def __init__(self, type_queue, position=0):
        self.queue = MetaData(type=type_queue)

        if self.queue.find_one() is None:
            Logger.info('Queue of type {} does not exist in database, creating'.format(type_queue))
            self.queue.create(type=type_queue, position=position).save()
Example #49
# Get Config
app.config.from_object(DEFAULT_CONFIG)
if os.getenv('PROJECT_AWESOME_FLASK_SETTINGS'):
    app.config.from_envvar('PROJECT_AWESOME_FLASK_SETTINGS')

# Read LOCAL_DB and PROD from environment variable 
# (this is set on heroku for production)
if os.getenv('LOCAL_DB'):
    app.config['LOCAL_DB'] = (os.getenv('LOCAL_DB') == "true")
if os.getenv('PROD'):
    app.config['PROD'] = (os.getenv('PROD') == "true")

# If we are using the production database
if app.config['LOCAL_DB'] == False:
    Logger.info(" ********   Using the Production DB - be careful!   ******** ")

# Print current status of the config variables
Logger.info("PROD=" + str(app.config['PROD']) +
            "  DEBUG=" + str(app.config['DEBUG']) +
            "  LOCAL_DB=" + str(app.config['LOCAL_DB']))

SITE_DOMAIN = "http://www.goprojectawesome.com"
if app.config['PROD'] == False:
    SITE_DOMAIN = "http://127.0.0.1:5000"

#
# Add methods to Jinja2 context for creating URLs
#
def full_url_for(*args, **kwargs):
    '''Wrapper for url_for that prepends the domain to the path'''
Example #50
    def update_url(url, new_url):
        url = replace_dot_url(url)
        new_url = replace_dot_url(new_url)

        Logger.info('Updating {} to {}'.format(url, new_url))
        return URLToGenre.objects(url=url).update(url=new_url)
Example #51
def collect_bad_url():
    """
    Make bows of websites in the bad url list

    :return:
    """

    queue=DBQueue_old("genre_bow")

    #don't trust anything
    summarizer=Summarizer()
    bow=BagOfWords()
    short_genre_to_genre=coll.ShortGenre()
    url_to_bow=coll.URLBow()
    start_pos=queue.get()

    for c,line in enumerate(open("bad_url_summarize_bow.txt")):
        if c<start_pos:
            continue

        url=line.split(" ")[1].split(":::")[0]

        try:
            print('New url {} num: {}'.format(url,c))

            url_obj=coll.URLToGenre().select(url=url).find_one()

            if not hasattr(url_obj,"original") or not url_obj["original"]:
                print("Not original")
                continue

            #request page anyways, most of the bad pages are due to bad pagess
            data=Request().get_data(base_util.unreplace_dot_url(base_util.unreplace_dot_url(url_obj["url"])))

            if data is None:
                raise Exception('url {} No has page'.format(url))
            else:
                if not hasattr(url_obj,"page") or len(data)>len(url_obj["page"]):
                    print("updating data")
                    data=base_util.utf_8_safe_decode(data)

                    if not hasattr(url_obj,"page"):
                        #save page if the new page is significantly bigger than the old one
                        url_obj.save(page=data)

                    else:
                        url_obj.update(page=data)
                    url_obj.reload()

            if len(data) > len(url_obj.page):
                raise Exception("Inconsistency b/w data and page data")



            #url_obj=repair.genre_to_genre_data(url_obj.document)

            #get genre strings
            #register the genre with the short genres for faster retrieval
            genre_string_list=[]
            for g in url_obj.genre:
                normalized_string=base_util.normalize_genre_string(g["genre"])
                genre_string_list.append(normalized_string)
                short_genre_to_genre.select(short_genre=normalized_string).update(upsert=True,add_to_set__genres=g)

            Logger.info("Getting bow rep")
            #get BOW representation
            bow_dict=bow.get_word_count(summarizer.summarize(url_obj.page if isinstance(url_obj.page,str) else base_util.utf_8_safe_decode(url_obj)))

            if len(bow_dict)<20:
                raise Exception("Words less than 20")

            Logger.info("Update count:"+str(bow_dict))


            #store the url bow in urlbow table
            if not url_to_bow.select(url=url_obj["url"]).find_one():
                url_to_bow.create(url=url_obj["url"],bow=bow_dict,short_genres=genre_string_list)

            else:
                print('Exists bow url number {}'.format(url))

            queue.increment()
        except Exception as ex:
            Logger.error(url_obj['url']+":::"+str(ex),"C:/Users/Kevin/Desktop/GitHub/Research/Webscraper/bad_url_summarize_bow1.txt")
Example #52
def Worker_print(string):
  Logger.debug(string)
Example #53
from data.training_testing import MultiData
from data.util import unpickle_obj
from classification.classification import feature_selection
from functools import partial
from util.base_util import normalize_genre_string
from util.genre import filter_genres
from util.Logger import Logger
from data.X_y import match_sets_based_on_ref_id
from classification.classification import classify, load_training_testing
import operator as op
from classification.results import ResCrossValidation

__author__ = 'Kevin'


supervised_logger = Logger()

genre_dict={'Sports': 8757,
            'Business': 8553,
            'Shopping': 6920,
            'Computers': 6245,
            'Arts': 6165,
            'Society': 5841,
            'Recreation': 5770,
            'Health': 5418,
            'Science': 3662,
            'Games': 2767,
            'Reference': 2219,
            'Kids': 2142,
            'News': 1954,
            'Regional': 1949,