Example #1
def _getFeeds(feeds, feed_list):
    """Collect feed data from request response."""

    # Unwrap the 'feed' envelope if the response carries one
    feeds = feeds['feed'] if 'feed' in feeds else feeds

    if 'data' in feeds:

        for feed in feeds['data']:

            Log.info('Extracting feed data: ' + feed['id'])

            message = feed['message'] if 'message' in feed else ''
            link = feed['link'] if 'link' in feed else ''
            shares = _getShares(feed)
            comments_count = _getComments(feed)
            reactions_count = _getReactions(feed)

            feed_list.append(
                (feed['id'], message, link, shares, feed['created_time'],
                 comments_count, reactions_count))

        # Follow pagination if a next page exists
        if 'paging' in feeds and 'next' in feeds['paging']:
            feeds_url = feeds['paging']['next']
            feed_list = _getFeeds(_getRequest(feeds_url), feed_list)

        return feed_list
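
For reference, a minimal sketch of the response shape _getFeeds expects; every value below is an illustrative placeholder, and the helper functions (_getShares, _getComments, _getReactions, _getRequest) are assumed to be available in the module:

feeds = {
    'data': [
        {'id': '1_2', 'message': 'hello', 'link': 'https://example.com',
         'created_time': '2020-01-01T00:00:00+0000'}
    ]
    # a real response may also carry {'paging': {'next': <url>}}, which
    # _getFeeds follows recursively via _getRequest
}
feed_list = _getFeeds(feeds, [])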
Example #2
def rp5_download(location,
                 beginDate,
                 endDate,
                 source,
                 fileName=None,
                 dirPath=None,
                 extract=True):

    # Mandatory arguments to the rp5Interface.rp5_download API:
    #    location -> (string) Name of the place to download the weather data for.
    #    beginDate -> (string) Start date in format dd.mm.yyyy
    #    endDate -> (string) End date in format dd.mm.yyyy
    #    source -> (string) Data source from the rp5.in portal; either "metar" or "archive"

    # Optional arguments to the rp5Interface.rp5_download API:
    #    fileName -> (string) Name of the file without any extension
    #    dirPath -> (string) Full directory path where the file is to be downloaded. Default - rp5_ddmmYYYY
    #    extract -> (bool) Whether to extract the downloaded .xls.gz file. Default - True

    if fileName is not None:
        fileName = fileName + '.xls.gz'
    rp5 = RP5Interface(location=location,
                       beginDate=beginDate,
                       endDate=endDate,
                       source=source,
                       fileName=fileName,
                       dirPath=dirPath)
    fullPath = rp5.download_date()
    if extract:
        fileFullPath = fullPath.rsplit('.', 1)[0]
        with gzip.open(fullPath, 'rb') as f_in:
            with open(fileFullPath, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
                Log.info("Extracted file at - {}".format(fileFullPath))
Example #3
    def setTimeInterval(self, time_delta=1):
        """
        Set time interval in days for data extraction.
        DEFAULT: 1 day
        """

        self.time_delta = timedelta(days=time_delta)
        Log.info('Time delta: %i day(s)' % time_delta)
Example #4
    def _browser_exit(self, exit_status=0):
        if self._browser is not None:
            self._browser.close()
            if exit_status != 0:
                Log.error(self._exit_msg)
            else:
                Log.info(self._exit_msg)
            sys.exit(exit_status)
Example #5
    def _browser_create(self):
        try:
            if self._browser is not None:
                Log.info("Closing existing browser instance")
                self._browser.close()
            Log.info("Creating browser instance ...")
            self._browser = webdriver.Firefox()
        except Exception:
            Log.fatal("An error occurred while creating the browser instance!")
Example #6
    def init(self):
        """Start crawling."""

        Log.info('Crawling initiated...')
        data = self._getTarget()

        df = pd.DataFrame(data)
        df.to_json('data/data.json')

        Log.info('Crawling finished!')
Example #7
    def _select_data_source(self):
        if self._browser is not None:
            if self._rp5source.lower() == "metar":
                Log.info("Getting METAR data")
                dataLinkElem = self._browser.find_element_by_id('metar_link')
            elif self._rp5source.lower() == "archive":
                Log.info("Getting ARCHIVE data")
                dataLinkElem = self._browser.find_element_by_id('archive_link')
            else:
                self._exit_msg = "Something went wrong, exiting!"
                self._browser_exit(1)
            dataLinkElem.click()
Example #8
def run_listener(q):
    log = Log(__name__, level='INFO')
    log.info('Run AMQP listener until ctrl-c input\n {0}'.format(q))

    def thread_func(worker, id):
        worker.start()

    def signal_handler(signum, stack):
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    worker = AMQPWorker(queue=q)
    task = WorkerThread(worker, 'amqp')
    tasks = WorkerTasks(tasks=[task], func=thread_func)
    tasks.run()
    tasks.wait_for_completion(timeout_sec=-1)
Example #9
    def download_date(self):
        self._browser_create()
        self._search_loc()
        self._select_loc()

        DataSource.setSource(self._rp5source)
        self._rp5source = DataSource.getSource()

        self._select_data_source()

        if self._rp5source.lower() == "metar":
            downTabElem = self._browser.find_element_by_id('tabMetarDLoad')
        elif self._rp5source.lower() == "archive":
            downTabElem = self._browser.find_element_by_id('tabSynopDLoad')
        else:
            self._exit_msg = "Rp5 source is not one of the available options, please retry, exiting!"
            self._browser_exit(1)
        downTabElem.click()
        time.sleep(3)

        beginDateElem = self._browser.find_element_by_id('calender_dload')
        beginDateElem.clear()
        Log.info("Start Date is - {}".format(self._beginDate))
        beginDateElem.send_keys(self._beginDate)

        endDateElem = self._browser.find_element_by_id('calender_dload2')
        endDateElem.clear()
        Log.info("End Date is - {}".format(self._endDate))
        endDateElem.send_keys(self._endDate)

        generateDownloadElem = self._browser.find_elements_by_class_name(
            'archButton')[1]
        generateDownloadElem.click()
        time.sleep(5)

        try:
            downloadElem = self._browser.find_element_by_link_text('Download')
            fileUrl = downloadElem.get_attribute('href')
            Log.info("File URL is - " + fileUrl)
        except selenium.common.exceptions.NoSuchElementException:
            self._exit_msg = "Download link not found, exiting ..."
            self._browser_exit(1)

        fullPath = download_data(fileUrl, self._fileName, self._dirPath)
        Log.info("Downloaded data to {}".format(fullPath))

        Log.info(
            "rp5 data downloaded successfully for {} location from {} to {}".
            format(self._loc, self._beginDate, self._endDate))
        self._browser.close()
        return fullPath
Example #10
class DB:
    def __init__(self):
        self.log = Log()
        try:
            self.conn = connect(host=host,
                                port=port,
                                user=user,
                                password=password,
                                db=db,
                                charset='utf8',
                                cursorclass=cursors.DictCursor)
        except OperationalError as e:
            self.log.error("Mysql Error %d: %s" (e.args[0], e.args[1]))

    # Wrap a SELECT statement
    def select(self, table_name, table_data):
        if table_data['where'] == '':
            # No WHERE clause
            real_sql = "select " + table_data['fields'] + " from " + table_name
        else:
            keys = table_data['where']
            #print(keys)
            # Join the conditions with ' and ' so consecutive
            # conditions do not run together
            conditions = [key + " = '" + value + "'" for key, value in keys.items()]
            clause = 'where ' + ' and '.join(conditions)
            real_sql = "select " + table_data[
                'fields'] + " from " + table_name + ' ' + clause

        self.log.info(real_sql)
        #print(table_name)
        cur = self.conn.cursor()
        cur.execute(real_sql)
        results = cur.fetchall()
        return results

    # Wrap an INSERT statement
    def insert(self, table_name, table_data):
        keys = table_data
        #print(keys)
        cols, vals = '', ''
        for key, value in keys.items():
            cols = cols + key + ','
            vals = vals + value + ','
        cols = cols[:-1]
        vals = vals[:-1]
        real_sql = "insert into " + table_name + " (" + cols + ")" + " values " + "(" + vals + ")"
        self.log.info(real_sql)
        #print(table_name)
        cur = self.conn.cursor()
        cur.execute(real_sql)
        self.conn.commit()  # persist the insert; pymysql does not autocommit by default
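
A usage sketch showing the dictionary shapes these wrappers expect; the table and column names are illustrative. Note that insert() splices values into the SQL verbatim, so string values must carry their own quotes:

db = DB()
rows = db.select('users', {'fields': 'id, name', 'where': {'status': 'active'}})
rows = db.select('users', {'fields': '*', 'where': ''})  # empty 'where' means no WHERE clause
db.insert('users', {'name': "'alice'", 'status': "'active'"})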
Example #11
  def __init__(self, gse, merge_cols=True, percentile=.75):
    """Initialize filter. Requires populated gse.

    Args:
      gse: GSE instance associated with row_iter
      merge_cols: bool if to merge columns if able
      percentile: float 0<x<=1 of top percent by std to keep
    """
    # 1. Require that GSE is populated and is of correct type.
    # ==========
    if not gse.populated:
      raise geo.NotPopulatedError("%s must be populated to filter rows." % gse)
    if gse.type != "eQTL":
      raise geo.StudyTypeMismatch("%s must be type 'eQTL', not '%s'." %
        (gse, gse.type))

    # 2. Set Attributes.
    # ==========
    self.gse = gse
    self.col_titles = self.gse.col_titles[:]
    self.col_map = None
    self.rows_filtered = []
    self.rows_per_gene = {}
    self.row_stats = {}
    self.merge_cols = merge_cols
    self.percentile = percentile
    
    # 3. Get column map for column merging.
    # ==========
    n_samples = len(self.gse.samples)
    n_uniques = len(self.gse.subject_gsms)

    # If there are more samples than unique subjects, then create column map.
    if self.merge_cols and n_samples > n_uniques:
      self.col_map = self._make_col_map()
      rx_str = self.gse.parameters['rx_gsm_subject_str']
      Log.info(("Created column merge map for %s (%d samples to %d subjects)" +\
        " with rx '%s'") % \
        (self.gse, n_samples, n_uniques, rx_str))
      # Verify that column merge map is reasonable (num uniques + 1 for ID column)
      if len(self.col_map) != n_uniques + 1:
        Log.warning("Column merge map has %d classes, expected %d in %s." % \
                    (len(self.col_map), n_uniques, self))
        
    # No column merging scheme can exist. Do not create a col_map.
    else:
      # Retrieve the regular expression used
      rx_str = self.gse.parameters['rx_gsm_subject_str']
      Log.info("No column merge map created for %s using rx '%s'. Merge_cols flag is %s" % \
        (self.gse, rx_str, self.merge_cols))
Example #12
def _getRequest(url):
    """Send HTTP request to url and return the response."""

    try:
        request_result = requests.get(url, headers={
            'Connection': 'close'
        }).json()
        Log.info('Sent request to: %s' % url)
        time.sleep(0.01)

    except Exception:
        Log.error('Request to %s failed!' % url)
        sys.exit()

    return request_result
Example #13
    def __init__(self,
                 location,
                 beginDate,
                 endDate,
                 source,
                 fileName=None,
                 dirPath=None):
        Log.info("Selenium Version - {}".format(selenium.__version__))
        self._loc = location
        self._beginDate = beginDate
        self._endDate = endDate
        self._exit_msg = ""
        self._rp5source = source
        self._fileName = fileName
        self._dirPath = dirPath
        self._browser = None
Example #14
def run_listener(q, timeout_sec=3):
    log = Log(__name__, level='INFO')
    log.info('Run AMQP listener until ctrl-c input\n {0}'.format(q))

    def thread_func(worker, id):
        worker.start()

    def signal_handler(signum, stack):
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    worker = AMQPWorker(queue=q)
    task = WorkerThread(worker, 'amqp')
    tasks = WorkerTasks(tasks=[task], func=thread_func)
    tasks.run()
    tasks.wait_for_completion(timeout_sec)
Example #15
class MyUnit(unittest.TestCase):
    # def setUpClass(cls):
    #     cls.db = DB()
    #     # cls.log = Log()
    #     # cls.excel = Excel()
    #
    # def tearDownClass(cls):
    #     cls.log.info('success')

    def setUp(self):
        self.db = DB()
        self.log = Log()
        self.excel = Excel()

    def tearDown(self):
        self.log.info(self.result)
Example #16
def _processFeed(feed):
    """Turns feed content into dictionary."""

    # Log process
    Log.info('Processing feed: ' + feed[0])

    # Create feed content dictionary
    feed_content = {
        'id': feed[0],
        'message': feed[1],
        'link': feed[2],
        'shares': feed[3],
        'created_time': feed[4],
        'comments_count': feed[5],
        'reactions_count': feed[6]
    }

    return feed_content
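
An illustrative call with placeholder values, showing how the 7-tuple built by _getFeeds maps onto the dictionary keys:

feed = ('1_2', 'hello', 'https://example.com', 3,
        '2020-01-01T00:00:00+0000', 5, 7)
content = _processFeed(feed)
assert content['shares'] == 3 and content['comments_count'] == 5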
Example #17
  def read(self):
    """Return a file-pointer-like object to this resource.
    
    Returns:
      iter: file-pointer-like str line iterator (uncompressed)
    """
    # Attempt to retrieve from cache if possible.
    if self.read_cache:
      fp = self._fetch_from_cache()
    else:
      fp = None
    if fp:
      Log.info("Fetched %s from cache." % self.url)
      return fp
    else:
      Log.info("Downloading %s from network." % self.url)
    
    # From HTTP, Fetch request and populate self with response.
    http_fp = self.fetch()
    # If compressed, wrap http handle in a gzip decompressor.
    if self.headers and "content-encoding" in self.headers and \
        self.headers["content-encoding"] == "gzip":
      zip_fp = gzip.GzipFile(fileobj=http_fp)
      fp = zip_fp
    else:
      fp = http_fp
      
    # Return download iterator from decompressed HTTP handle.
    if self.write_cache:
      cache = self.cache_name
    else:
      cache = None

    # Get expected download size in bytes.
    if self.headers and 'content-length' in self.headers:
      try:
        size = int(self.headers['content-length'])
      except ValueError:
        size = None
    else:
      size = None
      
    return DownloadIter(
      fp, cache=cache, size=size, report=self.report_status, finalize=self.finalize)
Example #18
def run_server(addr, port):
    global task
    log = Log(__name__, level='INFO')
    log.info('Run httpd server until ctrl-c input')

    def shutdown(task):
        task.worker.stop()
        task.running = False

    def start(httpd, id):
        httpd.start()

    def signal_handler(signum, stack):
        log.info('Sending shutdown to httpd server')
        thread.start_new_thread(shutdown, (task,))

    signal.signal(signal.SIGINT, signal_handler)
    server = Httpd(port=int(port), address=addr)
    task = WorkerThread(server, 'httpd')
    worker = WorkerTasks(tasks=[task], func=start)
    worker.run()
    worker.wait_for_completion(timeout_sec=-1)  # run forever
Example #19
class SynchronizeData():
    def __init__(self):
        # self.time = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%dT%H:%M:%SZ')
        # self.ssh = paramiko.SSHClient()
        self.log = Log()
        self.db = pymysql.connect(host="172.16.129.40",
                                  port=3306,
                                  user="******",
                                  password="******",
                                  charset='utf8')
        self.cursor = self.db.cursor()

    # def connection_service(self):
    #     '''Connect to the Linux server'''
    #     self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # allow hosts not in known_hosts
    #     self.ssh.connect(hostname='172.16.129.40', port=8800, username='******', password='******')  # connect to the server
    #     sftp_client = self.ssh.open_sftp()
    #     mysqld_log = sftp_client.open("/var/log/mysqld.log")  # log file path
    #     l = mysqld_log.readlines()  # read all log lines
    #     row_list = [x.strip() for x in l if x.strip() != '']
    #     error_list = []
    #     for log_list in row_list:
    #         if '1733' in log_list:
    #             error_list.append(log_list)
    #     return error_list

    def synchronize(self):
        self.cursor.execute("show slave status")
        result = self.cursor.fetchall()  # fetch the slave status fields
        self.db.commit()
        try:
            if 'no' in result[0]:
                print("Replication link is down; restarting synchronization")
                self.log.info("Data synchronization has a problem; restarting synchronization")
                self.cursor.execute("start slave")
                self.db.commit()
            else:
                print("Synchronization is running normally")
                self.log.info("Synchronization is normal, no error log")
            # self.ssh.close()  # close the SSH connection
        except Exception as e:
            print(e)
            self.db.rollback()  # roll back on error
Example #20
    def _search_loc(self):
        if self._browser is not None:
            self._browser.get("http://rp5.in/")

            searchElem = self._browser.find_element_by_id('searchStr')
            searchElem.clear()
            Log.info("Searching for location - {}".format(self._loc))
            searchElem.send_keys(self._loc)

            searchButtonElem = self._browser.find_element_by_id('searchButton')
            searchButtonElem.click()

            try:
                self._browser.find_element_by_class_name('searchResults')
            except selenium.common.exceptions.NoSuchElementException:
                self._exit_msg = "No search results; please try another location"
                self._browser_exit(1)
            except Exception:
                self._exit_msg = "An unexpected error occurred while searching"
                self._browser_exit(1)
Example #21
    def _getTarget(self):
        """Get data from target."""

        # Log task start time
        start_time = time.time()
        Log.info('Task started.')

        # Set time interval
        since = datetime.strftime(datetime.now() - self.time_delta, '%Y-%m-%d')
        until = datetime.strftime(datetime.now(), '%Y-%m-%d')

        # Get list of feed id from target
        feeds_url = ('https://graph.facebook.com/v%s/' % self.version + self.target +
                     '/?fields=feed.since(' + since + ').until(' + until +
                     '){id,message,link,shares,created_time,comments.summary(true), reactions.summary(true)}&' +
                     self.token)
        feed_list = _getFeeds(_getRequest(feeds_url), [])

        # Get message, comments and reactions from feed
        data = []
        if feed_list:
            data = [_processFeed(feed) for feed in feed_list]

        # Get time cost
        cost_time = time.time() - start_time

        # Log task end time and time cost
        Log.info('Task finished.')
        Log.info('Time Cost: ' + str(cost_time))

        return data
Example #22
def run_server(addr, port):
    global task
    log = Log(__name__, level='INFO')
    log.info('Run httpd server until ctrl-c input')

    def shutdown(task):
        task.worker.stop()
        task.running = False

    def start(httpd, id):
        httpd.start()

    def signal_handler(signum, stack):
        log.info('Sending shutdown to httpd server')
        thread.start_new_thread(shutdown, (task, ))

    signal.signal(signal.SIGINT, signal_handler)
    server = Httpd(port=int(port), address=addr)
    task = WorkerThread(server, 'httpd')
    worker = WorkerTasks(tasks=[task], func=start)
    worker.run()
    worker.wait_for_completion(timeout_sec=-1)  # run forever
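
A minimal invocation sketch; the address and port are placeholders (the port may be passed as a string because run_server casts it with int()):

run_server('0.0.0.0', '8000')  # blocks until SIGINT triggers the shutdown handler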
Example #23
def detect_system_language(log: logger.Log):
    db: Dict[str, Union[bool, list, str]] = utils.access_db()

    if not db['has_asked_language']:
        language_codes: dict = {
            read_localization_files()[lang]['code']: lang
            for lang in langs[1:]
        }
        system_locale: str = locale.windows_locale[
            ctypes.windll.kernel32.GetUserDefaultUILanguage()]
        system_language_code: str = system_locale.split('_')[0]
        is_brazilian_port: bool = system_locale == 'pt_BR'

        if system_language_code in language_codes or is_brazilian_port:
            system_language: str = language_codes[system_language_code]
            can_localize: bool = True
        else:
            if system_language_code != 'en':
                log.error(f"System locale {system_locale} is not localizable")

            can_localize = False

        if can_localize and settings.get('language') != system_language:
            log.info(
                f"System language ({system_locale}, {system_language}) != settings language ({settings.get('language')}), asking to change"
            )
            db['has_asked_language'] = True
            utils.access_db(db)
            system_language_display: str = 'Português Brasileiro' if is_brazilian_port else system_language
            # this is intentionally not localized
            changed_language: str = messagebox.askquestion(
                f"TF2 Rich Presence {launcher.VERSION}",
                f"Change language to your system's default ({system_language_display})?"
            )
            log.debug(f"Changed language: {changed_language}")

            if changed_language == 'yes':
                settings.change('language', system_language)
Example #24
class Runner(RunnerInterface):
    def __init__(self):
        super().__init__()
        self.store = Store()
        self.torr_searcher = TorrSearch()
        self.loop = asyncio.get_event_loop()
        self.queue = asyncio.Queue()
        self.bot = TgBot(self.queue)
        self.log = Log(__name__)

    async def background_updater(self):
        await asyncio.sleep(5.0)
        self.log.debug(f"Start update after 5 seconds")
        while True:
            await asyncio.sleep(10)
            movies = await self.store.get_movies()
            self.log.debug(f"Search for {movies}")
            for movie in movies:
                self.log.debug(f"Find '{movie['title']}' for users: {movie['watchers']}")
                result = await self.torr_searcher.search_word(movie['title'])
                self.log.debug(f"Result: {result}")
                if result:
                    message = self.format_films(movie['title'], result)
                    for watcher in movie['watchers']:
                        await self.bot.send_message(watcher, message)
                    await self.store.create_or_update_movie(movie=movie['title'], is_active=False)

    @staticmethod
    def format_films(search_str, films):
        msg = f'The search "{search_str}" found the following releases:\n'
        for i in films[:6]:
            msg += f"---\n{i['date']}  |  {i['size']}  |  {i['name']}\n"
        return msg

    async def process_messages(self):
        while True:
            item = await self.queue.get()
            if item is not None:
                self.log.info(item)
                if not isinstance(item, dict) or 'type' not in item.keys():
                    self.log.error(f"Get incorrect object from tgbot: {item}")
                    continue  # skip malformed items; item['type'] below would raise otherwise
                if item['type'] == 'start':
                    await self.store.create_or_update_user(item['content']['from'])
                elif item['type'] == 'deactivate':
                    await self.store.create_or_update_user(item['content']['from'], is_active=False)
                elif item['type'] == 'ignore':
                    watcher = item['content']['from']['id']
                    movie = item['content']['text'][8:].strip()
                    self.log.debug(f"User {watcher} trying ignore: {movie}")
                    await self.store.ignore_movie(movie=movie, watcher=watcher)
                    answer = f"You are unsubscribed from '{movie}' search."
                    await self.bot.send_message(watcher, answer)
                elif item['type'] == 'list':
                    watcher = item['content']['from']['id']
                    movies = await self.store.get_movies(telegram_id=watcher)
                    results = '\n'.join([i['title'] for i in movies])
                    answer = f"You are waiting for:\n" \
                             f"{results}"
                    await self.bot.send_message(watcher, answer)
                elif item['type'] == 'message':
                    movie = item['content']['text'].strip()
                    watcher = item['content']['from']['id']
                    if movie.startswith('/'):
                        answer = f"Incorrect command. Use /help for additional information."
                    else:
                        if await self.store.get_users(telegram_id=watcher):
                            await self.store.create_or_update_movie(movie=movie, watcher=watcher)
                            answer = f"Title '{movie}' was added"
                        else:
                            answer = 'You need to /start chatting with the bot before making requests.'
                    await self.bot.send_message(watcher, answer)
                else:
                    self.log.error(f"Unknown type from item: {item}")

    def prepare(self):
        self.loop.create_task(self.process_messages())
        self.loop.create_task(self.background_updater())

    def run(self):
        self.prepare()
        # Bot exec run loop forever
        self.bot.run()

    async def search_digital(self, keywords):
        pass

    async def search_bd(self):
        pass

    async def search_torrent(self, keywords):
        return await self.torr_searcher.search_word(keywords)
Example #25
  def close(self):
    """Close any open file pointers, close and finalize cache file.
    """
    # Ignore repeated calls to close()
    if self.closed:
      Log.info("Redundant call to close(), Ignored for %s." % self)
      return
    else:
      Log.info("Closing %s..." % self)

    # Handle finalize requests to complete download to buffer.
    if self.finalize:
      if not self.completed and self.cache:
        Log.info("Finalizing download of %s." % self)
        # Read remaining buffer unconditionally. Use iterator if reporting.
        if self.report:
          while True:
            try:
              self.next()
            except StopIteration:
              break
        else:
          self.read()
        # If not closed in previous read(), try another read().
        if not self.closed:
          # This closes self since the previous read flushed the buffer.
          self.read()
        if not self.closed:
          Log.warning("Close sequence not completed as expected for %s." % self)
        # Exit: prior reads in the finalize process already closed self.
        return

    # self.buffer.close() causes bugs with FTP. Python sockets clean up after
    #   themselves in garbage collection, so simply drop the reference to the buffer.
    # self.buffer.close()
    self.buffer = None
    self.fp_out.close()

    if self.completed:
      Log.info("Download complete. %d bytes read." % (self.bytes_read))
      # Finalize cache.
      if self.cache:
        os.rename(self.tmp_filepath, self.dest_filepath)
        Log.info("Cache finalized as '%s'." % (self.dest_filepath))
    else:
      Log.info("Download closed before completion. %d bytes read." % \
               (self.bytes_read))
      # Flush cache.
      if self.cache:
        os.remove(self.tmp_filepath)
        Log.info("Incomplete cache '%s' deleted." % (self.tmp_filepath))
        
    # Flag self as closed to prevent redundant .close() calls.
    self.closed = True
Example #26
    """信号处理,退出程序
    """
    tornado.ioloop.IOLoop.instance().stop()
    logger.info('Msg-delivery stopped!')

signal.signal(signal.SIGTERM, quit_app)
signal.signal(signal.SIGINT,  quit_app)
if __name__ == "__main__":
    # init
    port = 8776
    includes = None
    opts, argvs = getopt.getopt(sys.argv[1:], "c:p:h")
    for op, value in opts:
        if op == '-c':
            includes = value
        elif op == '-p':
            port = int(value)
        elif op == '-h':
            Usage()
    if not includes:
        Usage()
    confs = init_application(includes)
    logger.info("Msg-delivery initialized!")

    # main
    timer = timer_procedure.msgTimer()
    application = tornado.web.Application(
        [(r"^/([^\.|]*)(?!\.\w+)$", MainHandler, dict(timer=timer))],
        log_function=log_request)
    application.listen(port)
    logger.info("Msg-delivery start to Loop!")
    tornado.ioloop.IOLoop.instance().start()
Example #27
    log.info("Starting Media Center Version", version, "(" + date + ")")


def manageLogFileSize(max_size):

    # Simple log file rollover when the file grows past max_size bytes
    if os.path.exists(log.file):
        if os.stat(log.file).st_size > int(max_size):
            log.rotateLogFile()


if __name__ == '__main__':

    logVersionAndDate()
    preferences = Preferences()
    preferences.readConfig()

    manageLogFileSize(preferences.log.max_size)

    # Start the chrome browser in the background.
    browser_pid = startChrome()
    #print("BROWSER:", browser_pid)
    websocket_server.browser_pid = browser_pid
    #print("'startChrome' is commented out for testing purposes")

    # Start the websocket server (host)
    websocket_server.startServer()

    log.info("Main Exit")
    sys.exit()
Example #28
    def __init__(self, token, version):
        """Crawler instance takes version and token as parameters."""

        self.version = version
        self.token = token
        Log.info('New Crawler: ver. %s' % version)
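
Taken together with the setTarget, setTimeInterval, and init methods shown in the other examples, a minimal usage sketch might look like this; the class name Crawler and every argument value are assumptions, not taken from the source:

crawler = Crawler(token='access_token=<TOKEN>', version='3.2')
crawler.setTarget('<page_name>')
crawler.setTimeInterval(7)  # look back 7 days
crawler.init()              # crawls the target and writes data/data.json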
Example #29
  def get_rows(self):
    """Return filtered row iterator.
    CLEAN THIS UP
    It may be best to break this into multiple filters?
    Fix to return [str]
    
    Returns:
      *[str] of filtered rows of data split by columns
    """
    Log.info("Initiated filter %s for rows of %s" % (self, self.gse))
    if self.col_map:
      Log.info("self.col_map exists. Merge %d to %d columns for %s" % \
               (len(self.col_titles), len(self.col_map), self))
    else:
      Log.info("No col_map. Will not merge %d columns for %s." % \
               (len(self.col_titles), self))

    # 0. Determine best gene name column in case GENE_SYMBOL does not exist.
    # ==========
    gene_symbol_name = None
    # Traverse column names in preferred order.
    for name in geo.GPL.EQTL_GENE_NAME_LIST:
      # Skip columns without assignments. Continue
      if self.gse.platform.special_cols[name] is None:
        continue
      # Choose the first column that has an acceptable assignment. Break.
      else:
        actual_column_name = self.gse.platform.special_cols[name]
        gene_symbol_name = name
        break
    # Verify that a column was chosen to identify the row.
    if gene_symbol_name:
      Log.info("Selected column '%s=>%s' to best represent gene name for %s." %\
        (gene_symbol_name, actual_column_name, self.gse.platform))
    else:
      raise MalformedFilterError("Cannot select gene symbol column from %s" %
        (self.gse.platform))
    
    # 1. Update column titles accounting for merged columns.
    # ==========
    if self.col_map:
      self.col_titles = self._merge_cols(self.col_titles, merge_titles)
      
    # Insert generated column titles (AFTER merging columns)
    # self.col_titles[0] should always be "ID_REF"
    col_titles_prefix = ["ID_REF", gene_symbol_name, "NUM_VALUES", "MEAN", "STD"]
    self.col_titles = col_titles_prefix + self.col_titles[1:]
    Log.info("Added %s, NUM_VALUES, MEAN, STD to col titles for %s." %\
             (gene_symbol_name, self))
             
    # Open new temporary file. XXX RENAME
    filepath = temp_file_name("%s.rowmerge" % self.gse.id)
    fp_out = open(filepath, "w")

    # 2: @DATAPASS 1: Merge columns, add gene symbol, filter non-genes.
    # ==========
    Log.info(("Started filter 1 in %s for %s: find and add gene, merge cols. " +
             "(This may take a while.)") % (self, self.gse))
      
    num_rows = 0
    for row in self.gse.get_rows():
      # TODO: Add status reporting to console
      num_rows += 1

      # Determine gene symbol for this row. Filter if no gene symbol exists.
      row_id = row[0] # Row ID should always be the first entry in a row.
      gene_sym = self.gse.platform.get_column(row_id, gene_symbol_name)
      if not gene_sym:
        self.rows_filtered.append(row_id)
        continue # skip this row
      else:
        self.rows_per_gene.setdefault(gene_sym, set()).add(row_id)
      
      # Merge columns using column mapping of series matrix columns.
      # Also, transform row into "floats" and None
      if self.col_map:
        # XXX_merge_cols is slow, perhaps due to float conversions.
        row = self._merge_cols(row, merge_floats)
      else:
        row = map(get_float, row)

      # Compute mean and standard deviation of all non-ID columns
      # check for None specifically since a valid value could be 0
      filtered_row = filter(lambda x: x is not None, row[1:])
      std = calc_std(filtered_row)
      mean = calc_mean(filtered_row)
      num_values = len(filtered_row)
      # Store row statistics
      self.row_stats[row_id] = \
        {'num_values': num_values, 'mean': mean, 'std': std}

      # Insert (gene_sym, size, mean, std) into second column
      row = [row_id , gene_sym, num_values, mean, std] + row[1:]

      # Write row to temporary file.
      # TODO: I may want to compress my row by converting it to a pickle.
      # pickling a list of floats uses 2/3 space and takes 1/2 compute time.
      fp_out.write("\t".join(map(str, row)))
      fp_out.write("\n")
    fp_out.close()

    # Log results of filter pass 1
    # ==========
    n = len(self.rows_filtered)
    n_gene_rows = num_rows-n
    mean_rows_per_gene = float(num_rows-n)/len(self.rows_per_gene)
    
    if num_rows != self.gse.est_num_row:
      Log.warning("Num rows read(%d) not num rows expected(%d) for %s" % \
                  (num_rows, self.gse.est_num_row, self))
    Log.info(("Filter 1 complete for %s. " + \
      "%d of %d (%.2f%%) rows removed for no gene symbol. %d rows remain.") % \
      (self, n, num_rows, (n/float(num_rows))*100, n_gene_rows))
    Log.info("Number of unique genes: %d, %.1f mean num rows per gene." % \
      (len(self.rows_per_gene), mean_rows_per_gene))

    # 3: Choose representative rows from self.row_stats and self.rows_per_gene
    # ==========
    # Select one row per gene: if a gene has several rows, keep the one
    # with the highest mean value.
    selected_row_ids = []
    for gene, row_ids in self.rows_per_gene.items():
      # If only a single row for this gene exists, choose it.
      if len(row_ids) == 1:
        best_row_id = row_ids.pop()
      # Else, choose row with the highest mean value. 
      else:
        s = sorted(row_ids, key=lambda x: self.row_stats[x]['mean'])
        best_row_id = s[-1]
      # Add this row_id to the accepted list
      selected_row_ids.append(best_row_id)

    n_single_gene_rows = len(selected_row_ids)
    Log.info("Selected %d of %d rows for %d genes by maximum row mean." % \
      (n_single_gene_rows, n_gene_rows, len(self.rows_per_gene)))

    # Sort row_ids by row standard deviation in decreasing order.
    selected_row_ids.sort(key=lambda x: self.row_stats[x]['std'], reverse=True)
    
    # Select top percentile by std. Convert type to set for easier membership tests.
    x = int(len(selected_row_ids)*self.percentile)
    selected_row_ids = set(selected_row_ids[:x])
    threshold_num_rows = len(selected_row_ids)
    assert(x == threshold_num_rows)
    Log.info("Selected top %d%% of rows (%d of %d) by standard deviation." % 
      (self.percentile*100, threshold_num_rows, n_single_gene_rows))
      
    # FINAL PASS: YIELD FILTERED LINES
    # ===========
    # Open temporary file generated in first pass.
    fp = open(filepath, "r")

    # Yield (modified) column titles.
    yield self.col_titles[:]
    
    # For each line, only yield if the row_id is in the selected_row_ids list.
    num_yielded_rows = 0
    for line in fp:
      row = line.strip().split("\t")
      row_id = row[0]
      if row_id in selected_row_ids:
        num_yielded_rows += 1
        yield row

    # All lines yielded. Check number of lines yielded with expected value.
    if num_yielded_rows != threshold_num_rows:
      Log.warning("%d yielded rows != %d expected number of rows." % \
        (num_yielded_rows, threshold_num_rows))
    else:
      Log.info("Filter complete. yielded %d rows." % (num_yielded_rows))
Example #30
class Rutor(TorrentInterface):
    def __init__(self):
        self.log = Log(__name__)
        self.session = aiohttp.ClientSession()
        self._init_variables()

    def _init_logging(self):
        pass

    def _init_variables(self):
        self._rutor = dict()
        self._rutor['protocols'] = ['http', 'https']
        self._rutor['host'] = ['rutor.is', 'rutor.info', '6tor.net']
        self._rutor['search_string'] = '/search/'
        self._rutor['search_keyword'] = '/search/0/0/100/0/'
        self._rutor['search_words'] = ''

    async def fetch_url(self, url):
        try:
            async with self.session.get(url, allow_redirects=False) as resp:
                if resp.status != 200:
                    self.log.info(f"Got code {resp.status} from {url}")
                    await asyncio.sleep(2.0)
                    return None
                return await resp.text()
        except client_exceptions.ClientConnectionError as e:
            self.log.info(e)
            await asyncio.sleep(2.0)
            return None

    def _generate_links(self, search_str, method='search_string'):
        links = list()
        for host in self._rutor['host']:
            for proto in self._rutor['protocols']:
                links.append(f"{proto}://{host}{self._rutor[method]}{search_str}")
        return links

    @staticmethod
    def parse(html_text):
        tree = html.fromstring(html_text)
        elements = tree.xpath('//table[@width]//tr')
        results = list()
        for e in elements:
            data = e.xpath('./td//text()')
            link = e.xpath('.//a/@href')
            if len(data) == 7:
                element = {
                    "date": data[0],
                    "name": data[2],
                    "size": data[3],
                    "link": link[2]
                }
            elif len(data) == 8:
                element = {
                    "date": data[0],
                    "name": data[2],
                    "size": data[4],
                    "link": link[2]
                }
            else:
                continue
            results.append(element)
        return results

    async def search(self, search_str):
        futures = [self.fetch_url(link) for link in self._generate_links(search_str)]
        self.log.debug(f"Generated links: {'  '.join(self._generate_links(search_str))}")
        return await self.run_search(futures)

    async def run_search(self, futures):
        done, pending = await asyncio.wait(futures, return_when=asyncio.FIRST_COMPLETED)
        for future in pending:
            future.cancel()
        try:
            html_page = done.pop().result()
        except Exception:
            return None
        # !!! Make run in executor
        return self.parse(html_page)

    async def search_keywords(self, keywords):
        if isinstance(keywords, list):
            keywords = ' '.join(keywords)
        futures = [self.fetch_url(link) for link in self._generate_links(search_str=keywords, method='search_keyword')]
        self.log.debug(f"Generated links: "
                       f"{'  '.join(self._generate_links(search_str=keywords, method='search_keyword'))}")
        return await self.run_search(futures)
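
A hedged usage sketch for the searcher above, assuming it runs inside an already-running asyncio event loop (as in the Runner example):

async def demo():
    rutor = Rutor()
    results = await rutor.search('some title')  # list of dicts with date, name, size, link
    for r in (results or [])[:3]:
        print(r['date'], r['size'], r['name'])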
Example #31
    def setTarget(self, target):
        """Set target FB page for Crawler instance."""

        self.target = target
        Log.info('Target: %s' % target)
Example #32
    @classmethod
    def setSource(cls, sourceStr):
        cls.__source = getattr(cls.__SOURCES, sourceStr.upper(),
                               cls.__SOURCES.ARCHIVE)
        Log.info("Setting Data source as {}".format(cls.__source))
Example #33


def get(url, data=None):
    if data:
        response = requests.get(url, data=data)
    else:
        response = requests.get(url)
    if not response.ok:
        raise Exception('http error: %s %d' % (url, response.status_code))
    res = response.content.decode('unicode_escape')
    return json.loads(res)


if __name__ == '__main__':
    log.info('program started:------------------------------------')
    info_url = config.httpInfo + 'info'
    finish_url = config.httpInfo + 'finish'
    while True:
        person_info = {}
        # body_info = {}
        try:
            result = get(info_url)
            if result['has_record'] == 1:
                log.info('process record: %s' % result['data'])
                record_id = result['data']['id']
                person_info['person_id'] = result['data']['bbiid']
                person_info['height'] = result['data']['height']
                person_info['weight'] = result['data']['weight']
                person_info['name'] = result['data']['name']
                person_info['body_id'] = result['data']['body_id']
Example #34
class TL:
    def __init__(self):
        self.tlc = TestlinkAPIClient(url, key)
        self.log = Log()

    # Format conversion
    def __changeformat(self, oldformat):
        newformat = oldformat.replace('&quot;', '"').replace('<p>', '').replace('</p>', '').replace('\n', '').replace('\t', '')
        return newformat

    # Get the id and name of every test suite
    def get_testsuite_idname(self):
        projects = self.tlc.getProjects()
        animbus = projects[0]
        topSuites = self.tlc.getFirstLevelTestSuitesForTestProject(animbus['id'])
        for suite in topSuites:
            print('suite_id' + suite['id'], 'suite_name' + suite['name'])

    # Create a test suite
    # def create_testsuite(self, project_id, test_suite_name, test_suite_describe, father_id):
    #     if father_id == "":
    #         self.tlc.createTestSuite(project_id, test_suite_name, test_suite_describe)
    #     else:
    #         self.tlc.createTestSuite(project_id, test_suite_name, test_suite_describe, parentid=father_id)


    # Fetch a test case
    def get_testcase(self, testcase_id):
        self.log.info("Start fetching the test case")
        testcase_list = []
        testcase = self.tlc.getTestCase(testcase_id)
        for i in testcase:
            self.log.info("Fetched preconditions: " + self.__changeformat(i.get('preconditions')))
            testcase_list.append(self.__changeformat(i.get('preconditions')))
            for m in i.get('steps'):
                expected_results = self.__changeformat(m.get("expected_results"))
                actions = self.__changeformat(m.get("actions"))
                step_number = self.__changeformat(m.get("step_number"))
                # testcase_list.append(step_number)
                testcase_list.append(actions)
                testcase_list.append(expected_results)
                self.log.info("Step: " + step_number + " Action: " + actions + " Expected result: " + expected_results)
        return testcase_list

    # Get the ids of all test cases under the given test plan
    def get_alltestcaseid_for_testplanid(self, testplanid):
        testcase_id = []
        testplan = self.tlc.getTestCasesForTestPlan(testplanid)
        for i in testplan.keys():
            self.log.info("Added test case id " + i + " from test plan " + str(testplanid) + " to the list")
            testcase_id.append(i)
        return testcase_id

    # Get the ids of all test cases under the given test suite
    def get_alltestcaseid_for_testsuiteid(self, testsuiteid):
        testcase_id = []
        testsuite = self.tlc.getTestCasesForTestSuite(testsuiteid, 0, "")
        for i in testsuite:
            self.log.info("Added test case id " + i['id'] + " name " + i['name'] + " from test suite " + str(testsuiteid) + " to the list")
            testcase_id.append(int(i['id']))
        return testcase_id
Example #35
        if not self.exist_t(evt.t):
            # Persist the timestamp
            self.save_t(evt.t)
            logger.info("Save a event-timer![ TIME:%s ]" % evt.t)

            # Schedule the timed callback
            try:
                callback = functools.partial(self.timeout_callback, evt.t)
                tornado.ioloop.IOLoop.instance().add_timeout(int(evt.t), callback)
            except Exception as e:
                logger.error("Tornado add timeout error! [ ERROR:%s ]" % e)
            logger.info("Set event-timer's callback![ TIME:%s ]" % evt.t)

        # Persist the event (with an expiry time)
        self.save_evt(evt)
        logger.info("Save a event-list![ KEY:%s ]" % (REDIS_EVT_LST_PREFIX + evt.t))

    def timeout_callback(self, t):
        logger.debug("Event-timer callback![ TIMER:%s NOW:%s ]" % (t, time.time()))
        # Process every event scheduled for time t
        key = REDIS_EVT_LST_PREFIX + t
        list_len = self.R.llen(key)  # renamed from 'len' to avoid shadowing the builtin
        logger.debug("Scan event list![ KEY:%s TOTAL:%d ]" % (key, list_len))
        for i in range(list_len):
            raw = self.R.lpop(key)  # renamed from 'str' to avoid shadowing the builtin
            logger.debug("Pop a event from list![ KEY:%s EVT:%s ]" % (key, raw))
            if not raw:
                continue
            dct = ujson.loads(raw)
            evt = msgEvent().from_dict(dct)
            msg_procedure.process(evt)