Example #1
def make_final_diffs():
    log.info('Make diff_time')

    #db.execute("INSERT INTO `diff_time` (`id_1`, `id_2`, `diff`) VALUES(1, NULL, 2);")
    #db.execute("INSERT INTO `diff_time` (`id_1`, `id_2`, `diff`) SELECT `a`.`id`, `b`.`id`, " +
    #           "TIME_TO_SEC(TIMEDIFF(`a`.`datetime`, `b`.`datetime`)) FROM `measurement_points` AS `a` " +
    #           "JOIN `measurement_points` AS `b` ON `a`.`id`-1=`b`.`id` WHERE `a`.`id` > 1;")
    #db.execute("INSERT INTO `diff_time` (`id_1`, `id_2`, `diff`) VALUES(NULL, " +
    #           "(SELECT MAX(id) FROM `measurement_points`), 2);")
    #db.execute("TRUNCATE `diff_buffer`;")

    for table in cache.get('value_types', lambda: []):
        log.info('Make diff_%s' % table['name'])

        db.execute("INSERT INTO `diff_buffer`(`original_id`, `value`) SELECT `id`, `value` FROM `measurements` " +
                   "WHERE device='%s' AND type='%s' AND level=2 ORDER BY `measurement_point_id`;" %
                   (table['original'][0], table['original'][1]))
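        # NOTE: this break stops the loop after the first value type; it looks like a debugging leftover.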
        break
        #db.execute("INSERT INTO `diff_%s` (`id_1`, `id_2`, `diff`) VALUES(1, NULL, 2);" % table['name'])
        #db.execute("INSERT INTO `diff_%s` (`id_1`, `id_2`, `diff`) SELECT `a`.`id`, `b`.`id`, " % table['name'] +
        #           "`a`.`value`-`b`.`value` FROM `diff_buffer` AS `a` JOIN `diff_buffer` AS `b` " +
        #           "ON `a`.`id` - 1=`b`.`id` WHERE `a`.`id` > 1;")
        #db.execute("INSERT INTO `diff_%s` (`id_1`, `id_2`, `diff`) VALUES(NULL, " % table['name'] +
        #           "(SELECT MAX(id) FROM `diff_buffer`), 2);")
        #
        #db.execute("TRUNCATE `diff_buffer`;")
Example #2
def fix_stats_monster():
    infura_client = InfuraClient(INFURA_API_URL)
    data_contract = infura_client.getDataContract()

    monster_records = EtheremonDB.EmaMonsterDataTab.objects.filter(
        Q(b0=0) | Q(b1=0) | Q(b2=0) | Q(b3=0) | Q(b4=0) | Q(b5=0)).all()
    for monster in monster_records:
        if monster.monster_id < 32599:
            continue  # skip ids below 32599 (presumably handled in earlier runs)
        base_stats = []
        for index in xrange(0, 6):  # the six base stats, b0..b5
            stat_value = data_contract.call().getElementInArrayType(
                DataArrayType.STAT_BASE, monster.monster_id, index)
            base_stats.append(stat_value)
        if 0 in base_stats:
            log.error("fix_monster_invalid_stat|monster_id=%s,base_stats=%s",
                      monster.monster_id, base_stats)
            continue
        monster.b0 = base_stats[0]
        monster.b1 = base_stats[1]
        monster.b2 = base_stats[2]
        monster.b3 = base_stats[3]
        monster.b4 = base_stats[4]
        monster.b5 = base_stats[5]
        monster.exp = 0
        monster.save()
        _sync_monster_id(data_contract, monster.monster_id)

        time.sleep(0.05)  # throttle calls to the contract
        log.info("fix_monster_stats|monster_id=%s", monster.monster_id)
Example #3
 def crawl_contacts(self):
     """爬小组用户的关注列表页并写入数据库中对应用户名的表中"""
     try:
         self.login()
         # followees table; the table name is the current user id
         user_id = config.get('user', 'id')
         contacts = ContactsTable(table_name=user_id)
         c = ContactsList(self.s)
         total_members, total_pages = c.total_members, c.total_pages
         log.info('Following {} users in total, across {} list pages'.format(
             total_members, total_pages))
         for page_num in range(1, total_pages + 1):
             log.info('Progress: [{}/{}]'.format(page_num, total_pages))
             try:
                 page_members = c.get_contacts_from_page(page_num)
             except Exception as e:
                 raise Exception('Because of {}, unable to crawl pages {} to {}'.format(
                     e, page_num, total_pages))
             else:
                 contacts.insert(page_members)
                 # sleep to avoid getting the IP or the account banned
                 time.sleep(random.randint(3, 20))
     except Exception as e:
         raise Exception('crawl_contacts: {}'.format(e))
Example #4
    def crawl_group(self):
        """爬小组成员页"""
        group_name = config.get('group', 'id')
        log.info(group_name)
        group = GroupList()
        if group.total_members == 0 or group.total_pages == 0:
            # failed to fetch the group members page
            raise Exception('Group: {}\t Total members: {}\t Total pages: {}\n'
                            'The account has most likely been banned'.format(
                                group_name, group.total_members, group.total_pages))
        else:
            log.info('Group: {}\t Total members: {}\t Total pages: {}'.format(
                group_name, group.total_members, group.total_pages))
        # set the start position; we crawl backwards
        start_page = int(config.get('group', 'start_page'))
        if start_page == -1:
            start_page = group.total_pages
        end_page = int(config.get('group', 'end_page'))
        if end_page == -1:
            end_page = 0

        # start crawling; rotate headers and proxies every `step` pages while going backwards
        step = int(config.get('group', 'skip_page'))  # expected to be negative so range() counts down
        for page_range in range(start_page, end_page, step):
            # rotate headers and proxies
            group = GroupList()
            try:
                self.crawl_group_members(page_range, page_range + step, group)
            except Exception as e:
                raise Exception('crawl_group: {}'.format(e))
Example #5
 def execute(query):
     try:
         log.info("Execute query: %s" % (str(query)))
         return DBConnection.connection().execute(query)
     except Exception, exc:
         log.error("db.execute: %(error)s" % {'error': exc.message})
         return None
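Since `execute` swallows exceptions and returns `None` on failure, callers have to check the result explicitly; a minimal usage sketch (the query text is illustrative only):

result = execute("SELECT 1;")
if result is None:
    # the error was already logged inside execute(); fall back or abort here
    pass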
Example #6
 def _func(keys, **kwargs):
     if not keys:
         return {}
     keys = list(keys)
     force_query = kwargs.get("force_query", False)
     result_data = {}
     if not force_query:
         cache_key_map = {cache_prefix % key: key for key in keys}
         cached_data_dict = cache.get_cache(cache_name).get_many(
             cache_key_map.keys())
         for cached_key, cached_data in cached_data_dict.iteritems():
             key = cache_key_map[cached_key]
             result_data[key] = cached_data
             keys.remove(key)
         log.info("key_cache_hit|cached_key=%s",
                  ','.join(cached_data_dict.keys()))
     if keys:  # keys that were not served from the cache
         response_data = func(keys)
         if response_data:
             data_to_cache = {
                 cache_prefix % key: data
                 for key, data in response_data.iteritems()
             }
             cache.get_cache(cache_name).set_many(
                 data_to_cache, expiry_time)
             result_data.update(response_data)  # avoids crashing when func() returns None
     return result_data
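`_func` closes over `func`, `cache_prefix`, `cache_name` and `expiry_time`, none of which are defined in the snippet, so it reads like the inner function of a caching decorator factory. A minimal Python 2 sketch of the presumed enclosing scope (the factory itself is an assumption, not taken from the source):

def cache_batch_result(cache_name, cache_prefix, expiry_time):
    def decorator(func):
        def _func(keys, **kwargs):
            pass  # body as in the example above
        return _func
    return decorator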
Example #7
 def wait_element_clickable(self, loc, img_info, timeout=15, poll_frequency=0.5):
     """
     等待元素可点击
     :param loc:定位元素表达式
     :param img_info:发生错误时截图文件名
     :param timeout:等待超时时间
     :param poll_frequency:查询频率
     :return:超时错误或者元素
     """
     # 获取当前时间
     start_time = time()
     try:
         ele = WebDriverWait(self.driver, timeout, poll_frequency).until(EC.element_to_be_clickable(loc))
     except Exception as e:
         # log the error
         log.error("Timed out waiting for element {} to become clickable".format(loc))
         log.exception(e)
         # take a screenshot of the failing page
         self.screen_shot(img_info)
         raise e
     else:
         # log the wait time and return the element
         end_time = time()
         log.info('Element {} is clickable, waited {} seconds'.format(loc, end_time - start_time))
         return ele
Example #8
def download_media_thumbnail(media_id, url):
    '''
        Download an image from a URL and save it as a local thumbnail attached to a
        Media instance.
    '''
    try:
        media = Media.objects.get(pk=media_id)
    except Media.DoesNotExist:
        # Task triggered but the media no longer exists, do nothing
        return
    width = getattr(settings, 'MEDIA_THUMBNAIL_WIDTH', 430)
    height = getattr(settings, 'MEDIA_THUMBNAIL_HEIGHT', 240)
    i = get_remote_image(url)
    log.info(f'Resizing {i.width}x{i.height} thumbnail to '
             f'{width}x{height}: {url}')
    i = resize_image_to_height(i, width, height)
    image_file = BytesIO()
    i.save(image_file, 'JPEG', quality=85, optimize=True, progressive=True)
    image_file.seek(0)
    media.thumb.save('thumb',
                     SimpleUploadedFile(
                         'thumb',
                         image_file.read(),
                         'image/jpeg',
                     ),
                     save=True)
    log.info(f'Saved thumbnail for: {media} from: {url}')
    return True
Example #9
 def get_login_mode(self, option):
     """ query login mode
     :param option: query option (0: mock, 1: query market price, 2: employee/customer)
     :return: login mode
     """
     log.info('[api] call - GetLoginMode({})'.format(str(option)))
     return self.dynamicCall('GetLoginMode(nOption)', option)
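`dynamicCall` here follows the PyQt `QAxWidget` convention for driving a COM/ActiveX trading API (Windows only); a minimal sketch of the presumed surrounding class, where the ProgID string is a placeholder and not taken from the source:

from PyQt5.QAxContainer import QAxWidget

class TradingApi(QAxWidget):
    def __init__(self):
        super(TradingApi, self).__init__()
        # attach to the broker's ActiveX control by its ProgID (placeholder value)
        self.setControl('VENDOR.TradingAPI.1')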
Example #10
    def verify(self):
        assert_data = self.assert_data.split(" ")
        if assert_data[2].isdigit():
            expect = int(assert_data[2])
        else:
            expect = assert_data[2]

        try:
            if assert_data[1] == "==":
                assert eval(assert_data[0]) == expect

            elif assert_data[1] == "<":
                assert eval(assert_data[0]) < expect

            elif assert_data[1] == ">":
                assert eval(assert_data[0]) > expect

            elif assert_data[1] == "in":
                assert expect in eval(assert_data[0])

            elif assert_data[1] == "!=":
                assert eval(assert_data[0]) != expect
            log.info("检查点校验成功")
        except Exception as e:
            log.error("检查点检验失败!预期结果是:{},实际结果是:{}".format(
                self.assert_data, assert_data[0] + " " + assert_data[1] + " " +
                eval(assert_data[0])))
            raise e
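The chain of eval/assert branches can be collapsed with the standard operator module; a hedged sketch under the same assumptions as the original (the eval of the left-hand expression is kept, and `verify_expression` is a hypothetical helper, not part of the source):

import operator

_OPS = {
    "==": operator.eq,
    "<": operator.lt,
    ">": operator.gt,
    "!=": operator.ne,
    "in": lambda actual, expected: expected in actual,
}

def verify_expression(expression, op, expected):
    actual = eval(expression)  # eval is still assumed here, as in the original
    assert _OPS[op](actual, expected), "{} {} {}".format(expression, op, expected)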
Example #11
 def get_login_state(self) -> bool:
     """ query login state
     :return: login state
     """
     log.info('[api] call - GetLoginState()')
     result = self.dynamicCall('GetLoginState()')
     return bool(result)
Example #12
 def test_new_supercargo(self, init):
     """新增一个押运员"""
     self.driver = init
     try:
         log.info("-----> 开始新增押运员")
         self.driver.find_element(By.ID, allData.get_element_info(0)).click()
         self.driver.find_element(By.ID, allData.get_element_info(1)).click()
         self.driver.find_elements(By.ID, allData.get_element_info(2))[1].click()
         self.driver.find_element(By.ID, allData.get_element_info(3)).click()
         self.driver.find_element(By.ID, allData.get_element_info(4)).send_keys(new_supercargo)
         self.driver.find_element(By.ID, allData.get_element_info(5)).send_keys(new_supercargo_num)
         self.driver.find_element(By.ID, allData.get_element_info(6)).click()
         self.driver.find_element(By.XPATH, reuseData.get_element_info(0)).click()
         self.driver.find_element(By.ID, reuseData.get_element_info(1)).click()
         self.driver.find_elements(By.ID, reuseData.get_element_info(2))[0].click()
         time.sleep(1)
         pageView.adb_tap((110, 260))
         time.sleep(1)
         pageView.adb_tap((668, 46))
         self.driver.find_element(By.ID, reuseData.get_element_info(3)).click()
         self.driver.find_element(By.ID, reuseData.get_element_info(4)).click()
         self.driver.find_element(By.ID, reuseData.get_element_info(5)).click()
         self.driver.find_element(By.ID, reuseData.get_element_info(7)).click()
         time.sleep(3)
     except Exception as e:
         log.error("异常情况,返回错误信息是->: {0}".format(e))
         screen_shot(self.driver, allData.get_id() + '.png')
Example #13
 def release_request_id(self, rid):
     """ release request id
     :param rid: request id
     :return: void
     """
     log.info('[api] call - ReleaseRqId({})'.format(str(rid)))
     self.dynamicCall('ReleaseRqId(nRqId)', rid)
Example #14
 def get_fid_output_count(self, rid):
     """ FID count of data inquiry response data
     :param rid: request id
     :return: count of data
     """
     log.info('[api] call - GetFidOutputRowCnt({})'.format(str(rid)))
     return self.dynamicCall('GetFidOutputRowCnt(nRequestId)', rid)
Example #15
def source_pre_delete(sender, instance, **kwargs):
    # Triggered before a source is deleted, delete all media objects to trigger
    # the Media models post_delete signal
    for media in Media.objects.filter(source=instance):
        log.info(
            f'Deleting media for source: {instance.name} item: {media.name}')
        media.delete()
Example #16
 def comm_init(self) -> bool:
     """ initialize communication module
     :return: initialize successful
     """
     log.info('[api] call - CommInit()')
     result = self.dynamicCall('CommInit()')
     return result == 0  # 0 means success
Example #17
 def get_comm_state(self) -> bool:
     """ communication module status inquiry
     :return: normal operation status of communication module
     """
     log.info('[api] call - CommGetConnectState()')
     state = self.dynamicCall('CommGetConnectState()')
     return state == 1  # 1 means connected
Example #18
 def __init__(self, url, browse):
     self.driver_browse(browse)
     log.info("打开浏览器")
     self.driver.get(url)
     log.info("打开url")
     self.driver.implicitly_wait(20)
     self.driver.maximize_window()
Example #19
    def post(self):
        response = ""
        try:
            # dict payload, no file upload
            if self.data_type == "data" and self.upload_file == "":
                response = self.session.post(url=self.url, data=self.parameter)

            # dict payload, with file upload
            elif self.data_type == "data" and self.upload_file != "":
                response = self.session.post(url=self.url,
                                             data=self.parameter,
                                             files=self.upload_file)

            # json payload, with file upload
            elif self.data_type == "json" and self.upload_file != "":
                response = self.session.post(url=self.url,
                                             json=self.parameter,
                                             files=self.upload_file)

            # json payload, no file upload
            elif self.data_type == "json" and self.upload_file == "":
                response = self.session.post(url=self.url, json=self.parameter)
            log.debug("运行post请求成功,请求的参数是:{}".format(self.parameter))
            log.info("运行post请求成功")
        except Exception as e:
            log.error("post请求失败!,错误信息是:{}".format(e))
            log.error("post请求失败!,请求的参数是:{}".format(self.parameter))
            raise e

        return response.json()
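Since `data_type` happens to match the keyword arguments that requests expects (`data` or `json`), the four branches could be collapsed; a sketch of an equivalent dispatch, assuming the same attributes as the snippet:

kwargs = {'url': self.url, self.data_type: self.parameter}
if self.upload_file:
    kwargs['files'] = self.upload_file
response = self.session.post(**kwargs)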
Example #20
 def set_proxies(self):
     p = ProxyTable()
     member = p.fetch_proxy()
     if member is None:
         raise Exception('set_proxies: member is None')
     else:
         self.proxies = {'http': 'http://' + member.ip_port}
         log.info('{}'.format(self.proxies))
Example #21
 def create_request_id(self):
     """ create request id
     :return: request id
     """
     request_id = self._api.create_request_id()
     log.info('[create_request_id] Request id created: {}'.format(
         str(request_id)))
     return request_id
Example #22
def media_pre_delete(sender, instance, **kwargs):
    # Triggered before media is deleted, delete any scheduled tasks
    log.info(f'Deleting tasks for media: {instance.name}')
    delete_task_by_media('sync.tasks.download_media', (str(instance.pk), ))
    thumbnail_url = instance.thumbnail
    if thumbnail_url:
        delete_task_by_media('sync.tasks.download_media_thumbnail',
                             (str(instance.pk), thumbnail_url))
Example #23
 def print_contacts_table(self):
     """打印拉取到的关注列表页数据"""
     members = ContactsTable()
     items = members.fetch_all()
     num_existed = len(items)
     log.info('Currently stored: {}'.format(num_existed))
     log.info("Printing one random user's info")
     members.fetch_one_basic_infos().print_basic_infos()
Example #24
 def print_group_table(self):
     """打印拉取到的小组成员页数据"""
     members = MembersTable(table_name=config.get('group', 'id'))
     items = members.fetch_all()
     num_existed = len(items)
     log.info('Currently stored: {}'.format(num_existed))
     log.info("Printing one random user's info")
     members.fetch_one_basic_infos().print_basic_infos()
Example #25
def read_xml(file_path):
    element_tree = ElementTree()
    if not os.path.exists(file_path):
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        init(file_path)
        log.info("init file:{}".format(file_path))
    element_tree.parse(file_path)
    return element_tree
Example #26
 def logout(self) -> bool:
     """ logout
     :return: logout successful
     """
     log.info('[api] call - CommLogout(*)')
     result = self.dynamicCall('CommLogout(sUserId)',
                               self._CREDENTIALS['id'])
     return result == 0  # 0 means success
Example #27
 def send(transaction_object) -> bool:
     """ forward to the message pipe cache
     :param transaction_object: transaction object
     :return: send message successful
     """
     log.info('[send] transaction: {}'.format(str(transaction_object)))
     # send to MQ or storage
     return True
Example #28
def get_yt_opts():
    opts = copy(_defaults)
    cookie_file = settings.COOKIES_FILE
    if cookie_file.is_file():
        cookie_file_path = str(cookie_file.resolve())
        log.info(f'[youtube-dl] using cookies.txt from: {cookie_file_path}')
        opts.update({'cookiefile': cookie_file_path})
    return opts
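A typical way an options dict like this is consumed, assuming the standard youtube-dl entry point (the URL is a placeholder):

import youtube_dl

with youtube_dl.YoutubeDL(get_yt_opts()) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=example'])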
Example #29
 def on_agent_event_handler(self, event_type, param, value):
     """ agent event loop handler
     :param event_type: event type
     :param param: param
     :param value: value
     :return: void
     """
     log.info('[api] on event({}) {} - {}'.format(str(event_type),
                                                  str(param), str(value)))
Example #30
def media_post_delete(sender, instance, **kwargs):
    # Schedule a task to update media servers
    for mediaserver in MediaServer.objects.all():
        log.info(f'Scheduling media server update for: {mediaserver}')
        verbose_name = _('Request media server rescan for "{}"')
        rescan_media_server(str(mediaserver.pk),
                            priority=0,
                            verbose_name=verbose_name.format(mediaserver),
                            remove_existing_tasks=True)
Example #31
def calibrateCameraAndLidar():
    res = {"code": "99", "msg": "", "result": {}}
    data2d = []
    data3d = []
    xyz = (0, 0, 0)
    try:
        data = json.loads(request.get_data(as_text=True))
        token = data['token']
        prjId = data['prjId']
        raw3d = data['coordinateData0']
        raw2d = data['coordinateData1']
        xyz = (float(data['BLH']["x"]), float(data['BLH']["y"]),
               float(data['BLH']["z"]))
        mtx = np.array(data["para"]["mtx"])
        dist = np.array(data["para"]["dist"])

        assert (len(raw2d) == len(raw3d)), "received 2D/3D coordinate arrays differ in length"
        assert (len(raw2d) >= 6), "fewer than 6 coordinate pairs received"

        for i in range(0, len(raw2d)):
            tmpraw2dU = float(raw2d[i]["axisX"])
            tmpraw2dV = float(raw2d[i]["axisY"])

            tmpraw3dX = float(raw3d[i]["axisX"])
            tmpraw3dY = float(raw3d[i]["axisY"])
            tmpraw3dZ = float(raw3d[i]["axisZ"])

            data2d.append((tmpraw2dU, tmpraw2dV))
            data3d.append((tmpraw3dX, tmpraw3dY, tmpraw3dZ))

        log.info("Successful transfer of points")
    except Exception as e:
        log.error(e)
        res['msg'] = 'The server receives a bad json'
        return get_result_response(EasyDict(res))
    log.info("ip:{}".format(request.remote_addr))

    rotM, tvec, rvec, Cx, Cy, Cz, thetaX, thetaY, thetaZ = calibrate_camera_and_lidar(
        xyz, data2d, data3d, mtx, dist)

    result = {
        "rotM": list(map(np.ndarray.tolist, rotM)),
        "tvec": list(map(np.ndarray.tolist, tvec)),
        "rvec": list(map(np.ndarray.tolist, rvec)),
        "Cx": Cx.tolist()[0],
        "Cy": Cy.tolist()[0],
        "Cz": Cz.tolist()[0],
        "thetaX": thetaX,
        "thetaY": thetaY,
        "thetaZ": thetaZ
    }

    res["result"] = result
    res["code"] = "00"
    res["msg"] = "Success"

    return get_result_response(EasyDict(res))
Example #32
 def register_real(self) -> bool:
     """ subscribe to real register api
     :return: subscribe successful
     """
     log.info('[api] call - RegisterReal({}, {})'.format(
         str(self._REAL_NAME), str(self._SYMBOL)))
     result = self.dynamicCall('RegisterReal(strRealName, strRealKey)',
                               self._REAL_NAME, self._SYMBOL)
     return result == 0  # 0 means success
Example #33
 def filter_before():
     """
     Filter data before reordering so that the data can be merged.
     Prefiltering is supposed to expel data files in which the same ut time occurs.
     """
     log.info("Data prefiltering")
     truncate = "TRUNCATE TABLE %(table_name)s"
     db.execute(truncate % {'table_name': ShortDiffNACS.__tablename__})
     db.execute(truncate % {'table_name': ShortDiffWATS.__tablename__})
     db.execute(truncate % {'table_name': BasicReorderNACS.__tablename__})
     db.execute(truncate % {'table_name': BasicReorderWATS.__tablename__})
Example #34
    def make_order():
        """
        Reorder the data after prefiltering.
        """
        ordering_query = 'INSERT INTO `%(destination_table)s` (%(fields_insert)s) SELECT %(fields_select)s FROM ' + \
                         '`%(source_table)s` as `st` JOIN `source_files` as `sf` ON `st`.`source_id`=`sf`.`id` ' + \
                         'WHERE `sf`.`ignored`=0 ORDER BY DATE_ADD(CONCAT(FROM_DAYS(TO_DAYS(CONCAT(`year`, ' + \
                         '"-01-01")) + `day_of_year` - 1), " 00:00:00"), INTERVAL ut/1000 SECOND_MICROSECOND) ASC;'

        nacs_select = ["st.id", "source_id",
                       SQLCommand("DATE_ADD(CONCAT(FROM_DAYS(TO_DAYS(CONCAT(`year`, '-01-01')) + "
                                  "`day_of_year` - 1), ' 00:00:00'), INTERVAL ut/1000 SECOND_MICROSECOND)"),
                       SQLCommand("`ut` %% 1000"),
                       "year", "day_of_year", "ut", "orbit", "o_density", "o_density_err",
                       "n2_density", "n2_density_err", "he_density", "he_density_err", "n_density",
                       "n_density_err", "ar_density", "ar_density_err", "alt", "lat", "long", "lst",
                       "lmt", "l_sh", "inv_lat", "sza"]

        nacs_insert = ["original_id", "source_id", "date_general", "date_ms", "date_original_year",
                       "date_original_day_of_year", "date_original_ut", "orbit", "o_density", "o_density_err",
                       "n2_density", "n2_density_err", "he_density", "he_density_err", "n_density",
                       "n_density_err", "ar_density", "ar_density_err", "alt", "lat", "long", "lst",
                       "lmt", "l_sh", "inv_lat", "sza"]

        wats_select = ["st.id", "source_id",
                       SQLCommand("DATE_ADD(CONCAT(FROM_DAYS(TO_DAYS(CONCAT(`year`, '-01-01')) + "
                                  "`day_of_year` - 1), ' 00:00:00'), INTERVAL ut/1000 SECOND_MICROSECOND)"),
                       SQLCommand("`ut` %% 1000"),
                       "year", "day_of_year", "ut", "mode", "mode_horizontal", "slot",
                       "outin", "mass", "density", "tn", "tn_correction", "v_s", "c1", "c2", "t1", "t2",
                       "v_geo", "v_geo_correction", "orbit", "altitude", "latitude", "longitude", "lst",
                       "lmt", "l", "inv_lat", "sza"]

        wats_insert = ["original_id", "source_id", "date_general", "date_ms", "date_original_year",
                       "date_original_day_of_year", "date_original_ut", "mode", "mode_horizontal", "slot",
                       "outin", "mass", "density", "tn", "tn_correction", "v_s", "c1", "c2", "t1", "t2",
                       "v_geo", "v_geo_correction", "orbit", "altitude", "latitude", "longitude", "lst",
                       "lmt", "l", "inv_lat", "sza"]

        db.execute(ordering_query %
                   {
                       'destination_table': BasicReorderNACS.__tablename__,
                       'source_table': NeutralGasNACSnT1s.__tablename__,
                       'fields_insert': prepare_fields(nacs_insert),
                       'fields_select': prepare_fields(nacs_select)
                   })
        db.execute(ordering_query %
                   {
                       'destination_table': BasicReorderWATS.__tablename__,
                       'source_table': NeutralGasWATSnTv2s.__tablename__,
                       'fields_insert': prepare_fields(wats_insert),
                       'fields_select': prepare_fields(wats_select)
                   })
        log.info("Making order in satellite data")
Example #35
 def make_conversion(data_type, chunk_size, do_search=True):
     count = s.query(data_type).count()
     log.info("%i elements to be converted" % count)
     iterations = count // chunk_size  # floor division works on both Python 2 and 3
     if count % chunk_size:
         iterations += 1
     for i in range(0, iterations):
         # Query.slice() is half-open like list slicing, so no "- 1" here
         chunk = convert(s.query(data_type).slice(i * chunk_size, (i + 1) * chunk_size).all(), do_search)
         for item in chunk:  # renamed so it doesn't shadow the outer loop variable
             s.add(item)
         s.commit()
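For reference, the two-step ceiling above can also be written as a one-liner; a small sketch (either form yields the same chunk count):

iterations = -(-count // chunk_size)  # integer ceiling division, Python 2 and 3
# or, on Python 3 only (true division):
import math
iterations = math.ceil(count / chunk_size)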
Example #36
def main():
    infoList = []
    oldtime = time.time()
    infoList += getExtraPageInfo(40)
    for info in infoList:
        try:
            table.InsertItemDict(ctable, info)
#             print time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(info['loadtime'])),info['title']
        except Exception:
            logging.error('encoding not supported')
    msg = 'sina has crawled %s records, time cost: %s (seconds)' % (len(infoList), time.time() - oldtime)
    print msg
    log.info(msg)
Example #37
def parse_all():
    log.info('Parsing `plasma lang`')
    walk('%s/plasma_lang/Ne_Te_500ms_ascii/' % data_path, 'asc', True, NeTe500Ms)
    log.info('Parsing `neutral gas nacs`')
    walk('%s/neutral_gas_nacs/n_T_1s_ascii/data/' % data_path, 'asc', True, NT1s)
    log.info('Parsing `neutral gas wats`')
    walk('%s/neutral_gas_wats/n_T_v_2s_ascii/' % data_path, 'asc', True, NTV2s)
    log.info('DONE')
Example #38
def main_single():
    infoList = []
    oldtime = time.time()
#     page ranges from 0 to 5, each value representing a different category
    for page in range(0,6):
        infoList+=getPageInfo(page)
            
    for info in infoList:
        try:
            table.InsertItemDict(ctable, info)
#             print info['loadtime'],info['title']
        except Exception:
            logging.error('encoding not supported')
    msg = 'qq has crawled %s records, time cost: %s (seconds)' % (len(infoList), time.time() - oldtime)
    print msg
    log.info(msg)  
Example #39
    def filter_after():
        """
        Filter data after reordering so that the data can be merged.
        No idea yet what exactly should be done here, but keep it just in case.
        """

        ## Compute difference between timestamps
        diff_insert = ["first_original_id", "first_source_id", "second_original_id", "second_source_id", "time_diff"]
        diff_select = ["e1.original_id", "e1.source_id", "e2.original_id", "e2.source_id",
                       SQLCommand("TIME_TO_SEC(TIMEDIFF(%s, %s)) * 1000 + %s - %s" %
                       (q("e2.date_general"), q("e1.date_general"), q("e2.date_ms"), q("e1.date_ms")))]

        log.info("Data postfiltering")
        short_diff_maker = "INSERT INTO %(diff_destination)s (%(diff_insert)s) SELECT %(diff_select)s " + \
                           "FROM %(diff_source)s as `e2` JOIN %(diff_source)s as `e1` ON `e1`.`id` = `e2`.`id` - 1 " + \
                           " WHERE `e2`.`id` > 1;"

        db.execute(short_diff_maker % {
            'diff_destination': q(ShortDiffNACS.__tablename__),
            'diff_insert': prepare_fields(diff_insert),
            'diff_select': prepare_fields(diff_select),
            'diff_source': q(BasicReorderNACS.__tablename__)
        })

        db.execute(short_diff_maker % {
            'diff_destination': q(ShortDiffWATS.__tablename__),
            'diff_insert': prepare_fields(diff_insert),
            'diff_select': prepare_fields(diff_select),
            'diff_source': q(BasicReorderWATS.__tablename__)
        })

        ## Select zero-diff elements
        fetcher = "SELECT `first_source_id`, `second_source_id` FROM  %(table_name)s WHERE `time_diff` = 0;"
        source = []
        for i in db.execute(fetcher % {'table_name': ShortDiffNACS.__tablename__}).fetchall():
            source.extend([str(i[0]), str(i[1])])
        for i in db.execute(fetcher % {'table_name': ShortDiffWATS.__tablename__}).fetchall():
            source.extend([str(i[0]), str(i[1])])

        source_ids = []
        for i in source:
            if i not in source_ids:
                source_ids.append(i)

        ## Mark files
        if len(source_ids) > 0:
            db.execute("UPDATE `source_files` SET `ignored`=1 WHERE id IN (%s)" % (', '.join(source_ids)))
Example #40
def main():
    infoList = []
    oldtime = time.time()
    pool = process_dummy.Pool()  # defaults to cpu_count() workers
    for page in range(1,4):
        infoList+=getPageInfo(page,pool)    
    pool.close()
    pool.join()             
    for info in infoList:
        try:
            table.InsertItemDict(ctable, info)
#             print time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(vInfo['loadtime'])),info['title']
        except Exception:
            logging.error('encoding not supported')
    msg = 'ifeng has crawled %s records, time cost: %s (seconds)' % (len(infoList), time.time() - oldtime)
    print msg
    log.info(msg)
Example #41
def main():    
    infoList = []
    oldtime = time.time()
    pool = process_dummy.Pool()  # defaults to cpu_count() workers
    results=pool.map(getMainPageInfo, categories.iterkeys())             
    pool.close()
    pool.join()      
    for result in results:
        infoList+=result
    for info in infoList:
        try:
            table.InsertItemDict(ctable, info)
#             print time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(info['loadtime'])),info['title']
        except Exception:
            logging.error('encoding not supported')
    msg = 'sohu has crawled %s records, time cost: %s (seconds)' % (len(infoList), time.time() - oldtime)
    print msg
    log.info(msg)
Example #42
def main_map():
    # A multiprocessing implementation of main(); about 3 pages are updated every hour.
    # Using the default cpu_count() processes is about 4x faster than a single process.
    infoList = []
    oldtime = time.time()
    #pool=multiprocessing.Pool(multiprocessing.cpu_count())
    pool = multiprocessing.Pool()  # defaults to cpu_count() processes
    results=pool.map(getPageInfo, range(0,6))    
    pool.close()
    pool.join()     
    for result in results:
        infoList+=result
    for info in infoList:
        try:
#             table.InsertItemDict(ctable, info)          
            print info['loadtime'],info['title']
        except Exception:
            logging.error('encoding not supported')
    msg = 'qq has crawled %s records, time cost: %s (seconds)' % (len(infoList), time.time() - oldtime)
    print msg
    log.info(msg)
Example #43
def merge():
    """
    Merge NACS and WATS data together.
    The question is how to fit NACS data (1 s resolution) to WATS data (2 s resolution).
    """
    s = db.session()
    def make_conversion(data_type, chunk_size, do_search=True):
        count = s.query(data_type).count()
        log.info("%i elements to be converted" % count)
        iterations = count // chunk_size  # floor division works on both Python 2 and 3
        if count % chunk_size:
            iterations += 1
        for i in range(0, iterations):
            # Query.slice() is half-open like list slicing, so no "- 1" here
            chunk = convert(s.query(data_type).slice(i * chunk_size, (i + 1) * chunk_size).all(), do_search)
            for item in chunk:  # renamed so it doesn't shadow the outer loop variable
                s.add(item)
            s.commit()

    chunk_size = 1000
    make_conversion(BasicReorderNACS, chunk_size, False)
    make_conversion(BasicReorderWATS, chunk_size)
    s.close()

    log.info("Merging data")
Example #44
def main():
    # A multiprocessing.dummy implementation of main(); about 3 pages are updated every hour.
    # Uses the default cpu_count() threads.
    infoList = []
    oldtime = time.time()
    #pool=multiprocessing.Pool(multiprocessing.cpu_count())
#     try:
    pool = process_dummy.Pool()  # defaults to cpu_count() threads
    results=pool.map(getPageInfo, range(0,6))    
    pool.close()
    pool.join()     
    for result in results:
        infoList+=result
    for info in infoList:
        try:
            table.InsertItemDict(ctable, info)          
#             print time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(info['loadtime'])),info['title']
        except Exception:
            logging.error('encoding not supported')
#     except:
#         print 'error on distributing tasks'
    msg = 'qq has crawled %s records, time cost: %s (seconds)' % (len(infoList), time.time() - oldtime)
    print msg
    log.info(msg)
Example #45
def resample():
    """NACS data resampling from 1s to 2s due to wats data. Might be it's more logical would be to find
    out wats.
    """
    def merge_pair(mp1, mp2=None):
        """Merge pair of MeasurementPoints. Check if point have neighbour with time difference 2 seconds.
        If don't we will take values of original point, otherwise we will take middle value of this point
        and neighbour

        Keyword arguments:
        mp1 -- 'nacs' measurement point conjuncted with 'wats'
        mp2 -- neighbouring measurement point; the default 'None' means no neighbour exists

        Return: mp1
        """
        def find_value_model(mp, type, level=1, device='nacs'):
            """Find measurement from mp2 conjuncted with measurement from mp1

            Keyword arguments:
            mp -- measurement point (mp2)
            type -- type of measurement from mp1
            level -- level of measurement from mp1
            device -- device of measurement from mp1

            Return:
            the Measurement object matching the selected value, or None
            """
            for value_model in mp.data:
                if value_model.type == type and value_model.level == level and value_model.device == device:
                    return value_model
            return None

        update = []

        if not mp2 or (mp1.datetime.python_type() - mp2.datetime.python_type()).seconds > 3:
            if not mp2:
                log.debug("[wats:%i:%s] edge point does not exists" % (mp1.id, str(mp1.datetime)))
            else:
                log.debug("[wats:%i:%s]&[wats:%i:%s] is to fas in time dimension" %
                          (mp1.id, str(mp1.datetime), mp2.id, str(mp2.datetime)))
            for measurement in mp1.data:
                if measurement.device == 'nacs':
                    nm = Measurement(measurement)
                    nm.level = 2
                    update.append(nm)
        else:
            log.debug("[wats:%i:%s]&[wats:%i:%s] is goes to be resampled" %
                      (mp1.id, str(mp1.datetime), mp2.id, str(mp2.datetime)))
            for measurement in mp1.data:
                ms = find_value_model(mp2, measurement.type)
                if ms is None:
                    continue  # no matching measurement found in the neighbour point
                nm = Measurement(measurement)
                nm.level = 2
                nm.value = (nm.value + ms.value) / 2
                nm.error = (nm.error + ms.error) / 2
                nm.correction = (nm.correction + ms.correction) / 2
                update.append(nm)

        mp1.data.extend(update)  # list.extend() returns None; extend first, then add the point itself
        session_instance.add(mp1)
        session_instance.commit()

    session_instance = db.session()
    ids_ = session_instance.query(Measurement.measurement_point_id).filter(Measurement.device=='wats').all()
    ids = []
    for i in ids_:
        if i[0] not in ids:
            ids.append(i[0])
    chunk_size = 100
    # ceiling division so the final partial chunk is processed too
    iterations = [ids[i*chunk_size:(i+1)*chunk_size] for i in range(0, (len(ids) + chunk_size - 1) // chunk_size)]
    log.info("WATS data in %i elements going to be processed in %i iterations" % (len(ids), len(iterations)))
    for points in iterations:
        log.info("Processing ids in range [%s..%s](%i)" % (str(points[0]), str(points[-1], len(points))))
        extended_points = points + [j - 1 for j in points]  # copy; extending `points` itself would corrupt the membership test below
        data = session_instance.query(MeasurementPoint).join(Measurement).\
            filter(Measurement.type == 'nacs').filter(MeasurementPoint.id.in_(extended_points)).\
            order_by(Measurement.measurement_point_id).order_by(Measurement.type).all()
        data = {row.id: row for row in data}
        for key, row in data.items():
            if key in points:
                merge_pair(row, data.get(key-1, None))
        #session_instance.commit()

    # Generating 2 level for 'wats' measurements
    db.execute("INSERT INTO `measurements` (`measurement_point_id`, `device`,`type`, `level`, `value`, `error`, " +
               "`correction`) SELECT `measurement_point_id`, `device`,`type`, 2, `value`, `error`, `correction` " +
               "FROM `measurements` WHERE `device`='wats';")