Example #1
def fit_polynomial3(pixel_data):
    '''Return an "image" which is a polynomial fit to the pixel data

    Fit the image to the polynomial Ax**2+By**2+Cxy+Dx+Ey+F,
    extended here with the cubic terms x**3, y**3, x**2*y and x*y**2

    pixel_data - a two-dimensional numpy array to be fitted
    '''
    x,y = np.mgrid[0:pixel_data.shape[0], 0:pixel_data.shape[1]]
    x2 = x*x
    y2 = y*y
    xy = x*y
    x2y = x*x*y
    y3 = y*y*y
    x3 = x*x*x
    y2x = y*y*x
    o  = np.ones(pixel_data.shape)
    a = np.stack([x.flat, y.flat, x2.flat, y2.flat, xy.flat, x2y.flat, y3.flat, x3.flat, y2x.flat, o.flat], 1)
    mean, std = pixel_data.mean(), pixel_data.std()
    # z = (pixel_data.flat - mean) / std
    z = pixel_data.flat
    coeffs, residuals, rank, s = scipy.linalg.lstsq(a, z)
    LogHelper.logText('\n{:.8f}x + {:.8f}y + {:.8f}x^2 + {:.8f}y^2 + {:.8f}xy + {:.8f}x^2y + {:.8f}y^3 + {:.8f}x^3 + {:.8f}xy^2 + {:.8f}', *coeffs)
    output_pixels = np.sum([coeff * index for coeff, index in zip(coeffs, [x,y,x2,y2,xy, x2y, y3, x3, y2x, o])], 0)
    smooth = filters.rank.mean(pixel_data, disk(50))
    coeffs2, residuals2, rank2, s2 = scipy.linalg.lstsq(a, smooth.flat)
    LogHelper.logText('\n{:.8f}x + {:.8f}y + {:.8f}x^2 + {:.8f}y^2 + {:.8f}xy + {:.8f}x^2y + {:.8f}y^3 + {:.8f}x^3 + {:.8f}xy^2 + {:.8f}', *coeffs2)

    return output_pixels, mean, std
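A minimal, self-contained sketch of the least-squares idea used above, on synthetic data (everything here is illustrative, not part of the original project):

import numpy as np
import scipy.linalg

# Fit z = 2x + 3y + 1 over a small grid and check that lstsq recovers
# the coefficients; the design matrix has one column per polynomial term.
x, y = np.mgrid[0:16, 0:16]
z = 2.0 * x + 3.0 * y + 1.0
o = np.ones(z.shape)
a = np.stack([x.flat, y.flat, o.flat], 1)
coeffs, residuals, rank, s = scipy.linalg.lstsq(a, np.asarray(z.flat))
print(coeffs)  # approximately [2., 3., 1.]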
Example #2
def calculate_masked_stats():
    plate_no = "59798"
    parsed = get_plate_files(plate_no)
    for w in ['w2']:
        files = filter(lambda f: f.wave == w[1], parsed)
        # accum = np.zeros((2160, 2160), dtype=np.uint32)
        # files = filter(lambda x: 's1' not in x and 's7' not in x, all_files)
        nof = len(files)
        for i, frame in enumerate(files[0:5], 1):
            LogHelper.logText(frame.fullpath)
            img = imread(frame.fullpath)
            t = filters.threshold_yen(img)
            b1 = img > t
            b2 = binary_erosion(b1, square(2))
            b3 = binary_dilation(b2, square(10))
            b4 = binary_closing(b3, square(3))
            imm = np.ma.masked_where(b4, img)
            mn, mx = np.percentile(imm, (1, 99))
            LogHelper.logText(
                '%3d of %d, %4d-%4d-%4d-%5d, %.0f-%.0f'
                % (i, nof, imm.min(), mn, mx, imm.max(), imm.mean(), imm.std())
            )
            im2 = imm.filled(int(imm.mean()))
            out_name = "{0}\\{5}-{1}{2}-{3}-{4}.tif".format(ROOT_DIR, frame.row, frame.column, frame.site, LogHelper.init_ts, frame.experiment)
            imsave(out_name, im2)
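A rough sketch of the threshold-and-mask idea above on synthetic data (same skimage function names as the original; the image here is made up):

import numpy as np
from skimage import filters
from skimage.morphology import square, binary_erosion, binary_dilation, binary_closing

# Threshold bright objects with Yen's method, grow the mask with
# morphology, then take statistics of the unmasked background only.
rng = np.random.default_rng(0)
img = rng.normal(100, 5, (64, 64))
img[20:30, 20:30] += 80  # a bright synthetic "cell"
t = filters.threshold_yen(img)
b = binary_closing(binary_dilation(binary_erosion(img > t, square(2)), square(10)), square(3))
background = np.ma.masked_where(b, img)
print(background.mean(), background.std())  # stats exclude the masked object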
Example #3
def my_copy_file(source_file,target):
    i_file_count = 0
    try:
        str_dir = source_file
        if os.path.isfile(str_dir):
            str_dir = os.path.dirname(str_dir)
        arr_files = get_dir_files(str_dir)
        i_file_count = len(arr_files)
        for name in arr_files:
            copy_file(name,target)
        return str(i_file_count) + ",.,"
    except Exception as e:
        LogHelper.error("CopyError1:" + e.message)
        return str(i_file_count) + ",.," + "CopyError1:" + e.message
Example #4
def copy_file(source_file,target_file):
    try:
        source = getRootDir(source_file)
        desfilename=source_file.replace('/',os.sep).replace(source,target_file,1).replace('\\\\',os.sep)
   
        LogHelper.debug(source_file +  "  copy to   "+desfilename)
        if not os.path.exists(os.path.dirname(desfilename)):
            os.makedirs(os.path.dirname(desfilename))
        if not os.path.exists(desfilename):
            shutil.copy(source_file,desfilename)  # to move instead of copying, change copy to move
        return "1,.,"
    except Exception as e:
        LogHelper.error("CopyError0:" + e.message)
        return "0,.,CopyError0:" + e.message
Example #5
def find_min():
    dir_name = get_plate_files("59438")
    ilum_name = os.path.join(dir_name, 'ilum.tiff')
    all_files = glob.glob1(dir_name, '*.TIF')
    files = all_files
    m = 500
    ts = "{:%Y-%m-%d-%H-%M-%S}-".format(datetime.now())
    for filename in files:
        file_path = os.path.join(dir_name, filename)
        img = imread(file_path)
        m1 = min(m, np.percentile(img, 1))
        if m != m1: LogHelper.logText("{0} - {1} ".format(m1, filename))
        m = m1
        LogHelper.logText("*************** {0} - {1} ***************".format(m, ts))
Example #6
    def download_page(self, url, outDir, assetDir, filename, css=False, javascript=False, image=False):
        page = None
        #response = self.get_http_pool(url).request('GET', url, headers=headers)
        try:
            response = self.session.get(url)
        except requests.exceptions.ConnectionError as connectionError:
            return -1, None
        except Exception as ex:
            self.logger.exception(LogHelper.getExceptionMsg(ex, "exception: download %s" % url))
            return -1, None

        if (response.status_code != 200):
            self.logger.warn("http response: %s %s" % (response.status_code, url))
        else:
            html = response.content.decode('utf-8', 'ignore')
            page = htmlparser.fromstring(html, base_url=url)
            if (css or javascript or image):
                if (css):
                    self.download_related_files(page, u"//link", u"href", outDir, assetDir, baseurl=url)
                if (javascript):
                    self.download_related_files(page, u"//script", u"src", outDir, assetDir, baseurl=url)
                if (image):
                    self.download_related_files(page, u"//img", u"src", outDir, assetDir, baseurl=url)

                newHtml = htmlparser.tostring(page)
                self.saveToFile(os.path.join(outDir, filename), newHtml)
            else:
                self.saveToFile(os.path.join(outDir, filename), html)
        return response.status_code, page
Example #7
    def parseProfile(self, name, urlProfile, outputDir, donors):
        result = False
        try:
            self.logger.info('downloading profile %s...' % name)
            #filename = '%s.html' % datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
            status, page = self.downloader.download_page(urlProfile,
                                                         outputDir,
                                                         self.assetDir,
                                                         profileHtmlSrcFilename,
                                                         css=True,
                                                         javascript=True,
                                                         image=False)
            if (page == None):
                pass
            elif (status != 200):
                self.logger.warn("http response: %s %s" % (status, urlProfile))
            else:
                detailedFound = False
                progress = ""
                raised = ""
                togo = ""
                nodeDetailes = self.find_element_by_xpath(page, u'//div[@id="funding_details"]')
                #text = htmlparser.tostring(nodeDetailes, "innerHTML")
                text=nodeDetailes.text_content()
                if (text != None and text != ""):
                    text = text.lower()
                    list = re.findall(ur"([\d\.]+?)%\s*(.*?)\$([\d,]+)\s*(.*?)\s*?$", text)
                    if (len(list) == 1 and len(list[0]) == 4):
                        values = list[0]
                        if (values[1] != None and values[3] == "raised"):
                            progress = values[0]
                            raised = values[2]
                            detailedFound = True
                        elif (values[1] != None and values[3] == "to go"):
                            progress = values[0]
                            togo = values[2]
                            detailedFound = True
                    if (not detailedFound):
                        self.logger.error("invalid reg pattern for %s in profile %s" % (text, urlProfile))

                if (not detailedFound):
                    self.logger.error("profile %s details not found" % name)
                else:
                    timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')
                    if (raised != None):
                        raised = re.sub(ur"[,$]", "", raised)
                    if (togo != None):
                        togo = re.sub(ur"[,$]", "", togo)
                    details = [timestamp, progress, raised, togo, donors]
                    self.logger.info("profile %s details: %s" % (name, str(details)))
                    #self.save_profile_details(os.path.join(outputDir, '%s.txt' % (name)), details)
                    self.save_profile_details(os.path.join(outputDir, 'data.csv'), details)
                    result = True
        except Exception as ex:
            self.logger.exception(LogHelper.getExceptionMsg(ex, "Exception: %s: %s " % ("parseProfile", name)))
        finally:
            pass
        return result
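The funding-details regex is the core of this parser; a tiny demo against a made-up string (shown with a plain r"" literal; the original's ur"" is Python 2 syntax):

import re

text = "75.0% funded $1,234 raised"
m = re.findall(r"([\d\.]+?)%\s*(.*?)\$([\d,]+)\s*(.*?)\s*?$", text)
print(m)  # [('75.0', 'funded ', '1,234', 'raised')]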
Example #8
 def saveToFile(filename, data):
     file = None
     try:
         file = open(filename, mode='wb')
         file.write(data)
     except Exception as ex:
         logger.exception(LogHelper.getExceptionMsg(ex, "unable to save file: %s" % filename))
     finally:
         if (file != None):
             file.close()
Example #9
    def savePatientInfos(self):
        if (self.patientInfosFile):
            self.patientInfosFile.close()
            self.patientInfosFile = None

        try:
            # open(..., 'w') truncates the cache file; close it right away
            open(self.patientInfosFilename, 'w').close()
        except Exception as ex:
            self.logger.exception(LogHelper.getExceptionMsg(ex, "error: can't write cache file"))
Example #10
 def saveToFile(self, filename, data):
     file = None
     try:
         file = open(filename, mode='wb')
         file.write(data)
     except Exception as ex:
         self.logger.exception(LogHelper.getExceptionMsg(ex, "unable to save file: %s" % (filename)))
     finally:
         if (file):
             file.close()
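The open/try/finally pattern repeated in these helpers can be collapsed with a context manager, which closes the file even on error; a sketch of an equivalent method (assuming the same self.logger and LogHelper as above):

def saveToFile(self, filename, data):
    try:
        # the with-block closes the file automatically, replacing the
        # explicit finally/close bookkeeping
        with open(filename, mode='wb') as f:
            f.write(data)
    except Exception as ex:
        self.logger.exception(LogHelper.getExceptionMsg(ex, "unable to save file: %s" % filename))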
Example #11
def get_plate_files(plate_name, cnt=-1):
    def parse_file_name(f):
        n = os.path.basename(f)
        parts = [f, n, plate_name] + NAME_PARSER.split(n)[1:6]
        if len(parts[6]) == 1:
            parts[6] = '0' + parts[6]
        frm = Frame._make(parts)
        return frm

    tree = LogHelper.time(lambda: [PathNode._make(t) for t in os.walk(ROOT_DIR + "\\data", topdown=False)])
    plate = pydash.find(tree, lambda p: p.root.endswith(plate_name))
    LogHelper.logText(plate.root)
    if plate.dirs:
        files = glob.glob(plate.root + "\\*\\*.tif")
    else:
        files = glob.glob(plate.root + "\\*.tif")
    if cnt > 0:
        files = random.sample(files, cnt)
    parsed = map(parse_file_name, files)
    return parsed
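Frame and PathNode are defined elsewhere in this project; a plausible reconstruction inferred from the attributes used across these examples (all field names are guesses, and shortname() is assumed):

import collections

# PathNode mirrors the (root, dirs, files) tuples yielded by os.walk
PathNode = collections.namedtuple('PathNode', 'root dirs files')

class Frame(collections.namedtuple(
        'Frame', 'fullpath filename plate experiment row column site wave')):
    def shortname(self):
        # hypothetical: a compact label like "B02-3-w2"
        return '%s%s-%s-w%s' % (self.row, self.column, self.site, self.wave)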
Example #12
def dilum():
    # ilum = imread(r"T:\NewImaging\w2-2018-03-15-16-02-09-smooth.tiff")
    parsed = get_plate_files("59798")
    for w in ['w2']:
        files = filter(lambda f: f.wave == w[1], parsed)
        for i, frame in enumerate(files[0:1], 1):
            img = imread(frame.fullpath)
            r1 = rescale_intensity(img, out_range=np.uint8).astype(np.uint8)
            mn = img.min()
            mx = img.max()
            mean = np.mean(img)
            std = np.std(img)
            img[img > (mean + std)] = mn
            r2 = rescale_intensity(img, in_range=(mn, mx), out_range=np.uint8).astype(np.uint8)
            s = np.stack((r1, r1, r2), 2)
            # img2 = (np.int32(img) - ilum)
            # img3 = np.clip(img2, 0, None)
            # img4 = rescale_intensity(img3, out_range=np.uint8).astype(np.uint8)
            out_name = "{0}\\{1}{2}-{3}-{4}.png".format(ROOT_DIR, frame.row, frame.column, frame.site, LogHelper.init_ts)
            imsave(out_name, s)
            LogHelper.logText("*************** {0:s} ***************".format(out_name))
Example #13
 def save_profile_details(self, filename, list):
     file = None
     try:
         file = open(filename, "ab")
         writer = csv.writer(file)
         writer.writerow(list)
         file.flush()
     except Exception as ex:
         self.logger.exception(LogHelper.getExceptionMsg(ex, "unable to save file: %s" % (filename)))
     finally:
         if (file != None):
             file.close()
Example #14
 def saveTextToFile(self, filename, data):
     file = None
     try:
         #file = codecs.open(filename, mode='w', encoding="utf-8")
         #file = codecs.open(filename, mode='wb')
         file = open(filename, mode='w')
         #file.write(u'\ufeff')  #codecs.BOM_UTF8
         file.write(data)
     except Exception as ex:
         self.logger.exception(LogHelper.getExceptionMsg(ex, "unable to save file: %s" % filename))
     finally:
         if (file):
             file.close()
Example #15
 def saveTextToFile(filename, data, encoding="utf-8"):
     file = None
     try:
         file = codecs.open(filename, mode='w', encoding=encoding)
         #file = codecs.open(filename, mode='wb')
         #file = open(filename, mode='w')
         #file.write(u'\ufeff')  #codecs.BOM_UTF8
         file.write(data)
     except Exception as ex:
         logger.exception(LogHelper.getExceptionMsg(ex, "unable to save file: %s" % filename))
     finally:
         if (file != None):
             file.close()
Example #16
def calculate_ilum():
    parsed = get_plate_files("59839")
    for w in ['2']:
        files = filter(lambda f: f.wave == w, parsed)[0:30]
        nof = len(files)
        # files = filter(lambda x: 's1' not in x and 's7' not in x, files)
        img0 = imread(files[0].fullpath)
        mp = np.argmax(np.bincount(img0.flat))
        s2 = mp - img0.min()
        accum = np.zeros_like(img0, dtype=np.int32)
        accum_cnt = np.ones_like(img0, dtype=np.int32)
        thresh_w = np.uint16(filters.threshold_otsu(img0))
        prt = (img0 > thresh_w).sum() * 1.0 / len(img0.flat)
        if prt > 0.2:
            thresh_w = img0.mean() + 10 * img0.std()
        LogHelper.logText('{0}'.format(thresh_w))
        # ls = 0
        for i in range(nof):
            frame = files[i]
            img = imread(frame.fullpath)
            mp = np.argmax(np.bincount(img.flat))
            s2 = mp - img0.min()
            t = mp + s2
            # LogHelper.logText('%4d-%4d-%5d (%.0f)' % (img.min(), img.mean(), img.max(), img.std()))
            img[img >= t] = 0
            accum += img
            accum_cnt += (img != 0)
            # av = (accum / accum_cnt).astype(np.uint16)
            # avs = filters.laplace(av, 31)
            # s = avs.std()
            # ds = abs(s - ls)
            LogHelper.logText('%3d of %d w%s# %s %d' % (i+1, nof, w, frame.shortname(), t))
            # ls = s
            # if ds < LPTH:
            #     break

        stats_dir = os.path.join(ROOT_DIR, "%s-stats" % files[0].plate)
        try:
            os.mkdir(stats_dir)
        except WindowsError as e:
            assert(e.winerror == 183)  # 'Cannot create a file when that file already exists'
        filename = os.path.join(stats_dir, "%s-w%s-%%s.tif" % (LogHelper.init_ti, w))
        LogHelper.logText(filename)
        tifsave(filename % 'accum', accum)
        tifsave(filename % 'accum_cnt', accum_cnt)

        avg_u = (accum / accum_cnt).astype(np.uint16)
        tifsave(filename % 'avg_u', avg_u)
        smooth = filters.rank.mean(avg_u, disk(50))
        tifsave(filename % 'smooth', smooth)
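The smoothed average is an illumination field; a hedged sketch of how such a field might be applied to correct a frame (this step is not shown in the original):

import numpy as np

def correct_illumination(img, smooth):
    # Divide out the relative illumination gain (smooth / its mean),
    # then clip back into the uint16 range.
    gain = smooth.astype(np.float64) / smooth.mean()
    corrected = img / np.maximum(gain, 1e-6)
    return np.clip(corrected, 0, np.iinfo(np.uint16).max).astype(np.uint16)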
Example #17
def calculate_stats():
    # d = imread(r"T:\NewImaging\w2-2018-03-16-18-47-26-smooth.tiff")
    # dmn, dmd, dmx = np.percentile(d, (0.1, 50, 99.9))
    # LogHelper.logText(
    #     '%4d-%4d-%4d-%4d-%5d, %.2f-%.2f'
    #     % (d.min(), dmn, dmd, dmx, d.max(), np.mean(d), np.std(d))
    # )
    parsed = get_plate_files("59833", 30)
    for w in ['1']:
        files = filter(lambda f: f.wave == w, parsed)
        # files = filter(lambda x: 's1' not in x and 's7' not in x, files)
        nof = len(files)
        p50s = []
        mps = []
        means = []
        s2s = []
        stds = []
        for i, frame in enumerate(files, 1):
            LogHelper.logText("%3d of %d - %s" % (i, nof, frame.shortname()))
            img = imread(frame.fullpath)
            p1, p50, p99 = np.percentile(img, (1, 50, 99))
            mp = np.argmax(np.bincount(img.flat))  # type: np.uint16
            s2 = mp - img.min()
            # r1 = measure.block_reduce(img, (30, 30), func=np.std)
            # r2 = measure.block_reduce(img, (20, 20), func=np.std)
            # l = abs(filters.laplace(img, 3))
            LogHelper.logText(
                '%4d-%4d # %4d-%4d # %4d-%d-%4d #%4d-%4d' % (img.min(), img.max(), p1, p99, p50, mp, img.mean(), s2, img.std())
                # "%08.8f %08.8f" % (r1.max() / 30, r2.max() / 20)
                ### good focus s > 4
                # % (i, nof, img.min(), p1, p50, p99, img.max(), l.sum(), l.std())
            )
            p50s.append(p50)
            mps.append(mp)
            means.append(img.mean())
            s2s.append(s2)
            stds.append(img.std())
            # dm = cv2.subtract(img, d)
            # p1, p50, p99 = np.percentile(dm, (0.1, 50, 99.9))
            # LogHelper.logText(
            #     '%3d of %d, %4d-%4d-%4d-%4d-%5d, %.0f-%.0f'
            #     % (i, nof, dm.min(), p1, p50, p99, dm.max(), np.mean(dm), np.std(dm))
            # )
        LogHelper.logText(
            '%4d-%d-%4d #%4d-%4d' % (np.std(p50s), np.std(mps), np.std(means), np.std(s2s), np.std(stds))
        )
Example #18
 def loadPatientInfos(self):
     if (os.path.isfile(self.patientInfosFilename)):
         file = None
         try:
             file = open(self.patientInfosFilename, "r")
             while True:
                 line = file.readline()
                 if not line: break
                 if line == "": continue
                 newEntry = ast.literal_eval(line)
                 if (isinstance(newEntry, dict) and "id" in newEntry):
                     id = newEntry[self.cache_key_id]
                     if (id in self.dictPatientInfos):
                         entry = self.dictPatientInfos[id]
                     else:
                         entry = {}
                         self.dictPatientInfos[id] = entry
                     for key in newEntry:
                         entry[key] = newEntry[key]
             self.logger.info("%d patient infos loaded" % len(self.dictPatientInfos))
         except Exception as ex:
             self.logger.exception(LogHelper.getExceptionMsg(ex, "Can't read cache file"))
         finally:
             if (file != None):
                 file.close()
Example #19
def calculate_empty_stats():
    parsed = get_plate_files("empty")
    for w in ['w1']:
        files = filter(lambda f1: f1.wave == w[1], parsed)
        nof = len(files)
        stats = []
        for i, f in enumerate(files, 1):
            img = imread(f.fullpath)
            h = np.unique(img)
            p1, p99 = np.percentile(img, (1, 99)).astype(np.uint16)
            mn = img.min()
            mx = img.max()
            mean = img.mean()
            std = img.std()
            st = [f.row, f.column, f.site, i, nof, mn, p1, p99, mx, mean, std, mx-p99, len(h)]
            stats.append(st + [f])
            LogHelper.logText('%s%s%s %3d of %d, %4d-%4d-%4d-%5d, %.2f-%.2f-%3d %d' % tuple(st))
        stats.sort(key=lambda s: s[8])
        s = stats[0]
        fn = s.pop()
        LogHelper.logText('%s%s%s %3d of %d, %4d-%4d-%4d-%5d, %.2f-%.2f-%3d %d' % tuple(s))
        LogHelper.logText(fn.filename)
Example #20
def stitch():
    DIM = 1080
    sDIM = 216 + 4
    DIMP = sDIM + 5
    plate_name = '59476'
    parsed = get_plate_files(plate_name)
    superdim = (DIMP*4*6, DIMP*4*10, 3)
    superframe = np.ones(superdim, dtype=np.uint8) * 255
    for f in parsed:
        LogHelper.logText(f.filename)
        s = int(f.site) - 1
        c = int(f.column) - 2
        r = (ord(f.row) - ord('B'))
        y = sDIM*(s % 4) + DIMP*4*c
        x = sDIM*(s // 4) + DIMP*4*r
        img = imread(f.fullpath)
        img = rescale(img, 0.2, multichannel=True, preserve_range=True)
        imgp = np.pad(img, ((0,4), (0,4), (0,0)), 'constant', constant_values=128)
        LogHelper.logText('{0} read to go {1}x{2}'.format(f.filename, x, y))

        superframe[x:x+imgp.shape[0], y:y+imgp.shape[1]] = imgp
        LogHelper.logText(f.filename + ' placed')

    imsave(ROOT_DIR + '\\' + plate_name + 'super.png', superframe)
Example #21
def redis_public(o_redis, msg):
    # write to redis and publish the message
    o_redis.public(msg)


print(sys.getdefaultencoding())

print('--------------------------------------read config')
#print(config_configparser.config_write())
config = config_configparser.config_read()
print(config)
log_file = (config['DEFAULT']['server action'])

b_loop = True
log = LogHelper(log_file)

# instantiate the RedisHelper class object
str_r_ip = config['redis']['ip2']
str_r_port = config['redis']['port2']
str_r_pwd = config['redis']['pwd2']
str_r_chan = config['redis']['chan1']
str_r_db = config['redis']['db']
str_r_chan2 = config['redis']['chan2']

# print the configuration
lists_header = config.sections()
str_config = ""
for secs in lists_header:
    for key in config[secs]:
        str_config = str_config + " " + key + ":" + config[secs][key]
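config_configparser.config_read() is project-specific; a stdlib sketch that reads the same keys might look like this (Python 3 configparser, hypothetical filename):

import configparser

config = configparser.ConfigParser()
config.read('config.ini')
log_file = config['DEFAULT']['server action']
str_r_ip = config['redis']['ip2']
str_r_port = config['redis']['port2']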
Example #22
    def parsePages(self):
        try:
            pageIndex = 1
            #urlNextPage = 'https://watsi.org/fund-treatments/page/129'
            urlNextPage = 'https://watsi.org/fund-treatments/'
            while True:
                self.logger.info('downloading page %d...' % pageIndex)
                currentPage = urlNextPage
                status, page = self.downloader.download_page(currentPage,
                                                             self.htmlDir,
                                                             self.assetDir,
                                                             'page%05d.html' % (pageIndex),
                                                             css=False,
                                                             javascript=False,
                                                             image=False)
                if (page == None):
                    self.logger.warn("error: downloading page %d" % (pageIndex))
                    break
                elif (status != 200):
                    self.logger.warn("http response: %s %s" % (status, currentPage))
                    break
                else:
                    self.logger.info('parsing page %d...' % pageIndex)
                    #find next page's url
                    nodes = page.xpath(u"//a[text()='Next ›']")
                    urlNextPage = urlparse.urljoin(currentPage, nodes[0].attrib['href']) if (len(nodes) > 0) else None

                    items = page.xpath(u"//div[@class='profiles']/ul/li")
                    if (items):
                        for item in items:
                            id = item.attrib["id"]
                            node = self.find_element_by_xpath(item, u".//div/a")
                            url = self.get_attrib(node, "href", None)
                            urlProfile = urlparse.urljoin(currentPage, url) if url else None
                            node = self.find_element_by_xpath(item,
                                                              u".//*[@class='info-bar']")  #info-bar 會在 <p> or <div> 中
                            title = node.text if node != None else ""
                            node = self.find_element_by_xpath(item, u".//p[@class='profile-description']")
                            description = node.text if node != None else ""
                            node = self.find_element_by_xpath(item, u".//div[@class='cont']/a/img")
                            imgSrc = self.get_attrib(node, "src", "")

                            #Progress
                            node = self.find_element_by_xpath(item, u".//div[@class='meter orange nostripes']/span")
                            progressStr = self.get_attrib(node, "style", "")
                            list = re.findall(ur"[;^]*?\s*?width:\s*([,\d]*)", progressStr)
                            progress = None
                            if (len(list) == 1):
                                progress = list[0]

                            #togo raised donors
                            togo = None
                            raised = None
                            donors = None

                            if(title=="The Universal Fund"):
                                continue
                            else:
                                list = re.findall(ur"\$?([,\d]*)\s*(.*?)\s*\|\s*([,\d]*)\s*(.*)", title)
                                if (len(list) == 1 and len(list[0]) == 4):
                                    values = list[0]
                                    if (values[1] != None and values[1].lower() == "raised"):
                                        raised = values[0]
                                        donors = values[2]
                                    elif (values[1] != None and values[1].lower() == "to go"):
                                        togo = values[0]
                                        donors = values[2]
                                    else:
                                        self.logger.error("invalid reg pattern for %s in page %s" % (title, currentPage))
                                        continue
                                else:
                                    self.logger.error("invalid reg pattern for %s in page %s" % (title, currentPage))
                                    continue

                            if (raised != None):
                                raised = re.sub(ur"[,$]", "", raised)
                            if (togo != None):
                                togo = re.sub(ur"[,$]", "", togo)
                            if (donors != None):
                                donors = re.sub(ur"[,$]", "", donors)

                            #Log.i("%s %s" %(id, progress))
                            #Log.i("%s %s" %(id, urlProfile))
                            # Log.i("%s %s" %(id, title))
                            # Log.i("\t%s" % description)
                            # Log.i("\t%s" % imgSrc)
                            outputDir = os.path.join(os.path.join(self.profileDir, id[-1:]), id)
                            if (progress == '0' and not (os.path.isdir(outputDir))):
                                os.makedirs(outputDir)
                            if not os.path.exists(outputDir):
                                continue
                            if (self.getPrevProgressById(id) != '100'):
                                self.parseProfile(id, urlProfile, outputDir, donors)
                            else:
                                self.ensureProfileDownloaded(id, urlProfile, outputDir)
                            self.saveOverallEntry(id, [id, urlProfile])
                            self.cache_profile_details(id, progress, raised, togo, donors)
                        self.logger.info("%d items found" % (len(items)))
                    if (not items):
                        self.logger.info("items not found")
                        break
                    if (len(items) == 0):
                        self.logger.info("items length == 0")
                        break
                    if (not urlNextPage):
                        self.logger.info("NextPage not found")
                        break
                    pageIndex += 1
            self.savePatientInfos()
            self.logger.info('done!')
        except Exception as ex:
            self.logger.exception(LogHelper.getExceptionMsg(ex, "parsePages"))
        finally:
            pass
Example #23
def MainProcess(uperList, saveRootPath, concurrency = 3):
    logger = LogHelper('Bili', cmdLevel='INFO', fileLevel="DEBUG").logger
    pp = None
    try:
        # --------------------------------------------------------------
        # fetch the number of video pages for each uploader
        pp = PreProcess(logger = logger, uperList=uperList)
        pp.ScanLoclInfo(saveRootPath)
        pp.Process()
        # --------------------------------------------------------------
        # crawl the urls of the videos to download
        for uper in pp.uperList:
            logger.info(uper.UserName + " Spider Start···")
            OneSpiderRetryTimes = 0
            # compare the planned download count with the count fetched from the network
            while ((uper.NeedDownloadFilmCount > len(uper.VideoInfoDic_NetFileName) or len(uper.ErrorUrl_Dic) > 0) and OneSpiderRetryTimes <= 10):
                # dd = BiliSpider()
                # GithubDeveloperSpider
                BiliSpider.start(logger = logger,
                                uper = uper,
                                saveRootPath = saveRootPath,
                                concurrency = concurrency,
                                middleware=middleware)
                                
                OneSpiderRetryTimes = OneSpiderRetryTimes + 1
                logger.info("Try Spider " + uper.UserName + " " + str(OneSpiderRetryTimes) + " times.")
                RandomSleep()
                
            logger.info(uper.UserName + " Spider Done.")

            if OneSpiderRetryTimes > 10:
                logger.error(uper.UserName + " Spider Retry " + str(OneSpiderRetryTimes) + "times.")
                logger.error("Error Url:")
                for eUrl in uper.ErrorUrl_Dic:
                    logger.error(eUrl)
            else:
                # warn when local existing + pending downloads != the network total
                if len(uper.VideoInfoDic_NetFileName) != len(uper.VideoInfoDic_loaclFileName):
                    logger.warn("VideoInfoDic_NetFileName Count: " + str(len(uper.VideoInfoDic_NetFileName)) 
                        + " != VideoInfoDic_loaclFileName Count: " + str(len(uper.VideoInfoDic_loaclFileName))
                    )
            uper.ErrorUrl_Dic.clear()

        logger.info("Spider All Done.")
        # --------------------------------------------------------------
        logger.info("Start Download"+ "----" * 20)
        # start downloading
        # first sync the local and net dictionaries
        logger.info("Start Sync Dic")
        for uper in pp.uperList:
            iNeedDl = 0
            for fileName, oneVideo in zip(uper.VideoInfoDic_loaclFileName.keys(), uper.VideoInfoDic_loaclFileName.values()):
                if fileName in uper.VideoInfoDic_NetFileName:
                    uper.VideoInfoDic_NetFileName[fileName].isDownloaded = oneVideo.isDownloaded
                    if oneVideo.isDownloaded == False:
                        iNeedDl = iNeedDl + 1
            logger.info(uper.UserName + "NetFile / LocalFile -- NeedDl: " + str(len(uper.VideoInfoDic_NetFileName)) + " / " + str(len(uper.VideoInfoDic_loaclFileName)) + " -- " + str(iNeedDl))
        logger.info("End Sync Dic")
        for uper in pp.uperList:
            directory = os.path.join(saveRootPath, uper.UserName)
            for fileName, oneVideo in zip(uper.VideoInfoDic_NetFileName.keys(), uper.VideoInfoDic_NetFileName.values()):
                if oneVideo.isDownloaded == True:
                    continue
                DownloadRetryTimes = 0
                oneRe = False
                while oneRe is False and DownloadRetryTimes <= 10:
                    oneRe = Downloader(logger, directory, oneVideo.time, oneVideo.title, oneVideo.url).ProcessOne()
                    DownloadRetryTimes = DownloadRetryTimes + 1
                    logger.info("Try Download " + str(DownloadRetryTimes) + " times.")
                    RandomSleep()

                if DownloadRetryTimes > 10:
                    logger.error("Retry Download " + str(DownloadRetryTimes) + " times.")
                    logger.error("Error Url: " + oneVideo.url)
                # mark the download as finished
                if oneRe:
                    oneVideo.isDownloaded = True
                    uper.ThisTimeDownloadCount = uper.ThisTimeDownloadCount + 1
                    

    except Exception as ex:
        errInfo = "Catch Exception: " + str(ex)
        logger.error(errInfo)
    finally:
        logger.info("finally"+ "----" * 20)
        for uper in pp.uperList:
            logger.info("This Time Download: " + uper.UserName + " -- " + str(uper.ThisTimeDownloadCount))
        for uper in pp.uperList:
            for fileName, oneVideo in zip(uper.VideoInfoDic_NetFileName.keys(), uper.VideoInfoDic_NetFileName.values()):
                if oneVideo.isDownloaded == False:
                    logger.error('Download Fail:' + uper.UserName)
                    logger.error(oneVideo.url)
        logger.info("All Done.")
Example #24
    configInfo.barkurl = cf.get("BarkConfig", "barkurl")
    configInfo.barkapikey = cf.get("BarkConfig", "barkapikey")
    configInfo.notifyurl = cf.get("BarkConfig", "notifyurl")
    configInfo.repeatTimes = int(cf.get("DownloadConfig", "repeatTimes"))
    configInfo.delay = int(cf.get("DownloadConfig", "delay"))

    return configInfo


if __name__ == '__main__':
    # --------------------------------------------------------------
    # read the external configuration
    configInfo = ReadConfigIni()

    while configInfo.repeatTimes > 0 or configInfo.repeatTimes == -1:
        logger = LogHelper('Bili', cmdLevel='INFO', fileLevel="DEBUG").logger

        try:
            logger.info('repeatTimes = ' + str(configInfo.repeatTimes))
            # --------------------------------------------------------------
            # set up what needs to be downloaded
            # videos for each uploader
            downloadlistfile = 'DownloadList.txt'
            if os.path.exists(downloadlistfile):
                uperList = ReadDownloadList(downloadlistfile)
            else:
                logger.error("DownloadList.txt not found")
                raise Exception("DownloadList.txt not found")
Example #25
    def saveProfile(self, profileName, dir, reportUrl, detailUrl, overallEntry):
        assetdir = os.path.join(dir, "files" + os.sep)
        if (not os.path.isdir(dir)):
            os.makedirs(dir)
        if (not os.path.isdir(assetdir)):
            os.makedirs(assetdir)

        status, page = self.downloader.download_page(reportUrl,
                                                     dir,
                                                     assetdir,
                                                     '%s_origin.htm' % (profileName),
                                                     css=False,
                                                     javascript=False,
                                                     image=False)
        #self.downloader.clear_cache()
        
        if (page != None):
            reporter = None
            reportContent = ""
            #headers
            items = page.xpath(u"//*[@id='maincontent']//article/header/hgroup/*")
            for item in items:
                header = StrHelper.trim(item.text_content())
                if (header != None and header.startswith(profileName)):
                    header = StrHelper.trim(header[len(profileName):])
                reportContent += header + os.linesep
                break
            reportContent += os.linesep
            #content
            reg = re.compile(ur"^基金會編號.*$", re.MULTILINE)
            allsymbols = ur" ,、。.?!~$%@&#*‧;︰…‥﹐﹒˙·﹔﹕‘’“”〝〞‵′〃├─┼┴┬┤┌┐╞═╪╡│▕└┘╭╮╰╯╔╦╗╠═╬╣╓╥╖╒╤╕║╚╩╝╟╫╢╙╨╜╞╪╡╘╧╛﹣﹦≡|∣∥–︱—︳╴¯ ̄﹉﹊﹍﹎﹋﹌﹏︴﹨∕╲╱\/↑↓←→↖↗↙↘〔〕【】﹝﹞〈〉﹙﹚《》(){}﹛﹜『』「」<>≦≧﹤﹥︵︶︷︸︹︺︻︼︽︾︿﹀∩∪﹁﹂﹃﹄"
            regReporters = [  #re.compile(ur"[。:」\s]+(.{3,4})口述.?記者(.{3,4})(?:採訪整理)?$", re.MULTILINE),
                              re.compile(allsymbols + ur"[\s]+(.{2,4})[口筆]述\s?.?\s?記者(.{2,4})(?:採訪整理)?$", re.MULTILINE),
                              #[\u4e00-\u9fa5] matches characters beyond the English range, including Chinese characters and full-width punctuation
                              re.compile(ur"報導.攝影.(.{2,4})記者$", re.MULTILINE),
                              re.compile(ur"報導.攝影.(.{2,4})$", re.MULTILINE),
                              re.compile(ur"攝影.報導.(.{2,4})$", re.MULTILINE),
                              re.compile(ur"攝影.(.{2,4})$", re.MULTILINE),
                              re.compile(ur"報導.(.{2,4})$", re.MULTILINE),
                              re.compile(ur"報導.(.{2,4})$", re.MULTILINE),
                              re.compile(ur"記者(.{2,4})採訪整理$", re.MULTILINE),
                              re.compile(ur"^【(.{2,4})╱.{2,4}報導】", re.MULTILINE), ]

            #preserve <br> tags as \n
            brs = page.xpath(u"//div[@class='articulum']//br")
            if (len(brs) == 0):
            	brs = page.xpath(u"//div[@class='articulum trans']//br")

            for br in brs:
                br.tail = "\n" + br.tail if br.tail else "\n"

            items = page.xpath(u"//div[@class='articulum']/*")
            if (len(items) == 0):
                items = page.xpath(u"//div[@class='articulum trans']/*")

            for item in items:
                tag = item.tag.lower()
                id = self.get_attrib(item, "id", None)
                # if (tag == "figure"): continue
                # if (tag == "iframe"): break
                if (id == "bcontent" or id == "bhead" or id == "introid"):
                    text = StrHelper.trim(item.text_content())
                    if (text == None or text == ""): continue
                    if (id != "bhead"):
                        for regReporter in regReporters:
                            list = regReporter.findall(text)
                            if (len(list) == 1):
                                if (not isinstance(list[0], basestring)):
                                    reporter = "/".join(list[0])
                                else:
                                    reporter = list[0]
                                text = StrHelper.trim(regReporter.sub('', text))
                                break
                        if (reporter):
                            overallEntry.reporter = reporter
                        else:
                            self.logger.warn("error: parsing reporter: %s" % reportUrl)

                    text = StrHelper.trim(reg.sub('', text))
                    reportContent += text + os.linesep + os.linesep
            FileHelper.saveToFile(os.path.join(dir, reportFileName), reportContent)

        status, page = self.downloader.download_page(detailUrl,
                                                     dir,
                                                     assetdir,
                                                     detailSrcFileName,
                                                     css=False,
                                                     javascript=False,
                                                     image=False)
        if (page != None):
            items = page.xpath(u"//div[@id='charitysidebox3'][1]/div[@id='inquiry3']/table//tr")
            maxDate = None
            if (len(items) > 0):
                file = None
                try:
                    file = open(os.path.join(dir, detailFileName), "wb")
                    csvwriter = csv.writer(file)
                    for index, item in enumerate(items):
                        if (index > 1):
                            cols = item.xpath(u".//td")
                            if (len(cols) == 4):
                                no = StrHelper.trim(cols[0].text)
                                name = StrHelper.trim(cols[1].text)
                                amount = StrHelper.trim(cols[2].text)
                                dateStr = StrHelper.trim(cols[3].text)
                                try:
                                    date = datetime.datetime.strptime(dateStr, "%Y/%m/%d")
                                    if (maxDate == None or date > maxDate):
                                        maxDate = date
                                except Exception as ex:
                                    self.logger.warn("error date format:%s in %s" % (dateStr, detailUrl))
                                csvwriter.writerow([no, dateStr, amount, name])
                    overallEntry.enddate = maxDate.strftime("%Y/%m/%d") if maxDate != None else ""
                    overallEntry.doners = len(items) - 2
                except Exception as ex:
                    self.logger.exception(LogHelper.getExceptionMsg(ex, "error paring detail.html"))
                finally:
                    if (file):
                        file.close()
Example #26
    def parsePages(self):
        try:
            pageIndex = 1
            while True:
                urlNextPage = 'http://search.appledaily.com.tw/charity/projlist/Page/%d' % pageIndex
                self.logger.info('downloading page %d...' % pageIndex)
                currentPage = urlNextPage
                status, page = self.downloader.download_page(currentPage,
                                                             self.htmlDir,
                                                             self.assetDir,
                                                             'page%05d.html' % (pageIndex),
                                                             css=False,
                                                             javascript=False,
                                                             image=False)

                if (page == None):
                    self.logger.warn("error: downloading page %d" % (pageIndex))
                    break
                elif (status != 200):
                    self.logger.warn("http response: %s %s" % (status, currentPage))
                    break
                else:
                    self.logger.info('parsing page %d...' % pageIndex)

                    items = page.xpath(u"//tr[@class='odd']")
                    row = 0
                    if (items):
                        for item in items:
                            nodes = item.xpath(u".//td")
                            if (len(nodes) == 6):
                                reportUrl = None
                                detailUrl = None
                                title = None
                                row += 1
                                id = nodes[0].text
                                if (id[0] == 'A'):
                                    node = nodes[1].xpath(u".//a")
                                    if len(node) > 0:
                                        title = node[0].text
                                        reportUrl = urlparse.urljoin(currentPage, self.get_attrib(node[0], "href", None))
                                    else:
                                        self.logger.warn("title not found")
                                    date = nodes[2].text
                                    status = str(nodes[3].text_content())
                                    amount = nodes[4].text
                                    node = nodes[5].xpath(u".//a")
                                    if len(node) > 0:
                                        detailUrl = urlparse.urljoin(currentPage, self.get_attrib(node[0], "href", None))
                                    else:
                                        self.logger.warn("detail not found")

                                    if (title == None):
                                        self.logger.warn("title not found")
                                    if (title == None or reportUrl == None or detailUrl == None):
                                        self.logger.warn("parse error!!!")
                                    if (status == u"已結案"):
                                        dir = os.path.join(self.profileDir, id[-1:] + os.sep + id + os.sep)
                                        dirRm = os.path.join(self.profileDir, u"未結案" + os.sep + id[-1:] + os.sep + id + os.sep)
                                        if (self.getIsProfileSaved(dirRm)):
                                            shutil.rmtree(dirRm, ignore_errors=True)

                                        if (not self.getIsProfileSaved(dir)):
                                            #self.logger.warn("saving profile: page %d, id %s" % (pageIndex, id))
                                            overallEntry = OverallEntry()
                                            overallEntry.id = id
                                            overallEntry.title = StrHelper.trim(title)
                                            overallEntry.total = amount
                                            overallEntry.begindate = date
                                            overallEntry.reportUrl = reportUrl
                                            overallEntry.detailUrl = detailUrl

                                            self.logger.info("saving profile %s" % id)

                                            # FIXME: IOError: [Errno 2] No such file or directory: appledaily/profiles/\u672a\u7d50\u6848/
                                            dir = dir.replace(u"未結案"+os.sep, '')
                                            self.saveProfile(id, dir, reportUrl, detailUrl, overallEntry)
                                            self.saveOverallEntry(overallEntry.id, [overallEntry.id,
                                                                                    overallEntry.begindate,
                                                                                    overallEntry.enddate,
                                                                                    overallEntry.total,
                                                                                    overallEntry.doners,
                                                                                    overallEntry.title,
                                                                                    overallEntry.reporter,
                                                                                    overallEntry.reportUrl,
                                                                                    overallEntry.detailUrl])
                                        self.saveUrls(dir, reportUrl, detailUrl)
                                        #self.saveMetadata(dir, title, date, amount)
                                    elif (status == u"未結案"):
                                        dir = os.path.join(self.profileDir, u"未結案" + os.sep + id[-1:] + os.sep + id + os.sep)
                                        overallEntry = OverallEntry()
                                        overallEntry.id = id
                                        overallEntry.title = StrHelper.trim(title)
                                        overallEntry.total = amount
                                        overallEntry.begindate = date
                                        overallEntry.reportUrl = reportUrl
                                        overallEntry.detailUrl = detailUrl
                                        self.logger.info("saving profile %s" % id)
                                        self.saveProfile(id, dir, reportUrl, detailUrl, overallEntry)
                                        self.saveOverallEntryPending(overallEntry.id, [overallEntry.id,
                                                                                overallEntry.begindate,
                                                                                overallEntry.enddate,
                                                                                overallEntry.total,
                                                                                overallEntry.doners,
                                                                                overallEntry.title,
                                                                                overallEntry.reporter,
                                                                                overallEntry.reportUrl,
                                                                                overallEntry.detailUrl])
                                        self.saveUrls(dir, reportUrl, detailUrl)
                                        # pass
                                    else:
                                        self.logger.warn("unknown status")

                    self.logger.info("%d items found" % (row))
                    if (row == 0):
                        break
                    if (not items):
                        self.logger.info("items not found")
                        break
                    if (len(items) == 0):
                        self.logger.info("items length == 0")
                        break
                    pageIndex += 1
            self.logger.info('done!')
        except Exception as ex:
            self.logger.exception(LogHelper.getExceptionMsg(ex, "parsePages"))
        finally:
            pass
Example #27
def main():
    logger = LogHelper('ZiMuZuHelper', cmdLevel='INFO',
                       fileLevel="DEBUG").logger
    onething = WanKeYunApi.WanKeYunApi(logger)
    bok = onething.LoginEx(user="******", passwd="1234567890")
    if bok is False:
        return
    bok = onething.GetUSBInfo()
    if bok is False:
        return
    bok = onething.RemoteDlLogin()
    if bok is False:
        return
    bok = onething.GetRemoteDlInfo()
    if bok is False:
        return
    # --------------------------------------------------------------------------------
    # bok, mediaInfo = onething.UrlResolve('ed2k://|file|%E9%BB%84%E7%9F%B3.Yellowstone.2018.S01E07.%E4%B8%AD%E8%8B%B1%E5%AD%97%E5%B9%95.WEB.720P-%E4%BA%BA%E4%BA%BA%E5%BD%B1%E8%A7%86.mp4|559753916|bdb7746c12f23558420a1bfd610e8bb5|h=xavxscmhtkwu4bl52jiqnmow6pa6ntdf|/')
    # --------------------------------------------------------------------------------
    JobList = []
    OneJob = {
        "filesize":
        0,
        "name":
        '黄石.Yellowstone.2018.S01E07.中英字幕.WEB.720P-人人影视.mp4',
        "url":
        'ed2k://|file|%E9%BB%84%E7%9F%B3.Yellowstone.2018.S01E07.%E4%B8%AD%E8%8B%B1%E5%AD%97%E5%B9%95.WEB.720P-%E4%BA%BA%E4%BA%BA%E5%BD%B1%E8%A7%86.mp4|559753916|bdb7746c12f23558420a1bfd610e8bb5|h=xavxscmhtkwu4bl52jiqnmow6pa6ntdf|/',
    }
    OneJob2 = {
        "filesize":
        0,
        "name":
        '黄石.Yellowstone.2018.S01E08.中英字幕.WEB.720P-人人影视.mp4',
        "url":
        'ed2k://|file|%E9%BB%84%E7%9F%B3.Yellowstone.2018.S01E08.%E4%B8%AD%E8%8B%B1%E5%AD%97%E5%B9%95.WEB.720P-%E4%BA%BA%E4%BA%BA%E5%BD%B1%E8%A7%86.mp4|472873520|c273bf00703b45225f2056393d6de87f|h=yq4vc2vndh2fnqdiwnhnqapwh7xcvlrw|/',
    }
    OneJob3 = {
        "filesize":
        0,
        # "name": '【幻櫻字幕組】【一拳超人 第二季 ONE PUNCH MAN S2】【OVA】【02】【BIG5_MP4】【1280X720】.mp4',
        "name":
        '123.mp4',
        "url":
        "magnet:?xt=urn:btih:UK32AE3T2R3UOBAPDVZJ6W35T7DRSFGJ&dn=&tr=http%3A%2F%2F104.238.198.186%3A8000%2Fannounce&tr=udp%3A%2F%2F104.238.198.186%3A8000%2Fannounce&tr=http%3A%2F%2Ftracker.openbittorrent.com%3A80%2Fannounce&tr=udp%3A%2F%2Ftracker3.itzmx.com%3A6961%2Fannounce&tr=http%3A%2F%2Ftracker4.itzmx.com%3A2710%2Fannounce&tr=http%3A%2F%2Ftracker.publicbt.com%3A80%2Fannounce&tr=http%3A%2F%2Ftracker.prq.to%2Fannounce&tr=http%3A%2F%2Fopen.acgtracker.com%3A1096%2Fannounce&tr=https%3A%2F%2Ft-115.rhcloud.com%2Fonly_for_ylbud&tr=http%3A%2F%2Ftracker1.itzmx.com%3A8080%2Fannounce&tr=http%3A%2F%2Ftracker2.itzmx.com%3A6961%2Fannounce&tr=udp%3A%2F%2Ftracker1.itzmx.com%3A8080%2Fannounce&tr=udp%3A%2F%2Ftracker2.itzmx.com%3A6961%2Fannounce&tr=udp%3A%2F%2Ftracker3.itzmx.com%3A6961%2Fannounce&tr=udp%3A%2F%2Ftracker4.itzmx.com%3A2710%2Fannounce&tr=http%3A%2F%2Fnyaa.tracker.wf%3A7777%2Fannounce"
    }
    JobList.append(OneJob)
    JobList.append(OneJob2)
    JobList.append(OneJob3)
    # --------------------------------------------------------------------------------
    # example: create batch download tasks with the raw API; you need to
    # fill in which disk to download to, usually disk 0
    # partitionID = 0
    # rootPath = onething.user_info["usb_info"][1]['partitions'][partitionID]['path']
    # remoteLocation = rootPath + self.defaultPath
    # remoteLocation = remoteLocation.lower()
    # onething.CreateTasks(JobList, remoteLocation)
    # --------------------------------------------------------------------------------
    # when the WanKeYun is powered off and back on, finished tasks need to be
    # restored; tasks that are still downloading can also be paused
    # query the download task list; finished downloads are included and need filtering
    # nowDownloadingList = onething.user_info["remote_download_list"]["tasks"]
    # for oneTask in nowDownloadingList:
    #     iprogress = int(oneTask["progress"])
    #     if iprogress == 10000:
    #         pass
    #     else:
    #         # onething.StartRemoteDl(oneTask["id"])
    #         onething.PauseRemoteDl(oneTask["id"])
    # --------------------------------------------------------------------------------
    # create batch download tasks (extended version, performs its own checks)
    onething.AddDownloadTasks(JobList)
    # --------------------------------------------------------------------------------
    print("Done.")