import os
import re
import time

import md5  # legacy Python 2 module; on Python 3 use hashlib.md5 instead

import pandas as pd
import tensorflow as tf
from PIL import Image
from sklearn.metrics import roc_auc_score

# Project-local helpers assumed from the calls below but not defined in this
# file: fetchdata, mapper, get_icao_prefix, chop_up, getareas, and the
# two-argument getdata used by extract_segel.
import fetchdata
import mapper


def parse_doc(path, icao, country, title, category):
    print "Parsing AIP doc"
    icao = icao.upper()
    assert len(icao) == 4
    url = fetchdata.getrawurl(path, country=country)
    ret = dict()
    ret['icao'] = icao
    ret['url'] = url
    ret['title'] = title
    ret['name'] = icao + " - " + title
    ret['category'] = category
    #data,nowdate=fetchdata.getdata(path,country=country,maxcacheage=7200)
    blobname = icao + "_" + category
    # Requires the SWFP_DATADIR environment variable to be set.
    tmppath = os.path.join(os.getenv("SWFP_DATADIR"), "aiptext", icao)
    if not os.path.exists(tmppath):
        os.makedirs(tmppath)
    if path.lower().endswith("pdf"):
        outpath_inter = os.path.join(tmppath, blobname + ".tmp.html")

        def render(inputfile, outputfile):
            r = "pdftohtml -c -s -i -zoom 2 -noframes -nodrm %s %s" % (
                inputfile, outputfile)
            # -s is not supported on older pdftohtml, and doesn't appear
            # necessary either.
            print "running", r
            assert 0 == os.system(r)

        fetchdata.getcreate_derived_data_raw(
            path, outpath_inter, render, "html", country=country)
        whole = open(outpath_inter).read()
        # Normalize the pdftohtml output: white background, and strip the
        # "Microsoft Word - " prefix that Word leaves in PDF titles.
        fixed = (whole
                 .replace("<BODY bgcolor=\"#A0A0A0\"",
                          "<BODY bgcolor=\"#FFFFFF\"")
                 .replace("<TITLE>Microsoft Word - ", "<TITLE>"))
    else:
        assert path.endswith("html")
        fixed, date = fetchdata.getdata(path, country=country)
    cksum = md5.md5(fixed).hexdigest()
    outpath = os.path.join(tmppath, blobname + "." + cksum + ".html")
    f = open(outpath, "w")
    f.write(fixed)
    f.close()
    #print "Wrote raw:",out,outpath
    ret['checksum'] = cksum
    ret['date'] = fetchdata.get_filedate(outpath)
    ret['blobname'] = blobname
    return ret

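# A self-contained sketch of the checksum-stamped naming scheme parse_doc
# uses for its cleaned HTML blobs ("<ICAO>_<category>.<md5>.html" under
# $SWFP_DATADIR/aiptext/<ICAO>/).  blob_path is a hypothetical helper, not
# part of the project; hashlib stands in for the legacy md5 module (pass
# bytes on Python 3).
def blob_path(datadir, icao, category, content):
    import hashlib
    cksum = hashlib.md5(content).hexdigest()
    blobname = "%s_%s" % (icao.upper(), category)
    return os.path.join(datadir, "aiptext", icao.upper(),
                        "%s.%s.html" % (blobname, cksum))
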
def lr_test(epo):
    x = tf.placeholder(tf.float32, shape=[None, 108])
    y = tf.placeholder(tf.float32, shape=[None])
    m = 1
    learning_rate = 0.3
    w = tf.Variable(tf.random_normal([108, m], 0.0, 0.5), name='u')
    W = tf.matmul(x, w)
    # With m == 1 this is plain logistic regression, so the linear term is
    # the logit.  (The flattened original applied tf.nn.sigmoid here and then
    # fed the result to sigmoid_cross_entropy_with_logits, which squashes
    # twice; the loss expects raw logits.)
    logits = tf.reduce_sum(W, 1)
    pred = tf.nn.sigmoid(logits)
    cost1 = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y))
    cost = tf.add_n([cost1])
    train_op = tf.train.FtrlOptimizer(learning_rate).minimize(cost)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess = tf.Session()
    sess.run(init_op)
    train_x, train_y, test_x, test_y = getdata()
    result = []
    time_s = time.time()
    for epoch in range(0, epo):
        f_dict = {x: train_x, y: train_y}
        _, cost_, predict_ = sess.run([train_op, cost, pred],
                                      feed_dict=f_dict)
        auc = roc_auc_score(train_y, predict_)
        time_t = time.time()
        if epoch % 100 == 0:
            # Evaluate only; the flattened original also ran train_op here,
            # which would have trained on the test set.
            f_dict = {x: test_x, y: test_y}
            cost_, predict_test = sess.run([cost, pred], feed_dict=f_dict)
            test_auc = roc_auc_score(test_y, predict_test)
            print("%d %ld cost:%f,train_auc:%f,test_auc:%f"
                  % (epoch, (time_t - time_s), cost_, auc, test_auc))
            result.append([epoch, (time_t - time_s), auc, test_auc])
    pd.DataFrame(result, columns=['epoch', 'time', 'train_auc', 'test_auc']
                 ).to_csv("data/lr.csv")

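# lr_test above and mlr_test below rely on a no-argument getdata() helper
# that is not defined in this file (distinct from the two-argument
# getdata(path, country) that extract_segel calls).  A minimal stand-in with
# the shapes implied by the placeholders -- 108 float32 features, 0/1 float32
# labels -- might look like this; the real loader and its feature encoding
# are assumptions:
def getdata():
    import numpy as np
    rng = np.random.RandomState(0)
    train_x = rng.rand(1000, 108).astype('float32')
    train_y = (rng.rand(1000) > 0.5).astype('float32')
    test_x = rng.rand(200, 108).astype('float32')
    test_y = (rng.rand(200) > 0.5).astype('float32')
    return train_x, train_y, test_x, test_y
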
def extract_segel():
    segeldata, stamp = getdata(
        "/ImageVaultFiles/id_21795/cf_78/Sektorer-2013-CU-rev1.TXT", "segel")
    return list(getareas(segeldata, stamp))

def parse_landing_chart(path, arppos, icao, country='se', variant=''):
    icao = icao.upper()
    if variant and not variant.startswith("."):
        variant = "." + variant
    print "Running parse_landing_chart"
    print "country:", country
    #p=parse.Parser(path,country=country)
    arppos = mapper.from_str(arppos)
    res = []
    #assert p.get_num_pages()<=2
    url = fetchdata.getrawurl(path, country=country)
    ret = dict()
    ret['url'] = url
    data, nowdate = fetchdata.getdata(path, country=country, maxcacheage=7200)
    cksum = md5.md5(data).hexdigest()
    ret['checksum'] = cksum
    #page=p.parse_page_to_items(0, donormalize=False)
    #ret['width']=page.width
    #ret['height']=page.height
    #width=page.width
    #height=page.height
    #scale=2048.0/min(width,height)
    #width*=scale
    #height*=scale
    #width=int(width+0.5)
    #height=int(height+0.5)
    blobname = icao + variant
    tmppath = os.path.join(os.getenv("SWFP_DATADIR"), "adcharts", icao)
    if not os.path.exists(tmppath):
        os.makedirs(tmppath)
    assert len(icao) == 4
    outpath = os.path.join(tmppath, blobname + "." + cksum + ".png")

    def render(inputfile, outputfile):
        ext = inputfile.split(".")[-1].lower()
        if ext == 'jpg' or ext == 'png':
            assert 0 == os.system(
                "convert -adaptive-resize 2500x2500 %s %s" % (
                    inputfile, outputfile))
        else:
            ext = 'pdf'
            r = ("pdftoppm -f 0 -l 0 -scale-to 2500 -png -freetype yes "
                 "-aa yes -aaVector yes %s >%s" % (inputfile, outputfile))
            print "rendering", r
            assert 0 == os.system(r)

    ret['image'] = blobname + "." + cksum + ".png"
    fetchdata.getcreate_derived_data_raw(
        path, outpath, render, "png", country=country)
    fspath = fetchdata.getdatafilename(path, country=country)
    sizepts = None
    # Look for the "Page size: W x H pts" line in pdfinfo output (".age"
    # matches both "Page" and "page").
    for line in os.popen("pdfinfo " + fspath):
        m = re.match(
            r"\s*.age\s+size:\s*(\d+\.?\d*)\s*x\s*(\d+\.?\d*)\s*pts.*", line)
        if m:
            sizepts = (float(m.groups()[0]), float(m.groups()[1]))
    if sizepts:
        # 1 pt = 25.4/72 mm ~= 0.3527 mm
        sizemm = (0.3527 * sizepts[0], 0.3527 * sizepts[1])
        ret['mapsize'] = sizemm
        print "Mapsize:", sizemm
    else:
        raise Exception("No size of this PDF!")
    outpath2 = os.path.join(tmppath, blobname + "." + cksum + ".2.png")

    def greyscale(input, output):
        assert 0 == os.system(
            "convert -define png:color-type=3 -depth 8 -type Palette "
            "-define \"png:compression-level=9\" %s %s" % (input, output))

    fetchdata.getcreate_local_data_raw(outpath, outpath2, greyscale)
    i = Image.open(outpath2)
    width, height = i.size
    #ret['width']=page.width
    #ret['height']=page.height
    ret['render_width'] = width
    ret['render_height'] = height
    if country != 'raw':
        icao_prefix = get_icao_prefix(country)
        assert icao.startswith(icao_prefix)
    for level in xrange(5):
        hashpath = os.path.join(
            tmppath, "%s.%s-%d.bin" % (blobname, cksum, level))
        fetchdata.getcreate_local_data_raw(
            outpath2, hashpath,
            # Bind level at definition time, in case the callback is stored
            # and invoked after the loop has moved on.
            lambda input, output, level=level: chop_up(input, output, level))
    ret['blobname'] = blobname
    ret['variant'] = variant
    return ret

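# parse_landing_chart converts the pdfinfo page size from points to
# millimetres using 1 pt = 25.4/72 mm ~= 0.3527 mm.  The same parsing in
# isolation, for reference (pdf_page_size_mm is a hypothetical helper; the
# "Page size: W x H pts" line format of pdfinfo output is assumed):
def pdf_page_size_mm(pdfinfo_output):
    for line in pdfinfo_output.splitlines():
        m = re.match(
            r"\s*.age\s+size:\s*(\d+\.?\d*)\s*x\s*(\d+\.?\d*)\s*pts.*", line)
        if m:
            return (0.3527 * float(m.group(1)), 0.3527 * float(m.group(2)))
    return None
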
def mlr_test(m, epo):
    """A bare-bones implementation of the MLR (mixed logistic regression)
    model.  The specialized parameter update under L1 / L2,1 regularization
    from the paper is not implemented; plain L1 and L2 penalty terms are
    simply added to the loss.  The dataset is rather rough -- and how much
    difference is there between L2,1 and plain L2 regularization anyway?

    @param m: number of mixture components (the structural prior)
    @param epo: number of training epochs
    @return
    """
    learning_rate = 0.3
    x = tf.placeholder(tf.float32, shape=[None, 108])  # feature input
    y = tf.placeholder(tf.float32, shape=[None])       # label input
    u = tf.Variable(tf.random_normal([108, m], 0.0, 0.5), name='u')  # gating weights
    w = tf.Variable(tf.random_normal([108, m], 0.0, 0.5), name='w')  # per-component LR weights
    U = tf.matmul(x, u)
    p1 = tf.nn.softmax(U)  # structural prior: soft assignment to components
    W = tf.matmul(x, w)
    p2 = tf.nn.sigmoid(W)  # per-component prediction
    # Mixture prediction: component predictions weighted by the prior and
    # summed; this is already a probability.
    pred = tf.reduce_sum(tf.multiply(p1, p2), 1)
    paras = tf.concat([w, u], 0)
    l1_loss = tf.contrib.layers.l1_regularizer(0.1)(paras)  # L1 penalty
    l2_loss = tf.contrib.layers.l2_regularizer(0.1)(paras)  # L2 penalty
    # Since pred is a probability, use log loss directly.  (The flattened
    # original passed it to sigmoid_cross_entropy_with_logits, which would
    # squash it through a sigmoid a second time.)
    cost1 = tf.losses.log_loss(labels=y, predictions=pred) + l1_loss + l2_loss
    cost = tf.add_n([cost1])
    train_op = tf.train.FtrlOptimizer(learning_rate).minimize(cost)
    train_x, train_y, test_x, test_y = getdata()
    time_s = time.time()
    result = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(0, epo):
            f_dict = {x: train_x, y: train_y}
            _, cost_, predict_ = sess.run([train_op, cost, pred],
                                          feed_dict=f_dict)
            auc = roc_auc_score(train_y, predict_)
            time_t = time.time()
            if epoch % 100 == 0:
                # Evaluate only; the flattened original also ran train_op
                # here, which would have trained on the test set.
                f_dict = {x: test_x, y: test_y}
                cost_, predict_test = sess.run([cost, pred], feed_dict=f_dict)
                test_auc = roc_auc_score(test_y, predict_test)
                print("%d %ld cost:%f, train_auc:%f, test_auc:%f"
                      % (epoch, (time_t - time_s), cost_, auc, test_auc))
                result.append([epoch, (time_t - time_s), auc, test_auc])
    pd.DataFrame(result, columns=['epoch', 'time', 'train_auc', 'test_auc']
                 ).to_csv("data/mlr_" + str(m) + '.csv')

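# A hypothetical way to run the two experiments above; the epoch count and
# the sweep over m are made up, since the original file does not show how
# they are invoked.  Both functions write their CSV results under data/.
if __name__ == "__main__":
    if not os.path.exists("data"):
        os.makedirs("data")
    tf.reset_default_graph()
    lr_test(1000)                 # plain LR baseline -> data/lr.csv
    for m in (4, 8, 12):          # sweep the number of MLR components
        tf.reset_default_graph()  # start each run from a fresh graph
        mlr_test(m, 1000)         # -> data/mlr_<m>.csv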