Example #1
def create_relationship(
        old_dict_list):  # receives the list of dicts from t_atividade
    """
    Builds a relationship between 't_atividade' and 'custeio'.
    Each relationship record holds the IDs of both sides as well as the
    value associated with the relationship.
    :param old_dict_list: list of activity dicts read from t_atividade
    :return: list of relationship dicts
    """

    custeio_dict_list = readcsv(
        "csv_test/custeio.csv")  # list of custeio dicts

    relationship_dict_list = [
    ]  # list of dicts that will become the relationship .csv

    for atividade_dict in old_dict_list:  # iterate over each activity dict
        # DEBUG
        #print(atividade_dict['nome'])

        for custeio_dict in custeio_dict_list:  # iterate over each custeio dict
            """
            Implementation plan:
            1. Relate every element of t_atividade to every custeio ( X )
            2. Drop the relationships that have no value ( X )
            3. Turn the remaining data into a list of dicts to be written to the .csv (  )
                3.1. Name the cells of the first line of the .csv after the headers (  )
            """

            relationship_dict = {}

            # custeio_dict['id_custeio'] + "-" + custeio_dict['nome'] yields
            # 'id_custeio-nome_custeio', the key under which t_atividade
            # stores the value of the custeio related to the activity.

            # If a t_atividade/custeio pair has no value, the pair must not
            # appear in the table; remember that the key for each custeio in
            # the t_atividade sheet is formed by id_custeio + nome_custeio.
            if atividade_dict[custeio_dict['id_custeio'] + "-" +
                              custeio_dict['nome']] != "":

                relationship_dict['id_atividade'] = atividade_dict[
                    'id_atividade']
                relationship_dict['id_custeio'] = custeio_dict['id_custeio']
                relationship_dict['valor'] = atividade_dict[
                    custeio_dict['id_custeio'] + "-" + custeio_dict['nome']]

                relationship_dict_list.append(relationship_dict)
                #print(relationship_dict)

                # DEBUG
                #print("ID Atividade: " + atividade_dict['id_atividade'] + " -> " +
                #      "ID Custeio: " + custeio_dict['id_custeio'] + " " +
                #      custeio_dict['nome'] + ": " +
                #      atividade_dict[ custeio_dict['id_custeio'] + "-" + custeio_dict['nome']])

    #for dicio in old_dict_list:
    #    print("Activity ID " + dicio['id_atividade'] + " '" + dicio['nome'] +
    #          "' is related to the custeio WITH VALUE: " + dicio['11-passagens'])
    return relationship_dict_list
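
Note: this example assumes a readcsv that returns the CSV rows as a list of dicts keyed by the header row. A minimal sketch of such a helper, offered as an assumption rather than the project's actual implementation:

import csv

def readcsv(path):
    # Each row becomes a dict whose keys are the cells of the header row.
    with open(path, newline='', encoding='utf-8') as f:
        return list(csv.DictReader(f))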
Example #2
def test_readcsv():
    from readcsv import readcsv
    raw_data = readcsv('test_readcsv.csv')

    assert raw_data == {'time': [1, 7, 1.2], 'voltage': [2, 9, 4.5]}
    assert raw_data != {
        'time': [1, '', 7, 'c', 1.2],
        'voltage': [2, 6, 9, 10, 4.5]
    }
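
For this test to pass, readcsv must return a dict of numeric column lists and silently drop rows whose cells do not parse as numbers. A hypothetical implementation consistent with the assertions (the real module is not shown):

import csv

def readcsv(path):
    data = {'time': [], 'voltage': []}
    with open(path, newline='') as f:
        for row in csv.reader(f):
            try:
                t, v = float(row[0]), float(row[1])
            except (ValueError, IndexError):
                continue  # skip the header and rows with non-numeric cells
            data['time'].append(t)
            data['voltage'].append(v)
    return data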
Example #3
def allcountries():
    """
    Cette fonction permet de réduire la liste des pays en supprimant
    les doublons avec set() et de les classer
    par ordre croissant avec sorted().
    On retrouve chaque pays du fichier dans une liste.

    This function allows you to reduce the list of countries by deleting
    the duplicates with set() and to sort them
    in ascending order with sorted().
    Each country in the file is found in a list.
    """
    logging.debug("Utilisation de la fonction allcountries()")
    df = readcsv('Region')
    countries = list(sorted(set(df['Region'].to_list())))
    logging.debug(countries)
    return countries
Example #4
def allyears():
    """
    Cette fonction permet de réduire la liste des années en supprimant
    les doublons avec set() et de les classer
    par ordre croissant avec sorted().
    On retrouve chaque année du fichier dans une liste.

    This function allows you to reduce the list of years by deleting
    the duplicates with set() and to sort them
    in ascending order with sorted().
    Each year of the file can be found in a list.
    """
    logging.debug("Utilisation de la fonction allyears()")
    df = readcsv('Year', 'Region')
    years = list(sorted(set(df['Year'].to_list())))
    logging.debug(years)
    return years
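
Examples #3 and #4 (and #7-#9 below) call readcsv with column names and treat the result as a pandas DataFrame. A plausible pandas-backed sketch; the CSV path here is a placeholder, not the project's real file:

import pandas as pd

def readcsv(*columns):
    # Load only the requested columns from the dataset.
    return pd.read_csv('data/co2_emissions.csv', usecols=list(columns))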
Example #5
def showphoto(path,
              starttime,
              phototimefile=r'D:\mitacs\image\photo\phototime.csv'):
    photodata = readcsv(phototimefile)
    phototime = []
    for i in range(0, len(photodata)):
        phototime.append(
            datetime.datetime.strptime(photodata[i][1], '%Y-%m-%d %H:%M:%S'))
        print(phototime[i])
        if phototime[i] > starttime:
            if (phototime[i] - starttime) > (starttime - phototime[i - 1]):
                img = Image.open(photodata[i - 1][0])
                print(photodata[i - 1][0])
            else:
                img = Image.open(photodata[i][0])
                print(photodata[i][0])
            img.show()
Example #6
def readctg(ctgcsv, excludeitself=False):
    ## include itself
    r = readcsv.readcsv(ctgcsv, 'int')
    ctg = r.getData()
    id = np.unique(ctg[:,0])
    ctgdic = {}

    for item in id:
        ctgdic[item] = []
        
    if excludeitself:
        for item in ctg:
            if item[0] != item[1]:
                ctgdic[item[0]].append(item[1])
    else:
        for item in ctg:
            ctgdic[item[0]].append(item[1])
            
    return ctgdic
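
A short usage sketch for readctg: it expects a contiguity table whose rows are (region_id, neighbor_id) pairs and returns an adjacency dict. The .ctg path below is illustrative only.

ctg = readctg('C:/_DATA/contiguity.ctg', excludeitself=True)
for region, neighbors in sorted(ctg.items()):
    print(region, '->', neighbors)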
Example #7
def byyear(year):
    """
    Cette fonction permet, en fonction de l'année choisi, de récupérer
    la moyenne des émission totales de CO2 (en Milliers de tonnes)
    au niveau mondial.

    This function allows, depending on the chosen year, to retrieve
    average total CO2 emissions (in Thousands of tons)
    worldwide.
    """
    logging.debug(f"Utilisation de la fonction byyear({year})")
    globalemission = ["Emissions (thousand metric tons of carbon dioxide)"]
    df = readcsv('Year', 'Value', 'Emission', 'Region')
    df = df.loc[df['Year'].isin([str(year)])]
    df = df.loc[df['Emission'].isin(globalemission)]
    res = {}
    res["Year"] = year
    res["Total"] = round(df['Value'].mean(), 3)
    logging.debug(res)
    return res
Example #8
def bycountry(country):
    """
    Cette fonction permet, en fonction du pays choisi, de récupérer
    l'entrée la plus récente concernant l'émission totale de CO2
    en Milliers de tonnes.

    This function allows, depending on the chosen country, to retrieve
    the most recent entry for total CO2 emissions
    in Thousands of tons.
    """
    logging.debug(f"Utilisation de la fonction bycountry({country.title()})")
    df = readcsv('Region', 'Year', 'Value')
    df = df.loc[df['Region'].isin([country])]
    df = df.sort_values(by='Year', ascending=False)
    res = {}
    res["Country"] = str(df.iloc[0][0]).title()
    res["Year"] = int(df.iloc[0][1])
    res["Emissions"] = float(df.iloc[0][2])
    logging.debug(res)
    return res
Example #9
def bypercapita(country):
    """
    Cette fonction permet, en fonction du pays choisi,
    d'afficher les émissions de CO2 (en tonnes par habitant)
    par rapport aux différentes années de relevés.

    This function allows, depending on the chosen country,
    display CO2 emissions (in tons per capita)
    in relation to the different survey years.
    """
    logging.debug(f"Utilisation de la fonction bypercapita({country.title()})")
    capita = ["Emissions per capita (metric tons of carbon dioxide)"]
    df = readcsv('Region', 'Year', 'Emission', 'Value')
    df = df.loc[df['Region'].isin([country])]
    df = df.loc[df['Emission'].isin(capita)]
    res = {}
    nbannee = len(allyears())
    for i in range(nbannee):
        res[int(df.iloc[i, 1])] = float(df.iloc[i, 3])
    logging.debug(res)
    return res
Example #10
def test_readcsv(capsys):
    dirpath = Path("C:/code/cohort4/python-IO")
    filename = "Census_by_Community_2019.csv"
    hdr = "************************************************************\n"
    hdr += "**************  Calgary Public Data Summary  ***************\n"
    hdr += "************************************************************\n"
    content1 = "Residential and SOUTH: 230129"
    content2 = "Industrial and NORTH: 0"
    content3 = "Overall Total: 1283177"
    content4 = "Total number of records: 307"
    ftr = "************************************************************\n"
    ftr += "***************  End of Report Summary  ********************\n"
    ftr += "************************************************************\n"

    result = readcsv(dirpath, filename)
    captured = capsys.readouterr()
    reportdata = readfile(dirpath, "report.txt")

    assert result == {
        "keyvaluepair": {
            "RES_CNT": 10,
            "CLASS": 1,
            "SECTOR": 5
        },
        "linenum": 308
    }
    assert hdr in captured.out
    assert ftr in captured.out
    assert content1 in captured.out
    assert content2 in captured.out
    assert content3 in captured.out
    assert content4 in captured.out
    assert os.path.isfile(os.path.join(dirpath, "report.txt"))
    assert content1 in reportdata
    assert content2 in reportdata
    assert content3 in reportdata
    assert content4 in reportdata
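
The test above also relies on a readfile helper. A minimal version consistent with the "content in reportdata" assertions, assumed here since the project's own helper is not shown:

import os

def readfile(dirpath, filename):
    # Return the whole report as a single string.
    with open(os.path.join(dirpath, filename), encoding='utf-8') as f:
        return f.read()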
Example #11
def drowFathers(slicelabel):
    label_data = readcsv(r'D:\mitacs\audio classification\label.csv')
    branch = []
    for z in range(1, len(slicelabel), 2):
        drowFather2child(label_data, slicelabel[z], branch)
    print(branch)
    x = 0
    while x < len(branch):
        y = x + 2
        while y < len(branch):
            if branch[x] == branch[y] and branch[x + 1] == branch[y + 1]:
                branch.pop(y + 1)
                branch.pop(y)
            y = y + 2
        x = x + 2
    print(branch)
    branch2 = list(map(lambda x: x.replace(' ', '\n'), branch))
    print(branch2)
    slicelabel = list(map(lambda x: x.replace(' ', '\n'), slicelabel))
    g = Digraph('测试图片', format="png")  # graph name: "test image"
    for x in range(1, len(slicelabel), 2):
        print(slicelabel[x])
        color = round(8 * float(slicelabel[x + 1]), 3)
        print(color)
        g.node(slicelabel[x],
               color='0.000 %f %f' % (color, color),
               shape='circle',
               width='2',
               height='2',
               penwidth='%f' % color)
    for i in range(0, len(branch) - 1, 2):
        g.node(branch2[i], shape='circle', width='2', height='2')
        g.node(branch2[i + 1], shape='circle', width='2', height='2')
        g.edge(branch2[i + 1], branch2[i])
        # i = i+2
    #print(g.source)
    g.view()
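
The two nested while loops above deduplicate a flat list that stores (child, parent) pairs in alternating slots. An equivalent, more idiomatic sketch of that step:

def dedup_pairs(flat):
    # Rebuild the pairs, keep first occurrences in order, then re-flatten.
    pairs = list(zip(flat[0::2], flat[1::2]))
    return [item for pair in dict.fromkeys(pairs) for item in pair]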
Example #12
    The calculated values are compiled into a dictionary called metrics and
    written to a JSON file named after the input file in the folder
    output_data. Once the JSON file has been written, a new message is
    entered into the log.

    """
    logging.basicConfig(filename="Main_Log.txt",
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    Successful_Read = False

    while Successful_Read is False:
        filename = input("Enter filename: ")

        try:
            raw_data = readcsv('test_data/' + filename + '.csv')
        except FileNotFoundError:
            print("File not found, enter new filename")
            continue

        try:
            validate_data(raw_data)
        except DiffListLengthError:
            print("File contains lists of different lengths, "
                  "enter new filename")
            continue
        except NegTimeValueError:
            print("File contains negative time values, " "enter new filename")
            continue
        logging.warning("Info: " + filename + ".csv was read and validated")
        Successful_Read = True
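
The loop assumes two custom exceptions raised by validate_data. Hypothetical minimal definitions matching the messages printed above (the real module is not shown):

class DiffListLengthError(Exception):
    """Raised when the CSV columns have different lengths."""

class NegTimeValueError(Exception):
    """Raised when the time column contains a negative value."""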
Example #13
print data

datadict = {}
for item in data:
    datadict[item[0]] = int(item[1])

print datadict

for key in sorted(datadict):
    #print "%s: %s" % (key, datadict[key])
    print key, '\t', datadict[key]
"""

tractcsv = "C:/_DATA/CancerData/NewEnglandDiseaseData/tract/tractinfo_intonly.csv"
countycsv = "C:/_DATA/CancerData/NewEnglandDiseaseData/tract/countyinfo_intonly.csv"
r = readcsv.readcsv(tractcsv)
tractdata = r.getData()

r = readcsv.readcsv(countycsv)
countydata = r.getData()

countydict = {}
scale = []
for i in xrange(len(countydata)):
    countydict[int(countydata[i, 4])] = i
    scale.append(1.0 * countydata[i, 5] / countydata[i, 6])

print countydict
# tractpop = np.zeros(len(countydata))
for i in xrange(len(tractdata)):
    # print tractdata
Example #14
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))

import numpy as np
import readcsv
from scipy import stats

#--------------------------------------------------------------------------
#MAIN

if __name__ == "__main__":

    filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/'
    sigmacsv = filePath + 'sigma_summary.csv'
    r = readcsv.readcsv(sigmacsv)
    data = r.getData()
    diff = data[:,0] - data[:,1]
    print 't-statistic = %6.3f, pvalue = %6.4f' % stats.ttest_1samp(diff,0.0)
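
For context, a one-sample t-test on the column difference is equivalent to a paired t-test on the two columns. A self-contained Python 3 check with toy data:

import numpy as np
from scipy import stats

a = np.array([1.0, 2.0, 3.0, 4.0])
b = np.array([1.1, 1.9, 3.2, 3.8])
print(stats.ttest_1samp(a - b, 0.0))  # same statistic and p-value as...
print(stats.ttest_rel(a, b))          # ...the paired test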
Example #15
import csv
from readcsv import readcsv

CSV_FILE_PATH = "csv_test/atividade.csv"
ATIVIDADE_DICT_LIST = readcsv(
    CSV_FILE_PATH
)  # the list of dicts generated from atividade.csv


def show_dicts(dict_list, showkeys=False):
    """
    Função auxiliar que mostra o estado da lista de dicionários
    passada como parâmentro. Esta função tem o objetivo apenas
    de facilitar o debug
    :param dict_list:
    :return:
    """
    for dicio in dict_list:
        print(dicio)

    if showkeys:
        print("-------------------------------------------------")
        print("KEYS LIST")
        print("[", end='')
        for key in dict_list[0].keys():
            print(key, end=' ')
        print("]")
        print("-------------------------------------------------")


def create_related_csv(filedirectory, dict_list):  #['id_custeio', 'nome']
Example #16
# -*- coding: UTF-8 -*-
import urllib2
from datetime import datetime
import sys
from readcsv import readcsv
from bs4 import BeautifulSoup

reload(sys)
sys.setdefaultencoding('UTF-8')

stock_id_dict = readcsv()

def examine_ipodate(stocks_array):
    # stocks_array = stocks.split("\n")
    stocks_to_remove_array = []
    print stocks_array

    for x in stocks_array:
        if x.startswith("00"): # mark ETFs for removal
            print "ETF: "+ x
            stocks_to_remove_array.append(x)
        if int(x)>=10000: # mark convertible corporate bonds for removal
            stocks_to_remove_array.append(str(x))

    for x in stocks_to_remove_array:
        stocks_array.remove(x)

    stock_examined_id = []
    stock_examined_shortname = []
    stock_failed_id = []
    stock_failed_shortname = []
    
Example #17
    #plotshapefile.plotshapefile('C:/_DATA/CancerData/SatScan/NorthEasternUS.shp', plotvalue, 'Set3', 'maxllr_data')
    #debug---------------/
    
    return maxllr

#--------------------------------------------------------------------------
#MAIN

if __name__ == "__main__":
    start_time = datetime.now()
    print "begin at " + str(start_time)
    
    filePath = 'C:/_DATA/CancerData/NewEnglandDiseaseData/'
    randomdatacsv = filePath + 'format/Random.csv'
    r = readcsv.readcsv(randomdatacsv, 'int')
    randomdata = r.getData()
    pop = randomdata[:,1]
    popsum = sum(pop)
    
    repeatTime = len(randomdata[0,:])-2
    repeatTime = 1
    numUnit = len(pop)
    maxLLRlist = []
    
    llr = likelihoodratio.likelihoodratio(sum(randomdata[:,2]), popsum)
    printstep = 100
    for repeat in xrange(repeatTime):
        if repeat%printstep == 0:
            print repeat, str(datetime.now())
        repeat = 2
Example #18
File: kNN.py Project: TechBK/kNN
    tap_nb = deepcopy(traintable)
    for x in tap_nb:
        #if hamkhoangcach(x,z,w,colnum-1) < d:  # colnum is already known here
        d = hamkhoangcach(x,z,w,colnum-1)
        x.append(d)
        print("element x in nb",x)
        print("distance: ",d)
        #tap_nb.append(y)
    tap_nb.sort(key=lambda y: y[-1])  # y[-1] is the last element (the distance)
    tap_nbk = tap_nb[0:k]  # take the first k elements
    return tap_nbk


if __name__ == '__main__':
    w = [1,1,1,1,1,1,1,1]
    train = readcsv('prostate-training-data.csv',1)
    print("train file: ",train)
    test = readcsv('prostate-test-Vi-Du.csv',0)
    #print("test row count:", len(test[2]))
    print("test file:",test)

    colnum = train[1]

    # find the normalization maxima
    print("colnum :",colnum )
    print("max of train: ", maxcol(colnum, train[2]))
    print("max of test: ", maxcol(colnum, test[2]))


    maxchuanhoa = maxall(colnum,test[2],train[2])
    print("normalization max",maxchuanhoa)
Example #19
    if stopflag:
        return templist
    else:
        return checkContiguous(ctg, newRegionDict, regiondict)


# --------------------------------------------------------------------------
# MAIN

if __name__ == "__main__":
    start_time = datetime.now()
    print "begin at " + str(start_time)
    filePath = "C:/_DATA/CancerData/NewEnglandDiseaseData/"

    datacsv = "C:/_DATA/CancerData/NewEnglandDiseaseData/format/simundernull9999.csv"
    r = readcsv.readcsv(datacsv)
    data = r.getData()
    pop = data[:, 1]

    ctgcsv = "C:/_DATA/CancerData/SatScan/NortheeaternUS_re_contiguity.ctg"
    ctgdic = readctg(ctgcsv)
    ## read image via matplotlib
    # im = mpimg.imread(imagefile)
    # num_row, num_col = im.shape
    # num_row, num_col = int(num_row), int(num_col)
    # im.shape = (-1,1)
    # imagedata = im[:,0].tolist()

    ## global variable
    alpha = 1
    beta = 0.1
Example #20
import config
import readcsv
import datacleaning
import featurescaling
import algorithm
import predict
import writecsv
import numpy

data = readcsv.readcsv(config.trainingset)
config.data = data
config.row_ = data.shape[0]
config.col_ = data.shape[1]

datacleaning.cabin(config.data, 10)  # Problem specific data cleaning
datacleaning.gender(config.data, 3)  # Problem specific data cleaning
feature = config.allowed_feature
config.cdata = datacleaning.reduce_features(config.row_, len(feature), feature, config.data)
config.cdata = datacleaning.polynomial_feature_cubic(config.cdata)
crow_ = (config.cdata).shape[0]
ccol_ = (config.cdata).shape[1]

featurescaling.featurescaling(config.cdata)

feature = config.outputlabel
y = datacleaning.get_output_label(config.row_, feature, config.data)
config.y = y
theta = numpy.zeros((ccol_, 1))

print "training....."
Example #21
parser.add_argument('--csvcolumnseparator', default=',')
parser.add_argument('--csvemptycell', default='')
parser.add_argument('--csvcolorformat', default='html')
parser.add_argument('--nullcolor', default=__NULL_COLOR)
parser.add_argument('--topdepth', '-t', type=float, default=np.nan)
parser.add_argument('--bottomdepth', '-b', type=float, default=np.nan)
parser.add_argument('--topshift', type=float, default=0.0)
parser.add_argument('--bottomshift', type=float, default=0.0)
parser.add_argument('--width', type=int)
parser.add_argument('--height', type=int)
parser.add_argument('--outputfilename', '-o')

args = parser.parse_args()

layersheader, layersdata = readcsv.readcsv(args.layersfilename,
                                           delimiter=args.csvcolumnseparator,
                                           headerlines=args.layersskiplines,
                                           onlystr=True)
layercodes = layersdata[args.layerscodecolumn - 1]
tops = layersdata[args.layerstopcolumn - 1]
bottoms = layersdata[args.layersbottomcolumn - 1]

if tops[0] == args.csvemptycell:
    tops[0] = np.nan
if bottoms[-1] == args.csvemptycell:
    bottoms[-1] = np.nan

tops = np.array(list(map(float, tops)))
bottoms = np.array(list(map(float, bottoms)))

if args.topdepth is not np.nan and args.bottomdepth is not np.nan:
    n = args.height
Example #22
    Duczmal_d = [95,99,109,110,113,117,129,142,148,150,159,160,164,180,182]
    Duczmal_e = [84,95,99,102,109,110,113,116,117,129,142,148,150,159,160,164,180,182,232,233,236]
    #Duczmal_f = []
    Duczmal_j = [61,76,78,89,94,98,106,111,119,121,123,131,136,151,154,157,158,160,161,162,163,164,165,167,168,169,\
                 170,171,172,173,174,175,178,179,180,181,182,183,184,185,227,228,230,231,232,233,234,235,236,237,238,239,240,241,242]
    Duczmal_k = Duczmal_j + [84,88,90,91,92,93,95,96,99,102,114,118,135,139,140,144,145,146,147,148,150,153,156,159]
    Duczmal_k.remove(169)

    
    clustername = ['6mixed16','6rural16','6urban16','Duczmal_a','Duczmal_b',\
                   'Duczmal_c','Duczmal_d','Duczmal_e','Duczmal_j','Duczmal_k']#'Duczmal_f','Duczmal_g','Duczmal_h',\
    ##'Duczmal_i','Duczmal_j']
    clusterIDlist = [mixed, rural, urban, Duczmal_a, Duczmal_b, Duczmal_c, Duczmal_d, Duczmal_e, Duczmal_j, Duczmal_k]

    tractcsv = 'C:/_DATA/CancerData/NewEnglandDiseaseData/tract/tractinfo_intonly.csv'
    r = readcsv.readcsv(tractcsv)
    data = r.getData()

    
    
    for clusteridx in xrange(len(clustername)):
        clustertype = clustername[clusteridx]
        cluster = clusterIDlist[clusteridx]
        cluster_tract = []
        for i in xrange(len(data[:,-2])):
            if int(data[i,-2]) in cluster:
                cluster_tract.append(i)
        print clustertype, cluster_tract
        
    print "end at " + str(finish_time)
    print 'Time for whole procedure: ' + str(finish_time - start_time)
Example #23
print datadict

for key in sorted(datadict):
    #print "%s: %s" % (key, datadict[key])
    print key, '\t', datadict[key]
'''

def weightedchoice(cumsumhere, totalsum):
    y = random.randint(totalsum)
    

filepath = 'C:/_DATA/CancerData/NewEnglandDiseaseData/'
tractcsv = filepath + 'tract/tractinfo_intonly.csv'
countycsv = filepath + 'tract/countyinfo_intonly.csv'
r = readcsv.readcsv(tractcsv)
tractdata = r.getData()

r = readcsv.readcsv(countycsv)
countydata = r.getData()

#countydict = {}
countytractid = []
countytractpop = []
for i in xrange(len(countydata)):
    #countydict[int(countydata[i,4])] = countydata[i,0]
    countytractid.append([])
    countytractpop.append([])

for i in xrange(len(tractdata)):
    #id = countydict[int(tractdata[i,6])]
Example #24
    Duczmal_d = [95,99,109,110,113,117,129,142,148,150,159,160,164,180,182]
    Duczmal_e = [84,95,99,102,109,110,113,116,117,129,142,148,150,159,160,164,180,182,232,233,236]
    #Duczmal_f = []
    Duczmal_j = [61,76,78,89,94,98,106,111,119,121,123,131,136,151,154,157,158,160,161,162,163,164,165,167,168,169,\
                 170,171,172,173,174,175,178,179,180,181,182,183,184,185,227,228,230,231,232,233,234,235,236,237,238,239,240,241,242]
    Duczmal_k = Duczmal_j + [84,88,90,91,92,93,95,96,99,102,114,118,135,139,140,144,145,146,147,148,150,153,156,159]
    Duczmal_k.remove(169)

    
    clustername = ['6mixed16','6rural16','6urban16','Duczmal_a','Duczmal_b',\
                   'Duczmal_c','Duczmal_d','Duczmal_e','Duczmal_j','Duczmal_k']#'Duczmal_f','Duczmal_g','Duczmal_h',\
    ##'Duczmal_i','Duczmal_j']
    clusterIDlist = [mixed, rural, urban, Duczmal_a, Duczmal_b, Duczmal_c, Duczmal_d, Duczmal_e, Duczmal_j, Duczmal_k]

    popcsv = 'C:/_DATA/CancerData/NewEnglandDiseaseData/format/pop.csv'
    r = readcsv.readcsv(popcsv)
    pop = r.getData()
    pop = pop[:,1]
    sumpop = sum(pop)

    power = [] # the p-value of one unit in the true cluster is less than 0.05
    power2 = [] # as long as the p-value of one unit is less than 0.05 (no matter if it belongs to true cluster)
    ppv = []
    sensitivity = []
    misclassification = []
    
    for clusteridx in xrange(len(clustername)):
    #for clusteridx in xrange(1):
        #clusteridx = 3
        clustertype = clustername[clusteridx]
        cluster = clusterIDlist[clusteridx]
Example #25
                    self.power0[pid] = 1
                    self.power1[pid] += 1

        return self.power0, self.power1
#--------------------------------------------------------------------------
#MAIN

if __name__ == "__main__":

    # modified 
    hot_16 = [9, 10, 12, 13, 14, 17, 20, 23, 26, 27, 28, 33, 34, 35, 38, 41,
              95, 99, 109, 110, 114, 115, 117, 119, 125, 126, 127, 128, 130,
              131, 133, 134, 136, 137, 138, 139, 140, 141, 142, 143, 146, 147,
              149, 151, 152, 156, 157, 237 ]

    rural = set([9,10,12,13,14,17,20,23,26,27,28,33,34,35,38,41])
    mixed = set([95,99,109,110,114,115,117,119,126,131,139,140,142,146,147,237])
    urban = set([125,127,128,130,133,134,136,137,138,141,143,149,151,152,156,157])
    hot_16_set = [rural, mixed, urban]
    
    #print hot_16
    filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/LLR/'
    pvaluecsv = filePath + 'LLR_EBS_high_0_pvalue.csv'
    r = readcsv.readcsv(pvaluecsv)
    #print r
    data = r.getData()
    pvalue = data[:,-1]

    s = SSS_Power(range(len(pvalue)), hot_16_set)
    print s.getMeasure(pvalue)
    #print 'sigma =', s.getMeasure(pvalue)
Example #26
import json
import simplejson
import sys, os
sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', 'common'))
import readcsv

def putjson(data, filename):
    jsondata = simplejson.dumps(data, separators=(',', ':'))
    with open(filename, 'w') as fd:
        fd.write(jsondata)

csvfile = 'C:/Users/Hu/Downloads/temp/cnty_pts_lnglat.csv'
r = readcsv.readcsv(csvfile, 'float')
cntypts = r.getData()
cntypts_object = []
for f in cntypts:
    t = {}
    t['id'] = int(f[0])
    t['lng'] = f[1]
    t['lat'] = f[2]
    cntypts_object.append(t)
putjson(cntypts_object, 'C:/_DATA/migration_census_2000/cnty/cnty_pts_lnglat.json')
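
Usage note: the separators=(',', ':') argument makes simplejson emit the most compact JSON possible. A tiny illustration (the path is invented):

putjson([{'id': 1, 'lng': -73.9, 'lat': 40.7}], 'points.json')
# writes [{"id":1,"lng":-73.9,"lat":40.7}] with no extra whitespace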
Example #27


#--------------------------------------------------------------------------
#MAIN

if __name__ == "__main__":
    start_time = datetime.now()
    print "begin at " + str(start_time)    

    #filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/LLR_buildonly/'
    filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/LLR_buildonly/'

    datacsv = filePath + 'minpvalue_Unsmoothed_corrected.csv'
    #datacsv = filePath + 'minpvalue_corrected.csv'
    r = readcsv.readcsv(datacsv, 'float')
    data = r.getData()

    data = data * 10

    repeatTime = len(data[0,:])
    header = ''
    fileLoc = datacsv[:-4] + '_2.csv'
    for i in xrange(repeatTime):
        header += 'data' + str(i) + ','
    np.savetxt(fileLoc, data, delimiter=',', comments='', header = header[:-1], fmt = '%s')

    finish_time = datetime.now()
    print "end at " + str(finish_time)
    print 'Time for whole procedure: ' + str(finish_time - start_time)
    print "========================================================================"  
Example #28
import simplejson
import sys, os
sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', 'common'))
import readcsv
import numpy as np

def putjson(data, filename):
    jsondata = simplejson.dumps(data, separators=(',', ':'))
    with open(filename, 'w') as fd:
        fd.write(jsondata)

#csvfile = 'C:/_DATA/migration_census_2000/cnty/sig_flowtable_above65_allm.csv'
#csvfile = 'C:/_DATA/migration_census_2000/cnty/sig_flowtable_allage_allm.csv'
csvfile = 'C:/_DATA/migration_census_2009/cnty/sig_flowtable_allage_1m.csv'
r = readcsv.readcsv(csvfile, 'int')
flowdata = r.getData() # oid, did, volume

#csvfile = 'C:/_DATA/migration_census_2000/cnty/cnty_pts_lnglat.csv'
#txtfile = 'C:/_DATA/migration_census_2000/cnty/cnty_pts_lnglat.txt'
txtfile = 'C:/_DATA/migration_census_2009/cnty/cnty_pts_lnglat.txt'
#r = readcsv.readcsv(csvfile, 'float')
#cntypnts = r.getData() # id, x, y
cntypnts = [line.strip() for line in open(txtfile)]

output = '{"type":"FeatureCollection","features":[\n'
i = 0
flowdata = flowdata[np.argsort(flowdata[:,2]),:]
for f in flowdata:
    output += '{"type":"Feature","id":"' + str(i) +'","properties":{"volume":' + str(f[2])
    output += '},"geometry":{"type":"LineString","coordinates":[['
Example #29
    print '75Q:', temp_3Q

    return [temp_mean, temp_1Q, temp_median, temp_3Q]


#--------------------------------------------------------------------------
#MAIN

if __name__ == "__main__":
    start_time = datetime.now()
    print "begin at " + str(start_time)  

    filePath = 'C:/_DATA/CancerData/SatScan/mult6000/'
    originaldatacsv = filePath + 'TPFP_original_summary.csv'
    # satscan_rate	satscan_rate_error	satscan_elliptic_rate	satscan_elliptic_rate_error	LLR_rate	LLR_rate_error
    r = readcsv.readcsv(originaldatacsv)
    originaldata = r.getData()
    
    modifieddatacsv = filePath + 'TPFP_modified_summary.csv'
    # satscan_rate	satscan_rate_error	satscan_elliptic_rate	satscan_elliptic_rate_error	LLR_rate	LLR_rate_error
    r = readcsv.readcsv(modifieddatacsv)
    modifieddata = r.getData()

    interval = np.arange(0,1,0.001)
    #np.savetxt(fileLoc, zip(np.mean(intervalvalue,0),np.std(intervalvalue,0)), delimiter=',', comments='', header = 'rate,error', fmt = '%s')

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
Example #30
File: maxcol.py Project: TechBK/kNN
def column(col,table):
    #for row in range(0,rownum):
    #print(len(table))
    for row in range(0,len(table)):
        #print(row)
        yield math.fabs(table[row][col])

def maxcolumn(colnum,table):
    for col in range(0,colnum-1):
        #print(col)
        yield max([x for x in column(col,table)])

def maxcol(colnum,table):
    return [x for x in maxcolumn(colnum,table)]

def maxall(colnum,test,train):
    testmax = maxcol(colnum,test)
    trainmax = maxcol(colnum,train)
    _maxall = []
    for col in range(0,colnum-1):
        _maxall.append(max(testmax[col],trainmax[col]))
    return _maxall


if __name__ == '__main__':
    rownum,colnum,table = readcsv(file='prostate-training-data.csv')
    #print(list(maxcolumn(colnum, table)))
    print(maxcol(colnum, table))  # maxcol takes (colnum, table)
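
A toy check of the helpers above; note that maxcol scans the first colnum-1 columns by absolute value:

table = [[1, -5, 2], [3, 4, -7]]
print(maxcol(3, table))  # -> [3.0, 5.0]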

Example #31
    def test_readcsv(self):
        # check that the type of readcsv's output is a class
        self.assertIsInstance(type(readcsv('Region')), type)
        # check that the output is not None
        self.assertIsNotNone(readcsv('Region'))
Example #32
    urban = [105,107,112,120,122,125,127,128,130,133,134,141,143,149,152,155]
    hot_16 = mixed + rural + urban

    '''
    ## modified risk area
    hot_16 = [9, 10, 12, 13, 14, 17, 20, 23, 26, 27, 28, 33, 34, 35, 38, 41,
              95, 99, 109, 110, 114, 115, 117, 119, 125, 126, 127, 128, 130,
              131, 133, 134, 136, 137, 138, 139, 140, 141, 142, 143, 146, 147,
              149, 151, 152, 156, 157, 237 ]
    
    #filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/'
    filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/'

    datacsv = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/three16_format_modify.csv'
    #datacsv = 'C:/_DATA/CancerData/SatScan/mult6000/three16_format.csv'
    r = readcsv.readcsv(datacsv)
    data = r.getData()
    pop = data[:,1]

    datacsv = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/three16_lebs_rate_newCTG.csv'
    r = readcsv.readcsv(datacsv)
    smoothdata = r.getData()

    ctgcsv = 'C:/_DATA/CancerData/SatScan/NortheeaternUS_re_contiguity.ctg'
    ctgdic = readctg(ctgcsv)
    ## read image via matplotlib
    #im = mpimg.imread(imagefile)
    #num_row, num_col = im.shape
    #num_row, num_col = int(num_row), int(num_col)
    #im.shape = (-1,1)
    #imagedata = im[:,0].tolist()
Example #33
    Duczmal_k.remove(169)

    clusteridx = 9
    clusterlist = ['6mixed16','6rural16','6urban16','Duczmal_a','Duczmal_b',\
                   'Duczmal_c','Duczmal_d','Duczmal_e','Duczmal_j','Duczmal_k']#'Duczmal_f','Duczmal_g','Duczmal_h',\
    ##'Duczmal_i','Duczmal_j']
    clusterIDlist = [mixed, rural, urban, Duczmal_a, Duczmal_b, Duczmal_c, Duczmal_d, Duczmal_e, Duczmal_j, Duczmal_k]
    clustertype = clusterlist[clusteridx]
    cluster = clusterIDlist[clusteridx]

    print 'Cluster type: ', clustertype
    
    # 
    filePath = 'C:/_DATA/CancerData/NewEnglandDiseaseData/'
    datacsv = filePath + 'format/' + clustertype + '.csv'
    r = readcsv.readcsv(datacsv, 'int')
    #print randomdatacsv
    data = r.getData()
    pop = data[:,1]

    randomLLRcsv = filePath + 'LLR_buildonly/Random/maxllr.csv'
    r = readcsv.readcsv(randomLLRcsv)
    randomLLR = r.getData()
    randomLLR = randomLLR[:,0]
    randomCount = len(randomLLR) + 1
    
    repeatTime = len(data[0,:])-2
    #repeatTime = 100
    numUnit = len(pop)
    
    maxPvaluelist = []
Example #34
    Duczmal_d = [95,99,109,110,113,117,129,142,148,150,159,160,164,180,182]
    Duczmal_e = [84,95,99,102,109,110,113,116,117,129,142,148,150,159,160,164,180,182,232,233,236]
    #Duczmal_f = []
    Duczmal_j = [61,76,78,89,94,98,106,111,119,121,123,131,136,151,154,157,158,160,161,162,163,164,165,167,168,169,\
                 170,171,172,173,174,175,178,179,180,181,182,183,184,185,227,228,230,231,232,233,234,235,236,237,238,239,240,241,242]
    Duczmal_k = Duczmal_j + [84,88,90,91,92,93,95,96,99,102,114,118,135,139,140,144,145,146,147,148,150,153,156,159]
    Duczmal_k.remove(169)

    
    clustername = ['6mixed16','6rural16','6urban16','Duczmal_a','Duczmal_b',\
                   'Duczmal_c','Duczmal_d','Duczmal_e','Duczmal_j','Duczmal_k']#'Duczmal_f','Duczmal_g','Duczmal_h',\
    ##'Duczmal_i','Duczmal_j']
    clusterIDlist = [mixed, rural, urban, Duczmal_a, Duczmal_b, Duczmal_c, Duczmal_d, Duczmal_e, Duczmal_j, Duczmal_k]

    popcsv = 'C:/_DATA/CancerData/NewEnglandDiseaseData/format/pop.csv'
    r = readcsv.readcsv(popcsv)
    pop = r.getData()
    pop = pop[:,1]
    sumpop = sum(pop)

    power = [] # the p-value of one unit in the true cluster is less than 0.05
    power2 = [] # as long as the p-value of one unit is less than 0.05 (no matter if it belongs to true cluster)
    ppv = []
    sensitivity = []
    misclassification = []
    
    for clusteridx in xrange(len(clustername)):
    #for clusteridx in xrange(1):
        #clusteridx = 9
        clustertype = clustername[clusteridx]
        cluster = clusterIDlist[clusteridx]
Example #35
import readcsv
import datetime
import pandas as pd
import calculate

CSV_NAME = '平衡型基金_NAV_Return.csv'
test_list_of_fund = ['21719942', '42334401A', '42334401B']
test_ratio_of_fund = [0.2, 0.5, 0.3]
CD_NUMBER = 0.6 / 52  # weekly rate; better derived from a real quote than a fixed constant

if __name__ == '__main__':
    # start = datetime.datetime.strptime("2016/5/6","%Y/%m/%d")
    # end = datetime.datetime.strptime("2019/4/26","%Y/%m/%d")

    start = datetime.datetime.strptime("2016/5/6", "%Y/%m/%d")
    end = datetime.datetime.strptime("2016/12/6", "%Y/%m/%d")

    df = readcsv.readcsv(CSV_NAME, start, end, CD_NUMBER)
    print("finish df")
    # print(df)

    # cor = calculate.cal_cor(df,"normal")
    # print(cor.head(10))
    # print("===============")
    # cor_down = calculate.cal_cor(df,"downside")
    # print(cor_down)
    # print("finish cor")

    risk = calculate.cal_co_risk(test_list_of_fund, test_ratio_of_fund, df)
    print(risk)
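
calculate.cal_co_risk is not shown; a plausible sketch matching the call signature, assuming df holds one weekly-return column per fund (a pure assumption about the real module):

import numpy as np

def cal_co_risk(funds, ratios, df):
    # Weighted portfolio standard deviation from the funds' covariance matrix.
    w = np.asarray(ratios)
    cov = df[funds].cov().to_numpy()
    return float(np.sqrt(w @ cov @ w))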
Example #36
                print("--------------------------------------------------------------------------------------------------------------------------------")
                print("Chave primaria já existe: " + str(ViolacaoPK), end='')
                print("Comando excluido da lista:\n" + sql_insert)

                # Retira o comando problematico da lista e reexecuta a função
                insert_list.remove(sql_insert)
                execute_insert_list(insert_list)

            except psycopg2.InternalError as ErroInterno:
                print("Erro interno: " + str(ErroInterno))

        print("--------------------------------------------------------------------------------------------------------------------------------")

# Recupera uma lista de dicionários com os dados a serem inseridos no banco...
insert_list = construct_insert_list( readcsv(CSV_PATH), TABLE_NAME) # ...e constroi uma lista de inserts com esta lista
execute_insert_list(insert_list)


"""
TIPOS DE DADOS A SEREM TRATADOS
    - Com aspas
        1. string
        2. date
    - Sem aspas
        1. Numericos
        2. boolean
        
OBS: Dados do CSV que tenham alguma função dentro dos comandos do banco de dados(como aspas) devem ser tratados, por exemplo:
- Tratar aspas simple substituindo-as por aspas duplas
"""
Example #37
import config
import readcsv
import datacleaning
import featurescaling
import algorithm
import predict
import writecsv
import numpy

data = readcsv.readcsv(config.trainingset)
config.data = data
config.row_ = data.shape[0]
config.col_ = data.shape[1]

datacleaning.cabin(config.data, 10)  #Problem specific data cleaning
datacleaning.gender(config.data, 3)  #Problem specific data cleaning
feature = config.allowed_feature
config.cdata = datacleaning.reduce_features(config.row_, len(feature), feature,
                                            config.data)
config.cdata = datacleaning.polynomial_feature_cubic(config.cdata)
crow_ = (config.cdata).shape[0]
ccol_ = (config.cdata).shape[1]

featurescaling.featurescaling(config.cdata)

feature = config.outputlabel
y = datacleaning.get_output_label(config.row_, feature, config.data)
config.y = y
theta = numpy.zeros((ccol_, 1))

print "training....."
Example #38
    #filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/satscan/'
    #filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/satscan/elliptic/'
    filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/LLR_buildonly/'
    #filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/'
    #filePath = 'C:/_DATA/CancerData/SatScan/mult6000/redcap/three16/LLR/EBS/constraint/5p_improved/'
    #filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/'
    #filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/satscan/'
    #filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/satscan/elliptic/'
    #filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/LLR_buildonly/'

    #minpvaluecsv = filePath + 'minpvalue_atleast2counties.csv'
    #minpvaluecsv = filePath + 'minpvalue2.csv'
    minpvaluecsv = filePath + 'minpvalue_corrected_2.csv'
    #minpvaluecsv = filePath + 'minpvalue_Unsmoothed_atleast2counties.csv'
    #minpvaluecsv = filePath + 'seg_minpvalue_beta0.1.csv'
    r = readcsv.readcsv(minpvaluecsv)
    minpvalue = r.getData()
    
    repeatTime = 1000
    #pvalueLevel = -1 # -1: min pvalue, -2: last level
    numUnit = 255
    sigma = []
    s = measureSigma.SSS_sigma(range(numUnit), hot_16)

    row = 0
    count = 0
    for repeat in xrange(repeatTime):
        row += 1
        if int(row/10) > 0:
            #print count*10
            row = 0
Example #39
    parser.add_argument('--data_dir', help='Path to dataset images')
    parser.add_argument('--output_dir',
                        help='Path to directory for saving outputs')
    args = parser.parse_args()

    train = "train_fluid_intelligence_household.csv"
    valid = "valid_fluid_intelligence_household.csv"
    test = "test_fluid_intelligence_household.csv"

    if not os.path.isdir(args.output_dir):
        try:
            os.mkdir(args.output_dir)
        except OSError as exc:
            raise Exception('Could not create output directory') from exc

    csv_train = readcsv(args.data_dir, train)
    csv_valid = readcsv(args.data_dir, valid)
    csv_test = readcsv(args.data_dir, test)

    for key, value in csv_train.items():
        print(key, value)

    for key, value in csv_valid.items():
        print(key, value)

    for key, value in csv_test.items():
        print(key, value)

    print('saving train dict!')
    np.save(os.path.join(args.output_dir, 'csv_train_target_dti.npy'),
            csv_train)
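
Note that np.save pickles the dict, so reading it back later requires allow_pickle=True plus .item() to recover the dict:

loaded = np.load(os.path.join(args.output_dir, 'csv_train_target_dti.npy'),
                 allow_pickle=True).item()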