def __getitem__(self, index):
    # Crop the source image to the configured patch size (RGB)
    orgData = utils.picCut(
        np.array(Image.open(self.filepath[index]).convert('RGB')),
        config.crop_size_id)
    noiData = utils.addGaussianNoise(orgData, 1)
    # Normalize both patches from [0, 255] to [-0.5, 0.5] and move channels first
    return noiData.astype(np.float32).transpose(
        [2, 0, 1]) / 255 - 0.5, orgData.astype(np.float32).transpose(
            [2, 0, 1]) / 255 - 0.5
def netInit(self):
    utils.logMaker('INFO', 'NETWORK FILES INITIALIZING...')
    self.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    self.generator_srgan = torch.load(config.network_srg_path,
                                      map_location=self.device.type)
    self.generator_idgan = torch.load(config.network_idg_path,
                                      map_location=self.device.type)
def __getitem__(self, index):
    cropSize = utils.sizeRecurrect(config.crop_size_esr, config.up_scale)
    # Crop a high-resolution patch, then downscale it to form the low-resolution input
    hrPic = utils.picCut(
        np.array(Image.open(self.filepath[index]).convert('RGB')), cropSize)
    lrPic = utils.resize(
        hrPic, (cropSize // config.up_scale, cropSize // config.up_scale))
    # Normalize both patches from [0, 255] to [-0.5, 0.5] and move channels first
    return lrPic.astype(np.float32).transpose(
        [2, 0, 1]) / 255 - 0.5, hrPic.astype(np.float32).transpose(
            [2, 0, 1]) / 255 - 0.5
def countyGroupByStatePersistence():
    stateWisePath = './pages/json/statewise/'
    statesUnique = usLiveCounty['state'].unique()
    tmpData = {}
    for state in statesUnique:
        tmpFrame = usLiveCounty[usLiveCounty['state'] == state].fillna(value=0)
        tmpData['countyX'] = tmpFrame['county'].tolist()
        tmpData['casesY'] = tmpFrame['cases'].astype(int).tolist()
        tmpData['deathsY'] = tmpFrame['deaths'].astype(int).tolist()
        # The first two digits of a county FIPS code identify the state
        statefips = str(tmpFrame['fips'].iloc[0])[:2]
        UTILS.toJsonFile(tmpData, '{}{}/'.format(stateWisePath, statefips),
                         'counties-under.json')
def OCR(image_path):
    """Perform OCR on the image at the given path."""
    UTILS.save_to_grayscale(image_path)
    with io.open(image_path, 'rb') as image_file:
        content = image_file.read()
    image = types.Image(content=content)
    response = client.document_text_detection(image=image)
    texts = response.text_annotations
    # The first annotation contains the full text detected in the document
    return texts[0].description
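# A minimal sketch of the module-level Vision client assumed by OCR() above. This is
# an assumption: it presumes the pre-2.0 google-cloud-vision package (which still
# exposes a `types` module) and credentials supplied via GOOGLE_APPLICATION_CREDENTIALS.
import io
from google.cloud import vision
from google.cloud.vision import types

client = vision.ImageAnnotatorClient()

if __name__ == '__main__':
    # Example usage: print the full text detected in a sample scan (hypothetical path).
    print(OCR('sample_page.png'))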
def mapDataPersistence():
    mapDataPath = './pages/json/mapdata/'
    tmp = [usLiveState, usLiveCounty]
    for idx in range(len(tmp)):
        df = tmp[idx]
        fipsX, seriesY = getCasesOrDeathsSeries(df, 'fips', 'both')
        if len(fipsX) != len(seriesY['cases']) or len(fipsX) != len(
                seriesY['deaths']):
            raise Exception('Index Must Match')
        tmpCaseData = []
        tmpDeathsData = []
        for i in range(len(fipsX)):
            tmpCaseEntry = {}
            tmpDeathEntry = {}
            tmpCaseEntry['fipsCode'] = fipsX[i]
            tmpDeathEntry['fipsCode'] = fipsX[i]
            tmpCaseEntry['value'] = seriesY['cases'][i][0]
            tmpDeathEntry['value'] = seriesY['deaths'][i][0]
            tmpCaseData.append(tmpCaseEntry)
            tmpDeathsData.append(tmpDeathEntry)
        if idx == 0:
            UTILS.toJsonFile(tmpCaseData, mapDataPath, 'states-cases.json')
            UTILS.toJsonFile(tmpDeathsData, mapDataPath, 'states-deaths.json')
        else:
            UTILS.toJsonFile(tmpCaseData, mapDataPath, 'counties-cases.json')
            UTILS.toJsonFile(tmpDeathsData, mapDataPath,
                             'counties-deaths.json')
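# Hypothetical stand-in for the UTILS.toJsonFile helper used by the persistence
# functions above and below; the real implementation is not shown, so this is only an
# assumed sketch (create the directory if needed, then dump the payload as JSON).
import json
import os


def toJsonFile(data, dirPath, fileName):
    # Create the target directory if it does not already exist
    os.makedirs(dirPath, exist_ok=True)
    # Write the payload as UTF-8 JSON
    with open(os.path.join(dirPath, fileName), 'w', encoding='utf-8') as fp:
        json.dump(data, fp, ensure_ascii=False)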
def jwsim_contracts_irs(contracts, irs, suffix):
    '''
    Takes the contracts and IRS dataframes and returns a dataframe of records
    with matching names where the JW similarity is >= JWSIM_THRESH.
    '''
    # Rename the columns in IRS
    irs = u.rename_cols(irs, irs.columns, suffix)
    # Restrict the contracts df to just those from IL
    contracts = contracts[contracts.CSDS_Contract_ID.str.startswith('IL')]
    # Take the cartesian product between the two; replace np.NaN with ''
    prod = mn.cart_prod(contracts, irs)
    prod = prod.replace(np.NaN, '')
    # Print progress report
    print('Calculating Jaro-Winkler similarity on vendor names')
    # Compute the Jaro-Winkler similarity on the VendorName cols
    col1 = 'VendorName'
    arg = (prod, col1, col1 + suffix)
    jwsim = mn.parallelize(mn.jwsim, arg)
    # Return only the rows where JW similarity >= JWSIM_THRESH
    return jwsim[jwsim.JWSimilarity >= JWSIM_THRESH]
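# Illustrative only: mn.jwsim is not shown here, so this sketch assumes it scores the
# two vendor-name columns row by row with Jaro-Winkler similarity, approximated below
# with the jellyfish package. The column names mirror the call in the function above.
import jellyfish


def jwsim_sketch(df, col1, col2):
    # Attach a JWSimilarity column comparing the two name columns element-wise
    df = df.copy()
    df['JWSimilarity'] = [
        jellyfish.jaro_winkler_similarity(str(a), str(b))
        for a, b in zip(df[col1], df[col2])
    ]
    return df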
def try_fill(df):
    '''
    Fills in missing zip codes and coordinates as best as possible. Copies in
    values from elsewhere in the dataset and from the geocoded HQ addresses.
    Returns a dataframe.
    '''
    # Print progress report
    print('\nFilling in missing zip codes and coordinates as best as possible')
    # Fill in missing zip codes as best as possible
    targetsZ = ['ZipCode']
    keys1Z = ['Address', 'City', 'State']
    keys2Z = ['Name', 'Longitude', 'Latitude']
    df = filler(df, targetsZ, str, keys1Z, keys2Z)
    # Fill in missing longitude and latitude coordinates as best as possible
    targetsL = ['Longitude', 'Latitude']
    keys1L = ['Address', 'City', 'State', 'ZipCode']
    keys2L = ['Name']
    df = filler(df, targetsL, float, keys1L, keys2L)
    # Read in the geocoded HQ addresses and fill in zip codes and coordinates
    # as best as possible
    geo = read_geo()
    subset = ['Address', 'City', 'State']
    df = u.merge_coalesce(df.reset_index(drop=True), geo, subset)
    return df
def import_pb(fname):
    '''
    Reads in the PurpleBinder dataset. Splits each record into multiple based
    on the number of locations contained in the locations field. Splits the
    location column into its component parts (Address1, Address2, City, State,
    & ZipCode) and then converts all the strings to uppercase. Returns a
    dataframe.
    '''
    # Read in the json file
    df = read_pb(fname)
    # Split the locations into multiple rows (one row per location)
    splitR = split_rows(df)
    # Split the location column into its component parts
    splitC = split_cols(splitR)
    # Convert string columns to uppercase
    df_upper = u.upper(splitC)
    # There are serious problems with some of the geocoding in the PB data, so
    # drop the coordinates
    df_upper = df_upper.drop(['Latitude', 'Longitude'], axis=1)
    return df_upper
def merger(dollars_divided, geo):
    '''
    Merges the dollars_divided and geo dataframes, coalescing the values
    across matching columns. Drops unwanted columns. Returns a dataframe.
    '''
    # Define the arguments to merge_coalesce
    keys = ['Address', 'City', 'State', 'ZipCode']
    sfx = '_R'
    how = 'left'
    # Merge dollars_divided and geo together, filling in coordinates
    df = u.merge_coalesce(dollars_divided, geo, keys, sfx, how)
    # Drop these columns
    df = df.drop([
        'ClusterID', 'VendorName_LINK1', 'VendorName_LINK2', 'Name',
        'CSDS_Vendor_ID_LINK2'
    ], axis=1)
    # Drop duplicates based only on this subset
    subset = ['CSDS_Vendor_ID', 'Address', 'City', 'State', 'ZipCode']
    return df.drop_duplicates(subset=subset).reset_index(drop=True)
def read_svc():
    '''
    Reads in the service agency addresses. Calls the COMPARE_ADDRESSES module
    to merge duplicate addresses per agency. Counts the number of service
    addresses per organization. Returns a dataframe.
    '''
    # Print progress report
    print('\nReading in service agencies')
    # Read in the service agencies, converting zip code to string
    df = pd.read_csv(SVC, converters={'ZipCode': str})
    # Append '_SVC' to all columns except CSDS_Svc_ID
    df = u.rename_cols(df, [x for x in df.columns if x != 'CSDS_Svc_ID'],
                       '_SVC')
    # Rename a column to prepare for linking
    df = df.rename(columns={'CSDS_Svc_ID': 'CSDS_Vendor_ID_LINK2'}, index=str)
    # Use the COMPARE_ADDRESSES module to clean up multiple strings for a
    # single address record
    key = 'CSDS_Vendor_ID_LINK2'
    target = 'Address_SVC'
    fixed_addresses = ca.fix_duplicate_addresses(df, key, target)
    # Drop duplicates based on the key and target fields
    fixed_addresses = fixed_addresses.drop_duplicates(subset=[key, target])
    return fixed_addresses
def read_contracts():
    '''
    Reads in the contracts dataset via the MERGE_CONTRACTS module. Returns a
    dataframe.
    '''
    # Initialize an empty list to hold the dataframes
    dfs = []
    # For every (filename, label) tuple:
    for fname_tuple in mc.FNAMES:
        # Read in and process the dataset
        df = mc.process_dataset(fname_tuple)
        # If the label is 'CHI':
        if fname_tuple[-1] == 'CHI':
            # Send the dataframe through the round2 address cleaner
            df = addclean.round2(df)
            # Send the Address1 field through the address cleaner
            df['Address1'] = df['Address1'].apply(addclean.address_cleaner)
        # Add the newly processed dataframe into the list
        dfs.append(df)
    # Concatenate all the dataframes
    merged = pd.concat(dfs)
    # Convert the text columns (except for the URLs) to uppercase
    merged = u.upper(merged)
    # The resulting dataframe should contain 6591 records
    return merged
def import_addresses(dataset):
    '''
    Reads in one of three address datasets (specified with a string). Returns
    a dataframe.
    '''
    print('Reading in {} addresses'.format(dataset.upper()))
    # Read in the COOK address dataset; rename a column
    if dataset == 'cook':
        df = ad.read_cook_addr()
        df = df.rename(columns={'ID': 'VendorName'}, index=str)
    # Read in the IRS dataset; rename a column and standardize names
    elif dataset == 'irs':
        df = ad.read_irs()
        df = df.rename(columns={'OrganizationName': 'VendorName'}, index=str)
        df['VendorName'] = df['VendorName'].apply(stdname)
    # Read in the IL address dataset; standardize names
    elif dataset == 'il':
        df = ad.read_il_addr()
        df['VendorName'] = df['VendorName'].apply(stdname)
    # Convert text fields to uppercase
    df = u.upper(df)
    return df
def preprocess_contracts():
    '''
    Reads in the contract records. Preprocesses them to clean the amounts and
    keep only those over the minimum amount specified in the MIN_DOLLARS
    constant. Imports hand-collected addresses for Cook and IL contracts and
    merges in addresses from IRS990 forms to fill in as many blanks as
    possible. Returns a dataframe.
    '''
    # Read in the contracts and clean the dollar amounts
    contracts = read_contracts()
    contracts = clean_amounts(contracts)
    # Read in the COOK addresses dataset
    cook = import_addresses('cook')
    # Fill in addresses from the COOK dataset, matching on VendorName; then,
    # standardize VendorName
    print('Coalescing COOK address matches')
    merged = u.merge_coalesce(contracts, cook, 'VendorName')
    merged['VendorName'] = merged['VendorName'].apply(stdname)
    # Read in the IRS dataset
    irs = import_addresses('irs')
    # Get a dataframe of JW similarity matches >= JWSIM_THRESH between the
    # merged and irs dataframes
    sfx = '_IRS'
    jwsim = jwsim_contracts_irs(merged, irs, sfx)
    # Print progress report
    print('Coalescing IRS matches')
    # Fill in addresses from the IRS dataset
    coalesced = coalesce_matches(merged, jwsim, sfx)
    # Read in the IL addresses dataset
    il = import_addresses('il')
    # Print progress report
    print('Coalescing IL matches')
    # Fill in addresses from the IL dataset, matching on VendorName
    df = u.merge_coalesce(coalesced, il, 'VendorName')
    return df
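# A minimal, hypothetical driver for the preprocessing pipeline above; the original
# project's entry point is not shown, and the output filename here is made up.
if __name__ == '__main__':
    contracts_df = preprocess_contracts()
    contracts_df.to_csv('contracts_preprocessed.csv', index=False)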
def __init__(self):
    utils.logMaker('INFO', 'APPLICATION LAUNCHED')
    super(Launcher, self).__init__()
    self.setupUi(self)
    self.setFixedSize(self.width(), self.height())
    self.setWindowIcon(QIcon(config.icon_path))
    self.setWindowFlags(Qt.FramelessWindowHint)
    # Wire the UI buttons and the tip timer to their handlers
    self.open.clicked.connect(self.callManage)
    self.denoising.clicked.connect(self.callManage)
    self.save.clicked.connect(self.callManage)
    self.exit.clicked.connect(self.callManage)
    self.timer.timeout.connect(self.tipClose)
    self.thumbnailPath = None
    self.filePath = None
    self.denoised_thumbnailPath = None
    self.denoisedPath = None
    self.denoised_fileName = None
def import_dfss(fname):
    '''
    Reads in the DFSS dataset, converting strings to uppercase. Assigns an ID.
    Returns a dataframe.
    '''
    df = read_dfss(fname)
    df_upper = u.upper(df)
    return df_upper
def main():
    urls = []
    # Read URLs from stdin until a blank line is entered
    while True:
        line = input()
        if line:
            urls.append(line)
        else:
            break
    # Dispatch each URL to its site-specific handler
    for url in urls:
        siteMap[_parse_site(url)](url)
        time.sleep(1)
    UTILS()
def completion(self):
    check = utils.completionCheck()
    if not check[0]:
        self.ok = QPushButton("OK")
        self.ok.setStyleSheet(
            "background-color:rgb(110,200,209);color:white;")
        self.tipBox = QMessageBox()
        self.tipBox.setWindowFlags(Qt.FramelessWindowHint)
        self.tipBox.setText("Files missing")
        self.tipBox.setWindowTitle("Notice")
        self.tipBox.setStyleSheet(
            "background-color:rgb(51,51,51);color:white;")
        self.tipBox.addButton(self.ok, QMessageBox.AcceptRole)
        self.tipBox.setIcon(QMessageBox.NoIcon)
        self.tipBox.show()
        utils.logMaker('ERROR', 'FILES NOT EXIST', check[1])
        utils.logMaker('INFO', 'EXCEPTION CLOSED')
    else:
        self.netInit()
        self.show()
def lineDataPersistence():
    stateWisePath = './pages/json/statewise/'
    overviewPath = './pages/json/overview/'
    timeSplit = [7, 30, 365]
    # Persist the nationwide overview series for each time window
    for timeScale in timeSplit:
        fullDataWithinScale = gainDataWithinGivenDays(usFull, timeScale)
        dateSeries = list(np.array(fullDataWithinScale.index.unique()))
        dateSeries = UTILS.datetime64ToStr(dateSeries)
        tmpData = {}
        tmpData['dayX'] = dateSeries
        tmpData['casesY'] = fullDataWithinScale['cases'].tolist()
        tmpData['deathsY'] = fullDataWithinScale['deaths'].tolist()
        UTILS.toJsonFile(tmpData, overviewPath, '{}.json'.format(timeScale))
    # Persist the per-state series for each time window
    for timeScale in timeSplit:
        stateDataWithinScale = gainDataWithinGivenDays(usState, timeScale)
        dateSeries = list(np.array(stateDataWithinScale.index.unique()))
        dateSeries = UTILS.datetime64ToStr(dateSeries)
        stateX, stateY = getCasesOrDeathsSeries(stateDataWithinScale,
                                                identifiedCol='fips',
                                                casesOrDeaths='both')
        if len(stateX) != len(stateY['cases']) or len(stateX) != len(
                stateY['deaths']):
            raise Exception('Index Must Match')
        for i in range(len(stateX)):
            tmpData = {}
            tmpData['dayX'] = dateSeries
            tmpData['casesY'] = stateY['cases'][i]
            tmpData['deathsY'] = stateY['deaths'][i]
            UTILS.toJsonFile(tmpData,
                             '{}{}/'.format(stateWisePath, stateX[i]),
                             '{}.json'.format(timeScale))
def runNet(self, path):
    fileName = path.split('\\')[-1]
    # Load the image, normalize to [-0.5, 0.5], and add a batch dimension
    data = torch.tensor(
        np.array(Image.open(path).convert('RGB'), dtype=np.float32).transpose(
            [2, 0, 1]) / 255 - 0.5).unsqueeze(dim=0).to(self.device)
    self.saveTip.setText('Network processing...')
    # Denoise first, then upscale the denoised result
    data_denoised = self.generator_idgan(data)
    data_upscale = self.generator_srgan(data_denoised)
    utils.logMaker('INFO', 'OPERATION SUCCESSFUL')
    # Convert back to an 8-bit HWC image and save it to the cache directory
    pic_array = (data_upscale[0].cpu().detach().numpy() + 0.5) * 255
    picDenoised = Image.fromarray(
        pic_array.transpose([1, 2, 0]).astype(np.uint8))
    cachePath = os.path.join(config.cache_dir,
                             'denoised_{}'.format(fileName))
    picDenoised.save(cachePath)
    utils.logMaker('INFO', 'DENOISED FILE SAVED IN CACHE', [cachePath])
    return cachePath
def train(new=False):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Load existing checkpoints when requested and available, otherwise start fresh
    Generator = torch.load(
        config.network_idg_path,
        map_location=device.type) if new and os.path.exists(
            config.network_idg_path) else Generator_ID().to(device)
    Discriminator = torch.load(
        config.network_idd_path,
        map_location=device.type) if new and os.path.exists(
            config.network_idd_path) else Discriminator_ID().to(device)
    optimizerGen = opt.Adam(Generator.parameters())
    optimizerDis = opt.Adam(Discriminator.parameters())
    spl = Sampling_ID(config.train_dir)
    dataset = DataLoader(dataset=spl,
                         batch_size=config.batch_size_idgan,
                         shuffle=True,
                         num_workers=4)
    epoch = 0
    while True:
        Generator.train()
        Discriminator.train()
        for no, (noi, org) in enumerate(dataset):
            noi, org = noi.to(device), org.to(device)

            ############################ Discriminator ############################
            fake = Generator(noi)
            # Detach the fake batch so the discriminator update does not free the
            # generator's graph before the generator step below
            fakePrediction = Discriminator(fake.detach())
            realPrediction = Discriminator(org)
            lossDis = -torch.mean(
                torch.log(realPrediction) + torch.log(1. - fakePrediction))
            optimizerDis.zero_grad()
            lossDis.backward()
            optimizerDis.step()

            ############################## Generator ###############################
            prediction = Discriminator(fake)
            lossGen = (config.alphaADV * -torch.mean(torch.log(prediction)) +
                       config.alphaPIX * utils.pixelLoss(org, fake) +
                       config.alphaFEA * utils.featureLoss(
                           org, fake, device, config.num_vggLayer_idgan) +
                       config.alphaSMO * utils.smoothLoss(fake))
            optimizerGen.zero_grad()
            lossGen.backward()
            optimizerGen.step()
            print('{}_{}_{}_{}'.format(epoch, no, lossDis, lossGen))
        # Save both networks after every epoch
        torch.save(Discriminator, config.network_idd_path)
        torch.save(Generator, config.network_idg_path)
        epoch += 1
def coalesce_matches(contracts, jwsim, suffix):
    '''
    Pulls in the addresses from IRS records previously deemed to match the IL
    agencies. Returns a dataframe.
    '''
    jwsim = trim_jwsim(jwsim, suffix)
    # Define the key on which to coalesce
    keys = ['CSDS_Contract_ID']
    # Fill in missing values in contracts from matches in jwsim, matching on
    # keys
    df = u.merge_coalesce(contracts, jwsim, keys, suffix)
    return df
def train(new=False):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Load existing checkpoints when requested and available, otherwise start fresh
    Generator = torch.load(
        config.network_srg_path,
        map_location=device.type) if new and os.path.exists(
            config.network_srg_path) else Generator_ESR().to(device)
    Discriminator = torch.load(
        config.network_srd_path,
        map_location=device.type) if new and os.path.exists(
            config.network_srd_path) else Discriminator_ESR().to(device)
    optimizerGen = opt.Adam(Generator.parameters())
    optimizerDis = opt.Adam(Discriminator.parameters())
    spl = Sampling_ESR(config.train_dir)
    dataset = DataLoader(dataset=spl,
                         batch_size=config.batch_size_srgan,
                         shuffle=True,
                         num_workers=4)
    epoch = 0
    while True:
        Generator.train()
        Discriminator.train()
        for no, (lrPic, hrPic) in enumerate(dataset):
            lrPic, hrPic = lrPic.to(device), hrPic.to(device)

            ############################ Discriminator ############################
            fakeHR = Generator(lrPic)
            # Detach the fake batch so the discriminator update does not free the
            # generator's graph before the generator step below
            fakeHR_Prediction_Dis = Discriminator(fakeHR.detach()).mean()
            realHR_Prediction_Dis = Discriminator(hrPic).mean()
            real_RelativisticLoss = 1 - (realHR_Prediction_Dis -
                                         fakeHR_Prediction_Dis)
            fake_RelativisticLoss_Dis = (fakeHR_Prediction_Dis -
                                         realHR_Prediction_Dis)
            lossDiscriminator = real_RelativisticLoss + fake_RelativisticLoss_Dis
            optimizerDis.zero_grad()
            lossDiscriminator.backward()
            optimizerDis.step()

            ############################## Generator ###############################
            # Reduce the predictions to scalars so the combined loss can be
            # backpropagated directly
            fakeHR_Prediction_Gen = Discriminator(fakeHR).mean()
            realHR_Prediction_Gen = Discriminator(hrPic).mean()
            fake_RelativisticLoss_Gen = 1 - (fakeHR_Prediction_Gen -
                                             realHR_Prediction_Gen)
            lossGenerator = (
                config.alphaADV_ESR * fake_RelativisticLoss_Gen +
                config.alphaPIX_ESR * utils.pixelLoss(hrPic, fakeHR) +
                config.alphaFEA_ESR * utils.featureLoss(
                    hrPic, fakeHR, device, config.num_vggLayer_srgan) +
                config.alphaSMO_ESR * utils.smoothLoss_ESR(fakeHR))
            optimizerGen.zero_grad()
            lossGenerator.backward()
            optimizerGen.step()
            print('{}_{}_{}_{}'.format(epoch, no, lossDiscriminator,
                                       lossGenerator))
        # Save both networks after every epoch
        torch.save(Discriminator, config.network_srd_path)
        torch.save(Generator, config.network_srg_path)
        epoch += 1
def test():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = torch.load(config.network_idg_path, map_location=device.type)
    spl = Sampling_ID(config.test_dir)
    dataset = DataLoader(dataset=spl,
                         batch_size=config.batch_size_srgan,
                         shuffle=True)
    ssim_sum = 0.
    num = 0
    # Accumulate SSIM over the test set and report the average
    for lr, hr in dataset:
        lr, hr = lr.to(device), hr.to(device)
        fake_hr = net(lr)
        ssim_sum += utils.qualityRank(fake_hr, hr)
        num += lr.size()[0]
    print('SSIM FOR IDGAN : {}'.format(ssim_sum / float(num)))
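# A minimal launcher sketch for the training and evaluation loops above. This is an
# assumption; the original project may wire these up differently. Note that train()
# runs indefinitely and checkpoints every epoch, so evaluation is a separate run.
if __name__ == '__main__':
    import sys
    if len(sys.argv) > 1 and sys.argv[1] == 'test':
        test()           # Report the average SSIM of the saved generator
    else:
        train(new=True)  # new=True resumes from an existing checkpoint if one exists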
def import_wchi(fname):
    '''
    Reads in the West Chi dataset. Splits the address field into its component
    parts. Converts strings to uppercase. Returns a dataframe.
    '''
    # Read in the WESTCHI file
    df = read_wc(fname)
    # Split addresses into their component parts
    split = split_addr(df)
    # Convert strings to uppercase
    df_upper = u.upper(split)
    return df_upper
def import_mc(fname, sheetname):
    '''
    Reads in one MapsCorps dataset. Replaces str(np.NaN) with the empty
    string. Converts string values to uppercase. Drops duplicates. Returns a
    dataframe.
    '''
    # Extracts the year from the sheetname
    year = get_year(sheetname)
    # Uses a different function to read in the file based on the year
    if year == 2009:
        df = read_2009(fname, sheetname)
    elif year == 2016:
        df = read_2016(fname, sheetname)
    # Replaces the string 'nan' (str(np.NaN)) with the empty string and
    # converts strings to uppercase
    df = df.replace('nan', '')
    df_upper = u.upper(df)
    return df_upper.drop_duplicates().reset_index(drop=True)
def linker():
    '''
    Reads in the linker file (to link HQ agencies to service agencies). Merges
    a copy of itself on cluster ID, then eliminates records that match on
    vendor ID (to produce only matches that have different vendor IDs).
    Returns a dataframe.
    '''
    # Read in the link dataframe
    link = read_linker()
    # Make two new dataframes by copying the link dataframe and renaming
    # columns
    link1 = link.rename(columns={'VendorName': 'VendorName_LINK1'}, index=str)
    link2 = u.rename_cols(link, ['VendorName', 'CSDS_Vendor_ID'], '_LINK2')
    # Merge the two link dataframes together
    df = link1.merge(link2, how='left')
    # Drop self-matches and reset the index
    df = df[df['CSDS_Vendor_ID'] != df['CSDS_Vendor_ID_LINK2']].reset_index(
        drop=True)
    return df
GPIO.setup(buttonLEDPin, GPIO.OUT, initial=GPIO.LOW)
GPIO.output(buttonLEDPin, GPIO.LOW)

n = 0
while True:  # Run forever
    # Watch for a button press to toggle the clock
    pressed = GPIO.input(buttonPin)
    if pressed == GPIO.HIGH:
        buttonToggleState = not buttonToggleState
        sleep(.1)
    if buttonToggleState:
        GPIO.output(buttonLEDPin, GPIO.HIGH)
    else:
        GPIO.output(buttonLEDPin, GPIO.LOW)

    # Count up to 255
    if buttonToggleState:
        binaryNumbers = UTILS.int2bin(n)
        values = UTILS.getBinaryOnArray(binaryNumbers)
        for index, ledOn in enumerate(values):
            pin = pins[index]
            if ledOn:
                GPIO.output(pin, GPIO.HIGH)  # Turn on
            else:
                GPIO.output(pin, GPIO.LOW)  # Turn off
        n += 1
        if n >= 256:
            n = 0

    # Count by the second
    sleep(1)

GPIO.cleanup()
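# Hypothetical versions of the two UTILS helpers the loop above relies on; the real
# implementations are not shown, so the signatures and bit ordering here are assumed.
def int2bin(n, width=8):
    # Format the integer as a fixed-width binary string, e.g. 5 -> '00000101'
    return format(n, '0{}b'.format(width))


def getBinaryOnArray(binary_string):
    # Convert each character of the binary string into a True/False LED flag
    return [bit == '1' for bit in binary_string]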
import CONFIG
import RTC
import AZURE
from azure.devops.v5_0.work_item_tracking.models import JsonPatchOperation
from azure.devops.v5_1.work_item_tracking.models import Comment
from azure.devops.v5_1.work_item_tracking.models import CommentCreate
from datetime import datetime
import json
import os
import UTILS
import glob
import mmap

# Recreate the defect folder and its items subfolder
FOLDER = CONFIG.DEFECT_FOLDER
UTILS.remove(FOLDER)
os.mkdir(FOLDER)
os.mkdir(os.path.join(FOLDER, 'items'))

# Clients
validate_only = CONFIG.validate_only
bypass_rules = CONFIG.bypass_rules
suppress_notifications = CONFIG.suppress_notifications
rtcclient = RTC.rtcclient
queryclient = rtcclient.query
core_client = AZURE.core_client
wit_client = AZURE.wit_client
wit_5_1_client = AZURE.wit_5_1_client

# Project
def fix_duplicate_addresses(df, key='ClusterID', target='Address_SVC'):
    '''
    Takes in a dataframe. Attempts to fix duplicate addresses (by default, in
    the 'Address_SVC' field) if they have the same key (by default, the
    'ClusterID' field). Returns a dataframe.
    '''
    print('\nFixing duplicate addresses')
    # Sort the target field by length, longest to shortest
    sorter = df[target].str.len().sort_values(ascending=False).index
    df = df.reindex(sorter)
    # Make a mini version of the dataframe with two fields, the key & the
    # target (which has been renamed to indicate it's the original field)
    minimized_df = df[[key, target]].drop_duplicates().dropna()
    minimized_df[target + '_Original'] = minimized_df[target]
    # Make a list of the unique values in the key field
    unique_keys = list(minimized_df[key].unique())
    # Set a flag to FALSE
    new_df_exists = False
    # OVERVIEW: Call the iter_df() function on subsets of the dataframe (one
    # subset per key) to compare and fix the address strings assigned to that
    # key.
    # For each value in the list of unique keys:
    #   Make a mini dataframe that is just the rows corresponding to that key
    #   If there is more than 1 row:
    #     Call iter_df() on the mini df & assign the result to local_df2
    #     If the new_df_exists flag is set to TRUE:
    #       Create new_df by concatenating the existing new_df and local_df2
    #     Else:
    #       Assign the name new_df to local_df2 and set new_df_exists to TRUE
    for uKey in unique_keys:
        local_df = minimized_df[minimized_df[key] == uKey]
        if len(local_df) > 1:
            local_df2 = iter_df(
                local_df.copy().drop_duplicates().reset_index(drop=True),
                target)
            if new_df_exists:
                new_df = pd.concat([new_df, local_df2])
            else:
                new_df = local_df2
                new_df_exists = True
    print('Coalescing fixed addresses')
    # Rename the columns in both dataframes in preparation for calling
    # merge_coalesce()
    new_cols = {target: target + '_COAL', target + '_Original': target}
    new_df = new_df.rename(columns=new_cols, index=str)
    min_cols = {target + '_Original': target + '_COAL'}
    minimized_df = minimized_df.rename(columns=min_cols, index=str)
    # Coalesce with the dfs in this order so that we keep the new values
    merged = u.merge_coalesce(new_df, minimized_df, [key, target], how='right')
    # Merge the new address strings in, drop the original field, and rename
    # the new one
    df = df.merge(merged, how='left').drop(target, axis=1)
    df = df.rename(columns={target + '_COAL': target}, index=str)
    return df