Example #1
def test_dict_normalize_df():
    # NOTE: This test is a bit circuitous because storing a target df requires
    # storing some source from which to generate it; here we use
    # test_utils_csv_to_df.csv, which represents the desired CSV
    # representation of the df after successful execution of the function.
    # However, to get our test df into a 1:1 comparable state with the target
    # df generated from that CSV, we first have to write the test df to CSV
    # and read it back into a df; an annoying nuance, but necessary for a
    # true apples-to-apples comparison between dfs generated this way.
    utils = Utils()
    INDEX = "ISIN"
    ARGUMENT = "articleList"
    with open("./utils/tests/test_utils_dict_to_df.txt") as dict_file:
        test_dict = json.load(dict_file)
    target_df = pd.read_csv("./utils/tests/test_utils_csv_to_df.csv",
                            keep_default_na=False)
    temp_df = utils.dict_normalize_df(
        test_dict,
        INDEX,
        ARGUMENT,
    )
    temp_df.to_csv(
        "./utils/tests/test_utils_df_to_csv.csv",
        sep=",",
        index=False,
        header=True,
    )
    test_df = pd.read_csv("./utils/tests/test_utils_df_to_csv.csv",
                          keep_default_na=False)
    assert test_df.equals(target_df)
Example #2
 def bamtools(self):
     i = self.__path('bamtools')
     cmd = "git clone {url} {d}".format(url=i.url, d=i.build_dir)
     Utils.run(cmd)
     cmd = "mkdir -p build && cd build && CC=gcc-4.8 CXX=g++-4.8 cmake -DCMAKE_INSTALL_PREFIX:PATH={local_dir} .. && make -j {num_cores} install".format(
         local_dir=shellquote(i.local_dir), num_cores=self.num_cores())
     Utils.run_in_dir(cmd, i.build_dir)
Example #3
def test_text_parse_dict():
    utils = Utils()
    test_filepath = "./utils/tests/test_utils_creds.txt"
    test_dict = utils.text_parse_dict(filepath=test_filepath, separator="=")
    target_dict = {
        "USERNAME": "******",
        "PASSWORD": "******",
        "APPID": "MyTestApplication",
    }
    assert test_dict == target_dict
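
For context, a minimal sketch of what a text_parse_dict helper consistent with this test could look like, assuming the file holds one KEY=VALUE pair per line (an illustrative guess, not the actual Utils implementation):

def text_parse_dict(filepath, separator):
    # Parse "KEY<separator>VALUE" lines from a text file into a dict.
    parsed = {}
    with open(filepath) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            key, _, value = line.partition(separator)
            parsed[key.strip()] = value.strip()
    return parsed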
Example #4
 def __init__(self):
     self.base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "external"))
     self.ext_tars = os.path.join(self.base_dir, "tarballs")
     self.ext_build = os.path.join(self.base_dir, "build")
     self.install_dir = os.path.join(self.base_dir, "local")
     Utils.mkdir(self.ext_tars)
     Utils.mkdir(self.ext_build)
     self.paths = {}
     self.paths["zi_lib"] = self.__zi_lib()
     self.paths["cppitertools"] = self.__cppitertools()
     self.paths["boost"] = self.__boost()
Example #5
def test_uuid_generator():
    utils = Utils()
    test_seedvar = 6
    test_bitcount = 128
    test_refid = "4295905573"  # Apple Inc (Organization) RefID
    target_uuid_final = "14fe8b5a6ec8d00eddbfb6bd60c2e82e13a59db3"
    test_uuid_final = utils.uuid_generator(
        seedvar=test_seedvar,
        bitcount=test_bitcount,
        refstring=test_refid,
    )
    assert test_uuid_final == target_uuid_final
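
The 40-character hex target looks like a SHA-1 digest. Purely as an illustration of the shape such a helper could take, a hypothetical sketch (the real uuid_generator is not shown here, and this sketch is not guaranteed to reproduce the digest above):

import hashlib
import random

def uuid_generator(seedvar, bitcount, refstring):
    # Hypothetical: derive a deterministic bitcount-bit number from the seed,
    # mix in the reference string, and return the SHA-1 hex digest.
    random.seed(seedvar)
    base = random.getrandbits(bitcount)
    return hashlib.sha1("{}{}".format(base, refstring).encode()).hexdigest()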
Example #6
 def __build(self, i, cmd):
     print "\t getting file..."
     fnp = Utils.get_file_if_size_diff(i.url, self.paths.ext_tars)
     Utils.clear_dir(i.build_dir)
     Utils.untar(fnp, i.build_dir)
     try:
         Utils.run_in_dir(cmd, i.build_sub_dir)
     except:
         Utils.rm_rf(i.local_dir)
         sys.exit(1)
Example #7
    def __processArgs(self):
        dirs = self.args.dirsToDelete
        if not dirs:
            return

        if "all" == dirs[0]:
            dirs = []
            for k, _ in self.paths.paths.iteritems():
                dirs.append(k)

        for e in dirs:
            p = self.__path(e)
            if p.build_dir:
                Utils.rm_rf(p.build_dir)
            if p.local_dir:
                Utils.rm_rf(p.local_dir)
Example #8
 def num_cores(self):
     c = Utils.num_cores()
     if c > 8:
         return 8
     if 1 == c:
         return 1
     return c - 1
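
The clamp caps parallel build jobs at 8 and leaves one core free on mid-sized machines. A minimal sketch of the underlying Utils.num_cores, assuming it simply reports the machine's CPU count (an assumption; the real helper may differ):

import multiprocessing

def num_cores():
    # Number of CPUs visible to this process.
    return multiprocessing.cpu_count()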
Example #9
 def bamtools(self):
     i = self.__path('bamtools')
     cmd = "git clone {url} {d}".format(url=i.url, d=i.build_dir)
     Utils.run(cmd)
     #cmd = "mkdir -p build && cd build && CC=gcc-4.8 CXX=g++-4.8 cmake -DCMAKE_INSTALL_PREFIX:PATH={local_dir} .. && make -j {num_cores} install".format(
     #    local_dir=shellquote(i.local_dir), num_cores=self.num_cores())
     if sys.platform == "darwin":
         cmd = "mkdir -p build && cd build && CC=clang CXX=clang++ cmake -DCMAKE_INSTALL_PREFIX:PATH={local_dir} .. && make -j {num_cores} install".format(
             local_dir=shellquote(i.local_dir), num_cores=self.num_cores())
     else:
         if self.args.clang:
             cmd = "mkdir -p build && cd build && CC=clang CXX=clang++ cmake -DCMAKE_INSTALL_PREFIX:PATH={local_dir} .. && make -j {num_cores} install".format(
                 local_dir=shellquote(i.local_dir), num_cores=self.num_cores())
         else:
             cmd = "mkdir -p build && cd build && CC=gcc-4.8 CXX=g++-4.8 cmake -DCMAKE_INSTALL_PREFIX:PATH={local_dir} .. && make -j {num_cores} install".format(
                 local_dir=shellquote(i.local_dir), num_cores=self.num_cores())
     Utils.run_in_dir(cmd, i.build_dir)
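
The shellquote helper used throughout these snippets presumably just quotes a path for safe interpolation into a shell command; a minimal sketch of that assumption (Python 2 era, matching these snippets):

import pipes

def shellquote(s):
    # Quote a string so the shell treats it as one argument.
    return pipes.quote(s)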
Example #10
def returnresult(actionid, result):
    options = {
        "SN": "00010001",
        "CMD": "actionresult",
        "actionID": actionid,
        "result": result
    }
    print(options)
    print(Utils().http_post("/north/", options))
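
Utils().http_post presumably wraps an HTTP POST against the controller's API. A minimal illustrative sketch, assuming a requests-based helper and a configured base URL (both are assumptions, not the actual implementation):

import requests

class Utils:
    BASE_URL = "http://localhost:8080"  # hypothetical endpoint

    def http_post(self, path, payload):
        # POST the payload as JSON and return the response body.
        response = requests.post(self.BASE_URL + path, json=payload)
        return response.text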
Example #11
    def __processArgs(self):
        dirs = self.args.dirsToDelete
        if not dirs:
            return

        if "all" == dirs[0]:
            dirs = []
            for k, _ in self.paths.paths.iteritems():
                dirs.append(k)

        if "shark" in dirs and not "boost149" in dirs:
            dirs.append("boost149")

        for e in dirs:
            p = self.__path(e)
            if p.build_dir:
                Utils.rm_rf(p.build_dir)
            if p.local_dir:
                Utils.rm_rf(p.local_dir)
Example #12
 def __init__(self):
     self.base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "external"))
     self.ext_tars = os.path.join(self.base_dir, "tarballs")
     self.ext_build = os.path.join(self.base_dir, "build")
     self.install_dir = os.path.join(self.base_dir, "local")
     Utils.mkdir(self.ext_tars)
     Utils.mkdir(self.ext_build)
     self.paths = {}
     self.paths["zi_lib"] = self.__zi_lib()
     self.paths["cppitertools"] = self.__cppitertools()
     self.paths["cppprogutils"] = self.__cppprogutils()
     self.paths["boost"] = self.__boost()
     self.paths["R-devel"] = self.__Rdevel()
     self.paths["cppcms"] = self.__cppcms()
     self.paths["bamtools"] = self.__bamtools()
     self.paths["pear"] = self.__pear()
     self.paths["mathgl"] = self.__mathgl()
     self.paths["armadillo"] = self.__armadillo()
     self.paths["mlpack"] = self.__mlpack()
     self.paths["liblinear"] = self.__liblinear()
Example #13
def main():
    args = parse_args()
    s = Setup(args)
    if args.print_libs:
        print "Available installs:"
        count = 1
        installs = s.allSetUps
        installs.sort()
        for install in installs:
            print count, ")", install
            count = count + 1
    elif args.addBashCompletion:
        cmd = "cat bashCompletes/* >> ~/.bash_completion"
        Utils.run(cmd)
        if args.bib_cpp:
            if args.dev:
                cmd = "echo \"complete -F _bibCppTools proto\" >> ~/.bash_completion"
                Utils.run(cmd)
                cmd = "echo \"complete -F _bibCppTools bioalg\" >> ~/.bash_completion"
                Utils.run(cmd)
                cmd = "echo \"complete -F _bibCppTools euler\" >> ~/.bash_completion"
                Utils.run(cmd)
    else:     
        s.setup()
Example #14
 def cppitertools(self):
     self.__git(self.__path('cppitertools'))
     i = self.__path('cppitertools')
     cmd = "cd {d} && git checkout d4f79321842dd584f799a7d51d3e066a2cdb7cac".format(d=shellquote(i.local_dir))
     Utils.run(cmd)
Example #15
 def __git(self, i):
     cmd = "git clone {url} {d}".format(url=i.url, d=shellquote(i.local_dir))
     Utils.run(cmd)
Example #16
df.info()

df.shape

Creating a list variable called **col_remove**, to which we will add the features that are not relevant to our goal

col_remove = ['id']

## Analysis of Missing Values 

df.isna().sum()

Loading a class called **Utils**, which helps visualize the data

utils = Utils()

utils.plot_variables_nan(df)

utils.df_nan
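
A minimal sketch of what a plot_variables_nan helper like this could look like, assuming it computes the per-column NaN fraction, stores it in df_nan, and plots it (an illustrative guess, not the actual class):

import matplotlib.pyplot as plt

class Utils:
    def plot_variables_nan(self, df):
        # Fraction of missing values per column, sorted descending.
        self.df_nan = df.isna().mean().sort_values(ascending=False)
        self.df_nan.plot(kind="bar")
        plt.ylabel("Fraction of NaN values")
        plt.show()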

Removing the **riesgo** variable, since more than 99% of its values are NaN

df = df.drop(columns=['riesgo'])

df.shape

## Analysis of target value

df[['client']].hist()
plt.ylabel('Count')
Example #17
    def train(self):

        # initializing some loss functions that will be used
        criterion = nn.MSELoss()
        l2_loss = nn.MSELoss()
        l1_loss = nn.L1Loss()

        print('Training...')
        for epoch in range(self.num_epochs):
            for sample in self.data_loader:

                # getting each key value of the sample in question (each sample is a dictionary)
                right_images = sample['face']
                onehot = sample['onehot']
                raw_wav = sample['audio']
                wrong_images = sample['wrong_face']
                id_labels = from_onehot_to_int(
                    onehot
                )  # list with the index of the youtuber to which each audio belongs

                # defining the inputs as Variables and allocate them into the GPU
                right_images = Variable(right_images.float()).cuda()
                raw_wav = Variable(raw_wav.float()).cuda()
                wrong_images = Variable(wrong_images.float()).cuda()
                onehot = Variable(onehot.float()).cuda()
                id_labels = Variable(id_labels).cuda()

                # tensors of ones and zeros (one per sample in the batch) that will be used to compute D loss.
                real_labels = torch.ones(right_images.size(0))
                fake_labels = torch.zeros(right_images.size(0))

                # ======== One-sided label smoothing ==========
                # Helps prevent the discriminator from overpowering the
                # generator by penalizing the discriminator when it is too confident
                # =============================================
                smoothed_real_labels = torch.FloatTensor(
                    Utils.smooth_label(
                        real_labels.numpy(),
                        -0.1))  # so smooth_real_labels will now be 0.9

                # allocating the three variables into GPU
                real_labels = Variable(real_labels).cuda()
                smoothed_real_labels = Variable(smoothed_real_labels).cuda()
                fake_labels = Variable(fake_labels).cuda()

                # ======= #
                # TRAIN D #
                # ======= #

                # setting all the gradients to 0
                self.discriminator.zero_grad()

                # feeding G only with wav file
                fake_images, z_vector, _ = self.generator(raw_wav)

                # feeding D with the generated images and z vector whose dimensions will be needed
                # for the concatenation in the last hidden layer
                outputs, _ = self.discriminator(fake_images, z_vector)

                # computing D loss when feeding fake images
                fake_score = outputs  # log file purposes
                fake_loss = criterion(outputs, fake_labels)

                # feeding D with the real images and z vector again
                outputs, activation_real = self.discriminator(
                    right_images, z_vector)

                # computing D loss when feeding real images
                real_score = outputs
                real_loss = criterion(outputs, smoothed_real_labels)

                # feeding D with real images but not corresponding to the wav under training
                outputs, _ = self.discriminator(wrong_images, z_vector)
                # computing D loss when feeding real images but not the ones corresponding to the input audios
                wrong_loss = criterion(outputs, fake_labels)
                wrong_score = outputs

                # the discriminator loss function is the sum of the three of them
                d_loss = real_loss + fake_loss + wrong_loss

                d_loss.backward()

                self.optimD.step()

                # ======= #
                # TRAIN G #
                # ======= #

                # setting all the gradients to 0
                self.generator.zero_grad()

                # feeding G only with wav file
                fake_images, z_vector, softmax_scores = self.generator(raw_wav)

                # feeding D with the generated images and z vector. Storing intermediate layer activations for loss computation purposes
                outputs, activation_fake = self.discriminator(
                    fake_images, z_vector)

                # feeding D with the real images and z vector.  Storing intermediate layer activations for loss computation purposes
                _, activation_real = self.discriminator(right_images, z_vector)

                activation_fake = torch.mean(activation_fake, 0)
                activation_real = torch.mean(activation_real, 0)

                # ======= Generator Loss function ============
                # This is a customized loss function: the first term is the mean squared error loss.
                # The second term is the feature-matching loss, which measures the distance between
                # the real and generated images' statistics by comparing intermediate-layer activations.
                # The third term is the L1 distance between the generated and real images; this is
                # helpful for the conditional case because it links the embedding feature vector
                # directly to certain pixel values.
                # ===========================================

                # computing first the part of the loss related to the softmax classifier after the embedding
                softmax_criterion = nn.CrossEntropyLoss()
                softmax_loss = softmax_criterion(softmax_scores, id_labels)


                g_loss = criterion(outputs, real_labels) \
                         + self.l2_coef * l2_loss(activation_fake, activation_real.detach()) \
                         + self.l1_coef * l1_loss(fake_images, right_images)\
                         + self.softmax_coef * softmax_loss  # we have seen softmax_loss starts around 2 and g_loss around 20... That's why we've scaled by 10

                # applying backpropagation and updating parameters.
                g_loss.backward()
                self.optimG.step()

            # store the info in the logger at each epoch
            self.logger.log_iteration_gan(epoch, d_loss, g_loss, real_score,
                                          fake_score, wrong_score)

            # storing the parameters every 10 epochs
            if epoch % 10 == 0:
                Utils.save_checkpoint(self.discriminator, self.generator,
                                      self.checkpoints_path, self.save_path,
                                      epoch)
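
For reference, Utils.smooth_label as used above only needs to shift the labels by a constant offset; a minimal sketch consistent with the comment (1.0 + (-0.1) = 0.9), though the actual helper may differ:

def smooth_label(labels, offset):
    # One-sided label smoothing: shift every label by a constant offset,
    # e.g. real labels of 1.0 with offset -0.1 become 0.9.
    return labels + offset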
Example #18
def test_text_parse_list():
    utils = Utils()
    test_filepath = "./utils/tests/test_utils_entities.txt"
    test_list = utils.text_parse_list(filepath=test_filepath)
    target_list = ["ABCD", "1234", "!@#$"]
    assert test_list == target_list
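
A matching sketch for text_parse_list, assuming one list entry per non-empty line (again an illustrative guess, not the actual Utils implementation):

def text_parse_list(filepath):
    # Read one list entry per non-empty line.
    with open(filepath) as f:
        return [line.strip() for line in f if line.strip()]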
Example #19
from scripts.BsHelper import BsHelper
from scripts.BsParser import BsParser
from scripts.utils import Utils
import pandas as pd
import os, time

email = input("Enter linkedin email: ")
password = input("Enter linkedin password: ")
# NOTE: the connection-message template below lost its variable name to
# redaction in the source; "MESSAGE" is a stand-in.
MESSAGE = "Hi {}, I want to connect with you."

# Login to browser
bsHelper = BsHelper("chromedriver.exe")
bsHelper.loginLinkedin(email, password)
bsParser = BsParser()
utils = Utils()
time.sleep(5)
# Change according to your requirements. For 1 page you get 10 results.
URL_TO_SEARCH = "https://www.linkedin.com/search/results/people/?keywords=senior%20data%20scientist%20job%20actively&origin=SWITCH_SEARCH_VERTICAL"
noOfPages = 3
DATA_DIR = "data"
CSV_DIR = "csv"
CSV_NAME = "accounts.csv"
# noOfPages is set to 3 above; each page yields 10 results, so for a hundred profiles set it to 10.


def createDir(path):
    if not os.path.exists(path):
        os.mkdir(path)

Example #20

import json
import subprocess
# NOTE: Utils is assumed to be importable from the surrounding project.


def getwans():
    sp = subprocess.run(["ip", "route", "show", "default"],
                        stdout=subprocess.PIPE)
    wans = ""
    for line in sp.stdout.splitlines():
        l = line.decode().split()
        #        wans += (l[4] + "," + l[2] + ";")
        sp2 = subprocess.run(["ip", "address", "show", l[4]],
                             stdout=subprocess.PIPE)
        ip = None
        for ll in sp2.stdout.splitlines():
            nl = ll.decode()
            if "inet " in nl:
                ip = nl.split()[1].split("/")[0]
                break
        if ip is not None:
            wans += (l[4] + "," + ip + ";")

    return wans


if __name__ == "__main__":
    with open('config.json') as json_file:
        config = json.load(json_file)
        config["CMD"] = "query"
        config["wans"] = getwans()
        print(Utils.getInstance().http_post("/north/", config))
        print(config)
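
For example, with a single default route via eth0 at 192.168.1.10, getwans() would return "eth0,192.168.1.10;": one interface,address pair per default route, each terminated by a semicolon (values here are illustrative).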
Example #21
 def test_format_expected_date(self):
     expected_date = Utils.format_expected_date("Friday,10:30AM",
                                                "%Y-%m-%d %I:%M%p")
     self.assertTrue("10:30AM" in expected_date)
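
A sketch of how a format_expected_date helper consistent with this test could work, assuming it resolves "Friday,10:30AM" to the next upcoming Friday at that time and renders it with the supplied strftime format (illustrative only; the real Utils method may differ):

from datetime import datetime, timedelta

def format_expected_date(day_time, fmt):
    day_name, time_part = day_time.split(",")
    weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday",
                "Friday", "Saturday", "Sunday"]
    target_time = datetime.strptime(time_part, "%I:%M%p").time()
    now = datetime.now()
    # Days until the next occurrence of the requested weekday (0 = today).
    days_ahead = (weekdays.index(day_name) - now.weekday()) % 7
    target_date = (now + timedelta(days=days_ahead)).date()
    return datetime.combine(target_date, target_time).strftime(fmt)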