Example #1
def absllm(p1, p2, words, We):
    # Symmetric phrase similarity: for each word, find its best match
    # in the other phrase, then average the two directions.
    p1 = p1.split()
    p2 = p2.split()
    total = 0
    for i in p1:
        v1 = lookup(We, words, i)
        best = 0
        for j in p2:
            v2 = lookup(We, words, j)
            score = -1 * cosine(v1, v2) + 1  # similarity = 1 - cosine distance
            if abs(score) > abs(best):
                best = score
        total += best
    llm_score = 0.5 * total / len(p1)
    total = 0
    for i in p2:
        v1 = lookup(We, words, i)
        best = 0
        for j in p1:
            v2 = lookup(We, words, j)
            score = -1 * cosine(v1, v2) + 1  # similarity = 1 - cosine distance
            if abs(score) > abs(best):
                best = score
        total += best
    llm_score += 0.5 * total / len(p2)
    return llm_score
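
A minimal usage sketch for the function above (assumptions, since the example does not show them: words maps a token to a row index, We is an embedding matrix whose rows are word vectors, lookup returns the row for a token, and cosine is SciPy's cosine distance, so -1 * cosine + 1 is cosine similarity):

import numpy as np
from scipy.spatial.distance import cosine

words = {"cat": 0, "dog": 1, "sat": 2}
We = np.array([[1.0, 0.0],
               [0.9, 0.1],
               [0.0, 1.0]])

def lookup(We, words, w):
    # hypothetical helper: fetch the embedding row for token w
    return We[words[w]]

print(absllm("cat sat", "dog sat", words, We))  # close to 1.0 for similar phrases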
Example #2
def train_naive_bayes(freqs, train_x, train_y):
    '''
    Input:
        freqs: dictionary from (word, label) to how often the word appears
        train_x: a list of tweets
        train_y: a list of labels corresponding to the tweets (0,1)
    Output:
        logprior: the log prior.
        loglikelihood: the log likelihood of your Naive Bayes equation.
    '''
    loglikelihood = {}
    logprior = 0

    # calculate V, the number of unique words in the vocabulary
    vocab = set([pair[0] for pair in freqs.keys()])
    V = len(vocab)

    # calculate N_pos and N_neg
    N_pos = N_neg = 0
    for pair in freqs.keys():
        # if the label is positive (greater than zero)
        if pair[1] > 0:

            # Increment the number of positive words by the count for this (word, label) pair
            N_pos += freqs[pair]

        # else, the label is negative
        else:

            # increment the number of negative words by the count for this (word,label) pair
            N_neg += freqs[pair]

    # Calculate D, the number of documents
    D = len(train_y)

    # Calculate D_pos, the number of positive documents
    D_pos = np.sum(train_y)

    # Calculate D_neg, the number of negative documents
    D_neg = D - D_pos

    # Calculate logprior
    logprior = np.log(D_pos) - np.log(D_neg)

    # For each word in the vocabulary...
    for word in vocab:
        # get the positive and negative frequency of the word
        freq_pos = lookup(freqs, word, 1)
        freq_neg = lookup(freqs, word, 0)

        # calculate the probability that each word is positive, and negative
        p_w_pos = (freq_pos + 1) / (N_pos + V)
        p_w_neg = (freq_neg + 1) / (N_neg + V)

        # calculate the log likelihood of the word
        loglikelihood[word] = np.log(p_w_pos / p_w_neg)

    return logprior, loglikelihood
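
A toy run, assuming the common convention that lookup(freqs, word, label) returns the stored count for the (word, label) pair, or 0 if absent (this helper is not shown in the example):

import numpy as np

def lookup(freqs, word, label):
    # assumed helper: count for a (word, label) pair, 0 if absent
    return freqs.get((word, label), 0)

freqs = {('happi', 1): 2, ('sad', 0): 3, ('happi', 0): 1}
train_y = np.array([1, 1, 0, 0])  # two positive, two negative tweets
logprior, loglikelihood = train_naive_bayes(freqs, ['t1', 't2', 't3', 't4'], train_y)
print(logprior)        # 0.0, since the classes are balanced
print(loglikelihood)   # per-word log(P(w|pos) / P(w|neg)) with add-1 smoothing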
Example #3
def prepare_data(p1,p2,model,params):
    if params.nntype != "charagram":
        chars = model.chars
        X1 = []; X2 = []
        p1 = " "+p1+" "; p2 = " "+p2+" "
        for i in p1:
            X1.append(utils.lookup(chars,i))
        for i in p2:
            X2.append(utils.lookup(chars,i))
        return X1, X2
    else:
        return model.hash(p1), model.hash(p2)
Example #4
def add(p1, p2, words, We):
    p1 = p1.split()
    p2 = p2.split()
    accumulator = np.zeros(lookup(We, words, p1[0]).shape)
    for i in p1:
        v = lookup(We, words, i)
        accumulator = accumulator + v
    p1_emb = accumulator / len(p1)
    accumulator = np.zeros(lookup(We, words, p2[0]).shape)
    for i in p2:
        v = lookup(We, words, i)
        accumulator = accumulator + v
    p2_emb = accumulator / len(p2)
    return -1 * cosine(p1_emb, p2_emb) + 1
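
Reusing the toy words, We, and lookup from the sketch under Example #1, a quick check of this averaged-embedding similarity (note the fix above: p2's average now divides by len(p2); cosine similarity is scale-invariant, so the old divisor did not change the return value, but the average itself was wrong):

print(add("cat sat", "dog sat", words, We))  # 1 - cosine distance of the phrase averages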
Example #5
 def parse_pair(k, v):
     parser_pair = utils.lookup(k, parsers, aliases=aliases)
     if parser_pair is None:
         warnings.warn("ignoring field %s:%s" % (k, v))
         return None
     k, parser = parser_pair
     return k, parser(v)
Example #6
    def goto(self, frame, lookup_columns, lookup_values, zh_travel=0):
        """
        Finds lookup_values in lookup_columns of frame's position_list; retrieves corresponding X,Y,Z.
        Transforms X,Y,Z to hardware X,Y,Z by frame's transform.
        Moves to hardware X,Y,Z, taking into account zh_travel.

        :param frame: (str) frame that specifies position_list and transform
        :param lookup_columns: (str | list) column(s) to search in position_table
        :param lookup_values: (val | list) value(s) to find in lookup_columns
        :param zh_travel: (float) hardware height at which to travel
        """
        trans, position_table = self.frames[frame]

        if lookup_columns == 'xyz':
            lookup_values = tuple(lookup_values) + (1, )
            xh, yh, zh, _ = np.dot(lookup_values, trans)
        else:
            xyz = tuple(
                ut.lookup(position_table, lookup_columns,
                          lookup_values)[['x', 'y', 'z']].iloc[0])
            xyzw = xyz + (1, )  # concatenate for translation
            xh, yh, zh, _ = np.dot(xyzw, trans)  # get hardware coordinates

        if zh_travel:
            self.Z.goto(zh_travel)
        else:
            self.Z.home()

        self.XY.goto_xy(xh, yh)
        self.Z.goto(zh)
Example #7
def get_ratio(freqs, word):
    '''
    Input:
        freqs: dictionary containing the words
        word: string to lookup

    Output: a dictionary with keys 'positive', 'negative', and 'ratio'.
        Example: {'positive': 10, 'negative': 20, 'ratio': 0.5}
    '''
    pos_neg_ratio = {'positive': 0, 'negative': 0, 'ratio': 0.0}
    pos_neg_ratio['positive'] = lookup(freqs, word, 1)
    pos_neg_ratio['negative'] = lookup(freqs, word, 0)

    # calculate the ratio of positive to negative counts for the word
    pos_neg_ratio['ratio'] = (pos_neg_ratio['positive'] + 1) / (pos_neg_ratio['negative'] + 1)
    return pos_neg_ratio
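
With the same assumed lookup helper as above (count for a (word, label) pair, 0 if absent), a quick call:

freqs = {('happi', 1): 10, ('happi', 0): 20}
print(get_ratio(freqs, 'happi'))
# {'positive': 10, 'negative': 20, 'ratio': 0.5238...} -- add-1 smoothing gives (10+1)/(20+1)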
Example #8
 def test_targeting(self):
     ips = {
         "Google": "8.8.8.8",          # Le Googs
         "USC": "129.252.37.66",       # USC
         "MIT": "18.62.0.96",          # MIT
         "Stanford": "171.64.168.31",  # Stanford
     }
     for name, ip in ips.items():
         self.assertEqual(utils.lookup(ip), name)
Example #9
    def open(self, lookup_cols, lookup_vals):
        """
        Finds lookup_vals in lookup_cols of valvemap; retrieves corresponding valve_num.
        Opens valve_num.

        :param lookup_cols: (str | list) column(s) to search in valvemap
        :param lookup_vals: (val | list) value(s) to find in lookup_cols
        """
        valve_num = ut.lookup(self.valvemap, lookup_cols, lookup_vals)[['valve']].iloc[0]
        self.depressurize(valve_num)
Example #10
	def __init__(self, name, hopcount, description, numeric=0, flags='', protocol_version=0):
		self.name = name
		self.hopcount = int(hopcount)
		self.description = description
		self.numeric = int(numeric)
		self.flags = flags
		self.protocol_version = int(protocol_version)
		self.protoctl = lookup()
		self.nickchars = []
		self.chanmodes = {}
Example #11
def get_ratio(freqs, word):
    '''
    Input:
        freqs: dictionary containing the words
        word: string to lookup

    Output: a dictionary with keys 'positive', 'negative', and 'ratio'.
        Example: {'positive': 10, 'negative': 20, 'ratio': 0.5}
    '''
    pos_neg_ratio = {'positive': 0, 'negative': 0, 'ratio': 0.0}
    ### START CODE HERE (REPLACE INSTANCES OF 'None' with your code) ###
    # use lookup() to find positive counts for the word (denoted by the integer 1)
    pos_neg_ratio['positive'] = lookup(freqs,word,1)

    # use lookup() to find negative counts for the word (denoted by integer 0)
    pos_neg_ratio['negative'] = lookup(freqs,word,0)

    # calculate the ratio of positive to negative counts for the word
    pos_neg_ratio['ratio'] = (pos_neg_ratio['positive'] + 1)/(pos_neg_ratio['negative'] + 1)
    ### END CODE HERE ###
    return pos_neg_ratio
Example #12
def generate_tensor_text(patient_text_list, w2i_lookup, conf_max_len):
    patient_list_of_indices = []
    max_words = 0
    max_notes = 0

    for patient_notes in patient_text_list:
        # patient_notes is a list of note strings
        list_of_word_idx = []
        for note in patient_notes:
            # each note is a string of words
            indices = list(
                map(lambda x: lookup(w2i_lookup, x),
                    str(note).split()))
            if conf_max_len > 0:
                indices = indices[:conf_max_len]
            list_of_word_idx.append(indices)
            max_words = max(len(indices), max_words)
        patient_list_of_indices.append(list_of_word_idx)
        max_notes = max(len(list_of_word_idx), max_notes)

    pad_token = w2i_lookup['<pad>']

    if max_notes == 0 or max_words <= 4:

        # in case no ICU stay in the batch has text, or all notes are too short to support bigram or trigram conv
        max_notes = max(max_notes, 1)
        max_words = max(max_words, 20)

    # 3. 3d pad, padding token.
    # 4. convert to numpy tensor and return
    def extra_pad_tokens(cnt):
        return [pad_token] * cnt

    padded_patient_list_of_indices = []
    for pt in patient_list_of_indices:
        padded_pt = []
        if len(pt) < max_notes:
            pt = pt + [[]] * (max_notes - len(pt))
        for l in pt:
            l = l + extra_pad_tokens(max_words - len(l))
            padded_pt.append(l)
        padded_patient_list_of_indices.append(padded_pt)

    x = np.array(padded_patient_list_of_indices)

    try:
        assert len(x.shape) == 3
        assert x.shape[0] == len(patient_text_list), "x: {}, l: {}".format(
            str(x.shape), str(len(patient_text_list)))
        return x
    except AssertionError:
        print('bad shape of x', x.shape)
        raise
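
A small sanity check, assuming lookup(w2i_lookup, word) returns the word's index and falls back to the '<pad>' index for out-of-vocabulary tokens (the real helper may differ):

import numpy as np

def lookup(w2i, word):
    # assumed helper: index for word, '<pad>' index for OOV
    return w2i.get(word, w2i['<pad>'])

w2i_lookup = {'<pad>': 0, 'chest': 1, 'pain': 2, 'acute': 3}
notes = [['patient reports sharp chest pain today', 'pain resolved'],
         ['no acute distress noted here']]
x = generate_tensor_text(notes, w2i_lookup, conf_max_len=50)
print(x.shape)  # (2, 2, 6): 2 patients, padded to 2 notes of 6 word indices each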
Example #13
 def set(self, lookup_cols, lookup_vals, pressure=0.0):
     """
     Sets pressure of specified channel.
     
     :param lookup_cols: (str | list) column(s) to search in chanmap
     :param lookup_vals: (val | list) value(s) to find in lookup_cols
     :param pressure: (float) desired pressure; units specified in config file
     """
     channel = ut.lookup(self.chanmap, lookup_cols,
                         lookup_vals)[['channel']].iloc[0]
     channel = int(channel)
     mbar = pressure * self.config['conversion_to_mbar']
     c_error = self.dll.mfcs_set_auto(self.handle, channel, c_float(mbar))
Example #14
def report_latest():
    if len(current_user.session) <= 0:
        flash('You need to chat in order to generate a report')
        return redirect(url_for('dashboard'))
    score, x, scores_array, text = latest_report()
    url1 = freq_words(text)
    url2 = depression_dist(scores_array)
    url3 = depression_trend(x, scores_array)
    verdict = lookup(score)
    return render_template('report_latest.html',
                           score=round(score, 2),
                           verdict=verdict,
                           plot1=url1,
                           plot2=url2,
                           plot3=url3)
Example #15
 def __init__(self,
              name,
              hopcount,
              description,
              numeric=0,
              flags='',
              protocol_version=0):
     self.name = name
     self.hopcount = int(hopcount)
     self.description = description
     self.numeric = int(numeric)
     self.flags = flags
     self.protocol_version = int(protocol_version)
     self.protoctl = lookup()
     self.nickchars = []
     self.chanmodes = {}
Example #16
def generate_tensor_text(patient_text_list, w2i_lookup, conf_max_len):
    patient_list_of_indices = []
    max_indices_listlen = -1
    max_sentences_listlen = -1

    number_of_docs = []

    for patient_text in patient_text_list:
        # each patient_text is a list of sentences
        list_of_indices = []
        number_of_docs.append(len(patient_text))
        for sentence in patient_text:
            # each sentence is a string of words
            indices = list(map(lambda x: lookup(
                w2i_lookup, x), str(sentence).split()))
            if conf_max_len > 0:
                indices = indices[:conf_max_len]
            list_of_indices.append(indices)
            max_indices_listlen = max(len(indices), max_indices_listlen)
        patient_list_of_indices.append(list_of_indices)
        max_sentences_listlen = max(len(list_of_indices), max_sentences_listlen)

    pad_token = w2i_lookup['<pad>']

    # 3. 3d pad, padding token.
    # 4. convert to numpy tensor and return
    def extra_pad_tokens(cnt):
        return [pad_token] * cnt

    padded_patient_list_of_indices = []
    for pt in patient_list_of_indices:
        padded_pt = []
        if len(pt) < max_sentences_listlen:
            pt = pt + [[]]*(max_sentences_listlen-len(pt))
        for l in pt:
            l = l + extra_pad_tokens(max_indices_listlen - len(l))
            padded_pt.append(l)
        padded_patient_list_of_indices.append(padded_pt)

    x = np.array(padded_patient_list_of_indices)
    l = np.array(number_of_docs)

    assert len(x.shape) == 3
    assert x.shape[0] == l.shape[0]
    assert x.shape[0] == len(patient_text_list), "x: {}, l: {}".format(
        str(x.shape), str(len(patient_text_list)))
    return x, l
Example #17
    def read(self, lookup_cols, lookup_vals):
        """
        Reads current pressure of the channel.
        
        :param lookup_cols: (str | list) column(s) to search in chanmap
        :param lookup_vals: (val | list) value(s) to find in lookup_cols
        :return: (float) current pressure; units specified in config file
        """
        pressure = c_float()
        timer = c_ushort()

        channel = ut.lookup(self.chanmap, lookup_cols,
                            lookup_vals)[['channel']].iloc[0]
        channel = int(channel)
        c_error = self.dll.mfcs_read_chan(self.handle, channel,
                                          pointer(pressure), pointer(timer))

        mbar = pressure.value
        return mbar / self.config['conversion_to_mbar']
Example #18
async def on_message(message):
    channel = message.channel
    if message.content[0:2] == ">>":
        spell_query = message.content[2:]
        try:
            resp = utils.lookup(spell_query)
            # Discord message limit
            chunks = utils.discordWrapper(resp, [])
            for chunk in chunks:
                await channel.send(chunk)
            #await channel.send(resp)
        except Exception:
            await channel.send("Spell not found")

    if message.content[0:5] == "!roll":
        expression = message.content[5:]
        try:
            resp = utils.parseRoll(expression.strip())
            await channel.send(resp)
        except Exception:
            await channel.send("Invalid expression")
Example #19
    def goto(self, frame, lookup_columns, lookup_values):
        """
        Finds lookup_values in lookup_columns of frame's position_list; retrieves corresponding X,Y.
        Transforms X,Y to hardware X,Y by frame's transform.
        Moves to hardware X,Y.

        :param frame: (str) frame that specifies position_list and transform
        :param lookup_columns: (str | list) column(s) to search in position_table
        :param lookup_values: (val | list) value(s) to find in lookup_columns
        """
        trans, position_table = self.frames[frame]

        if lookup_columns == 'xy':
            lookup_values = tuple(lookup_values) + (1, )
            xh, yh = np.dot(lookup_values, trans)
        else:
            xy = tuple(
                ut.lookup(position_table, lookup_columns,
                          lookup_values)[['x', 'y']].iloc[0])
            xyw = xy + (1, )  # concatenate for translation
            xh, yh, _ = np.dot(xyw, trans)  # get hardware coordinates

        self.XY.goto_xy(xh, yh)
Example #20
def train_naive_bayes(freqs, train_x, train_y):
    '''
    Input:
        freqs: dictionary from (word, label) to how often the word appears
        train_x: a list of tweets
        train_y: a list of labels corresponding to the tweets (0,1)
    Output:
        logprior: the log prior. (equation 3 above)
        loglikelihood: the log likelihood of your Naive Bayes equation. (equation 6 above)
    '''
    loglikelihood = {}
    logprior = 0

    ### START CODE HERE (REPLACE INSTANCES OF 'None' with your code) ###

    # calculate V, the number of unique words in the vocabulary
    vocab = set([pair[0] for pair in freqs.keys()])
    V = len(vocab)

    # calculate N_pos and N_neg
    N_pos = N_neg = 0
    for pair in freqs.keys():
        # if the label is positive (greater than zero)
        if pair[1] > 0:

            # Increment the number of positive words by the count for this (word, label) pair
            N_pos += lookup(freqs,pair[0],1)

        # else, the label is negative
        else:

            # increment the number of negative words by the count for this (word,label) pair
            N_neg += lookup(freqs,pair[0],0)

    # Calculate D, the number of documents
    D = len(train_y)

    # Calculate D_pos, the number of positive documents (*hint: use sum(<np_array>))
    D_pos = sum(train_y)
    
    # Calculate D_neg, the number of negative documents (*hint: compute using D and D_pos)
    D_neg = D - D_pos
    
    # Calculate logprior
    logprior = np.log(D_pos) - np.log(D_neg)
    
    # For each word in the vocabulary...
    for word in vocab:
        # get the positive and negative frequency of the word
        freq_pos = lookup(freqs,word,1)
        freq_neg = lookup(freqs,word,0)
        
        # calculate the probability that each word is positive, and negative
        p_w_pos = (freq_pos + 1)/(N_pos + V)
        p_w_neg = (freq_neg + 1)/(N_neg + V)

        # calculate the log likelihood of the word
        loglikelihood[word] = np.log(p_w_pos / p_w_neg)

    ### END CODE HERE ###

    return logprior, loglikelihood
Example #21
 def __init__(self, evalFn='scoreEvaluationFunction', depth='2'):
     self.index = 0  # Pacman is always agent index 0
     self.evaluationFunction = utils.lookup(evalFn, globals())
     self.depth = int(depth)
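
Here utils.lookup presumably resolves a function name to the function object in the given namespace; a minimal stand-in for that behavior (hypothetical, not the project's actual helper):

def lookup(name, namespace):
    # fetch an object by name from a namespace dict such as globals()
    return namespace[name]

def scoreEvaluationFunction(state):
    return 0  # placeholder evaluation function

fn = lookup('scoreEvaluationFunction', globals())
assert fn is scoreEvaluationFunction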
Example #22
 def populate_embeddings_characters(self, chars):
     phrase = " " + self.phrase.lower() + " "
     for i in phrase:
         self.embeddings.append(utils.lookup(chars, i))
Example #23
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        root=root)
    logging.info("- Done.")

    # Training from scratch
    model_fd = getattr(models, model_folder)

    # Network-based
    if args.MulStu:
        model_cfg = getattr(model_fd, 'MultiNet')
        model = getattr(model_cfg,
                        'StuNet')(model=args.model,
                                  num_branches=args.num_branches,
                                  num_classes=num_classes,
                                  input_channel=utils.lookup(args.model),
                                  dropout=args.dropout)
    # Branch-based
    else:
        if "resnet" in args.model:
            model_cfg = getattr(model_fd, 'resnet_one')
            model = getattr(model_cfg,
                            args.model)(num_classes=num_classes,
                                        num_branches=args.num_branches,
                                        ind=args.ind,
                                        avg=args.avg,
                                        bpscale=args.bpscale)
        elif "vgg" in args.model:
            model_cfg = getattr(model_fd, 'vgg_one')
            model = getattr(model_cfg,
                            args.model)(num_classes=num_classes,
Example #24
     model_folder = "model_cifar"
     root='/home/chendefang/MC/Data'
 elif args.dataset == 'imagenet':
     num_classes = 1000
     model_folder = "model_imagenet"
     root = '/home/meijianping/Test/Data'
 
 # Load data
 train_loader, test_loader = data_loader.dataloader(data_name=args.dataset, batch_size=args.batch_size, num_workers=args.num_workers, root=root)
 logging.info("- Done.")

 # Training from scratch
 model_fd = getattr(models, model_folder)
 if args.MulStu:
     model_cfg = getattr(model_fd, 'MultiNet')
     model = getattr(model_cfg, 'StuNet')(model=args.model, num_branches=args.num_branches, num_classes=num_classes, input_channel=utils.lookup(args.model), dropout=args.dropout)
 elif args.type == 'DML':
     model_cfg = getattr(model_fd, 'DML')
     model = getattr(model_cfg, 'MutualNet')(model=args.model, num_branches=args.num_branches, num_classes=num_classes)
 else:
     if "resnet" in args.model:
         model_cfg = getattr(model_fd, 'resnet_GL')
         model = getattr(model_cfg, args.model)(num_classes=num_classes, num_branches=args.num_branches, input_channel=utils.lookup(args.model))
     elif "vgg" in args.model:
         model_cfg = getattr(model_fd, 'vgg_GL')
         model = getattr(model_cfg, args.model)(num_classes=num_classes, num_branches=args.num_branches)
     elif "densenet" in args.model:
         model_cfg = getattr(model_fd, 'densenet_GL')
         model = getattr(model_cfg, args.model)(num_classes=num_classes, num_branches=args.num_branches)
Example #25
cwd = os.getcwd()
startTime = int(time.time())
DEBUG = True

stats = {"normal": 0, "invisible": 0, "servers": 0, "opers": 0, "channels": 0}


def shutdown(code):
    global exitCode
    exitCode = code


protoctl = lookup(
    #"NOQUIT",
    "NICKv2",
    "VL",
    "SJ3",
    #"NS",
    "NICKIP",
    "CLK")
pro_nickchars = []

#see http://www.unrealircd.com/files/docs/technical/serverprotocol.html for details on meaning of flags
flags = ["h"]
#if command line config file location is used, add flag C
if DEBUG: flags.append("D")
#if we're on windows, append flag W
#if we log to syslog, append flag Y
#if we support ipv6, append flag 6
#if we have ssl support, add flag e
#if we implement ziplinks, add flag Z
Example #26
	"normal": 0,
	"invisible": 0,
	"servers": 0,
	"opers": 0,
	"channels": 0
	}

def shutdown(code):
	global exitCode
	exitCode = code

protoctl = lookup(
	#"NOQUIT",
	"NICKv2",
	"VL",
	"SJ3",
	#"NS",
	"NICKIP",
	"CLK"
	)
pro_nickchars = []

#see http://www.unrealircd.com/files/docs/technical/serverprotocol.html for details on meaning of flags
flags = ["h"]
#if command line config file location is used, add flag C
if DEBUG: flags.append("D")
#if we're on windows, append flag W
#if we log to syslog, append flag Y
#if we support ipv6, append flag 6
#if we have ssl support, add flag e
#if we implement ziplinks, add flag Z
Example #27
from dataloaders import MultiModal_Dataset, custom_collate_fn
import functools
import json
from tqdm import tqdm
from sklearn import metrics
from utils import BootStrap, BootStrapDecomp, BootStrapLos, BootStrapIhm, BootStrapPheno, BootStrapLtm
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D      

#----------------------- Data locations -----------------------#
conf = utils.get_config()
args = utils.get_args()
vectors, w2i_lookup = utils.get_embedding_dict(conf)
#Note that some more paths are in conf
if conf.padding_type == 'Zero':
    vectors[utils.lookup(w2i_lookup, '<pad>')] = 0
train_val_ts_root_dir = '/home/luca/mutiltasking-for-mimic3/data/expanded_multitask/train'
test_ts_root_dir = '/home/luca/mutiltasking-for-mimic3/data/expanded_multitask/test'
train_val_text_root_dir = '/home/luca/mutiltasking-for-mimic3/data/root/train_text_ds/'
test_text_root_dir = '/home/luca/mutiltasking-for-mimic3/data/root/test_text_ds/'
train_val_tab_root_dir = '/home/luca/MultiModal-EHR/data/root/train/'
test_tab_root_dir = '/home/luca/MultiModal-EHR/data/root/test/'
train_listfile = '4k_train_listfile.csv'
val_listfile = '4k_val_listfile.csv'
test_listfile ='test_listfile.csv'
train_val_starttime_path = conf.starttime_path_train_val
test_starttime_path = conf.starttime_path_test

#======================================Hyperparameters======================================#
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#If we don't care about a task, set its weight to 0
Example #28
 def __init__(self, evalFn='scoreEvaluationFunction', depth='2'):
     self.index = 0  # Pacman is always agent index 0
     self.evaluationFunction = utils.lookup(evalFn, globals())
     self.depth = int(depth)