Exemplo n.º 1
0
                ('%s_vs_deepwalk_distcorr' % method):
                compute_distance_correlation(topology_weights[method],
                                             topology_weights['deepwalk'],
                                             tokens, unattacked_indx)
            })
            exp_result.update({
                ('%s_vs_glove_distcorr' % method):
                compute_distance_correlation(topology_weights[method],
                                             topology_weights['glove'], tokens,
                                             unattacked_indx)
            })
        results.append(exp_result)

        # Compute cooccurrences
        (cooccurrence_list, index_vocab_list, vocab_index_lookup,
         tokenized_cooccurrences) = count_cooccurrences(walks_prime, 5)

        # Compute mrr curves
        for method in topology_weights:
            normalize_rows = False
            normalize_cols = False
            exp_result.update({
                ('%s_mrr_curve_full' % method): [
                    compute_mrr_curve(topology_weights[method],
                                      tokenized_cooccurrences,
                                      tokens,
                                      normalize_rows=normalize_rows,
                                      normalize_cols=normalize_cols)
                ]
            })
            exp_result.update({
  def __init__(self,
               vector_size,
               vocab_filename=None,
               covariate_size=0,
               random_seed=12345,
               init_weight_dir=None,
               random_walks=None,
               covariate_data=None,
               window_size=5):
    """Initializes the data reading and model variables.

    Loads (or prepares) the vocabulary and co-occurrence statistics, then
    sets up placeholder slots for the model parameters and the bookkeeping
    counters used during training.

    Args:
      vector_size: size of the word vectors.
      vocab_filename: filename for getting word tokens. Each line's first
        whitespace-separated field is taken as the token; any remaining
        fields on the line are ignored.
      covariate_size: size of the covariate embedding dimension
      random_seed: seed the initialization generator
      init_weight_dir: directory to pull initial weights from. defaults to a
        uniform initializer if none.
      random_walks: a list of tokenized sentences
      covariate_data: a keyed list of float lists, where each key identifies a
        token in the corpus, and each float list is a row of covariate data
      window_size: window size to use for cooccurrence counting, if needed
    Returns: (none)
    """
    print('setting up basic stuff...')
    # Get word tokens
    self._vector_size = vector_size
    self._covariate_size = covariate_size
    # Vocabulary state: ordered token list plus token -> row-index lookup.
    self._tokens = []
    self._vocab_index_lookup = None
    if vocab_filename:
      with open(vocab_filename, 'r') as f:
        for line in f:
          # Only the first field is the token; trailing fields are dropped.
          self._tokens.append(line.split()[0])
      self._vocab_index_lookup = dict(
          zip(self._tokens, list(range(len(self._tokens)))))
    self._cooccurrences = None
    self._cooccurrence_dict = None
    print('loading or computing co-occurrences...')
    if random_walks:
      # The tokens and index lookup are intentionally replaced by whatever
      # count_cooccurrences returns; the existing lookup (possibly None) is
      # passed in so the helper can reuse it.
      (self._cooccurrences, self._tokens, self._vocab_index_lookup,
       self._cooccurrence_dict) = glove_util.count_cooccurrences(
           random_walks, window_size, self._vocab_index_lookup)
    self._vocab_size = len(self._tokens)

    # Get covariate data
    print('setting other placeholders...')
    if covariate_data is not None:
      # Reorder covariate rows to match token order. A token missing from
      # covariate_data raises KeyError here — fail-fast is presumably
      # intended, but confirm against callers.
      self._covariate_data = np.array([covariate_data[t] for t in self._tokens])

    # Placeholders for parameter tensors and other trackers.
    # NOTE(review): the key 'outpt' looks like a typo for 'output' — do not
    # rename without checking every consumer of these dicts, since the key is
    # looked up at runtime.
    io_dict = {'input': None, 'outpt': None}
    self._word = copy.deepcopy(io_dict)  # input/output word-vector slots
    self._bias = copy.deepcopy(io_dict)  # input/output bias slots
    self._iter = 0  # training-iteration counter
    self._sum_cost = 0  # running cost accumulators (reset elsewhere)
    self._sum_adv_cost_g = 0
    self._sum_adv_cost_d = 0
    self._random_seed = random_seed
    self._init_weight_dir = init_weight_dir

    # Pointers to variables needed for covariate model
    self._cvrt = copy.deepcopy(io_dict)
    self._cvrt_transformation = copy.deepcopy(io_dict)

    # Initialize the cooccurrence read format: 'iid' = two C ints followed by
    # a double (presumably (row, col, count) records — confirm against the
    # writer). Pre-compiling the Struct makes repeated unpack_from calls fast.
    self._cooccurrence_fmt = 'iid'
    self._cooccurrence_fmt_length = struct.calcsize(self._cooccurrence_fmt)
    self._struct_unpack = struct.Struct(self._cooccurrence_fmt).unpack_from