def get_priors():
    try:
        load_dotenv('/home/amanda/bigDisk/Twitter/creds/.env')
        username = os.getenv('DATABASE_USER')
        password = os.getenv('DATABASE_PASSWORD')
        conn_string = "dbname='twitter' user="******" password = "******"got filenames"
    for file_path, file_name in file_names:
        try:
            f = json.load(open(file_path +'/' + file_name, 'r'))
        except:
            print file_path
            print file_name
            continue
        if len(f) > 150:
            user_id = file_name.split('_')[0]
            cur.execute('SELECT user_info_json FROM followers WHERE user_id = %s', (user_id,))
            record = cur.fetchone()
            if record:
                if record[0]:
                    user_info = ast.literal_eval(record[0])
                else:
                    continue
                gf = GetFeatures(user_id, user_info, f)
                gf.user_features()
                gf.collect_tweets()
                gf.content_features()
                gf.temporal_features()
                # need to incorporate other network features
        #        gf.features['num_shared_edges'] = follower_counts[user]
                features_list.append(gf.features)

    print len(features_list)
    pickle.dump(features_list, open('priors_feature_list.p', 'wb'))
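
The masked block at the top of get_priors loads database credentials from a .env file and opens a PostgreSQL connection before walking the stream folders. A minimal sketch of that setup, assuming psycopg2 as the driver (which matches the DSN-style connection string and the %s query placeholders) and a hypothetical helper name:

import os
import psycopg2
from dotenv import load_dotenv

def open_twitter_db_and_list_files(stream_dir):
    # load DATABASE_USER / DATABASE_PASSWORD from the creds .env file
    load_dotenv('/home/amanda/bigDisk/Twitter/creds/.env')
    username = os.getenv('DATABASE_USER')
    password = os.getenv('DATABASE_PASSWORD')
    conn = psycopg2.connect(dbname='twitter', user=username, password=password)
    cur = conn.cursor()
    # collect (path, name) pairs for every timeline file under the stream folder
    file_names = [(path, name)
                  for path, _, names in os.walk(stream_dir)
                  for name in names]
    return cur, file_names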
Example No. 2
File: main.py  Project: achic19/lead
    def __init__(self, neighborhood, folder, what_to_run, landmark_function_to_run, name):
        """
        :param neighborhood: which neiberhood exam accessibility
        :param folder: to save the results
        :param what_to_run: in developing stage no all the function should run
        :param landmark_function_to_run: many steps in this class , so sometime I run only part of them

        """
        # build the network based on the neighborhood
        # save absolute path to pedestrian_flow_model before changing workspace folder
        pedestrian_flow_folder = os.path.dirname(__file__) + '/pedestrian_flow'
        os.chdir(folder)
        if what_to_run['Network']:
            print('run Network')
            Network(neighborhood, name + '_ntwrk.shp')

        if what_to_run['get_features']:
            print(' get_features')
            gdb = gpd.read_file(name + '_ntwrk.shp')
            GetFeatures(gdb, neighborhood, name + '_features.shp')

        if what_to_run['pedestrian flow']:
            print(' pedestrian flow')
            GetFeatures.calculate_padestrain_flow(pedestrian_flow_folder + '/finalized_model.sav',
                                                  name + '_features.shp',
                                                  pedestrian_flow_folder, name + '_ped_flow.shp')

        # Calculate landmark criterion
        if what_to_run['landmark']:
            print('run landmark')
            Landmark(landmark_function_to_run, neighborhood, name)

        # Calculate waytype criterion
        if what_to_run['WayType']:
            print('run WayType')
            WayType(name)

        # Calculate complexity criterion
        if what_to_run['Complexity']:
            print('run Complexity')
            Complexity(name)

        # Calculate final cost
        if what_to_run['Final']:
            print('run Final')
            Final(name)
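
A hedged usage sketch for this constructor: the class name is not visible in the excerpt (Accessibility below is only a placeholder), and the what_to_run keys mirror the checks above.

# hypothetical driver; the real class name and neighborhood object come from the project
neighborhood = 'Some Neighborhood, Some City'   # placeholder; expected form depends on Network()
what_to_run = {
    'Network': True,
    'get_features': True,
    'pedestrian flow': False,   # skip steps not needed in this run
    'landmark': True,
    'WayType': True,
    'Complexity': True,
    'Final': True,
}
landmark_function_to_run = {}   # selects which landmark sub-steps to run
Accessibility(neighborhood, folder='results', what_to_run=what_to_run,
              landmark_function_to_run=landmark_function_to_run, name='my_area')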
Example No. 3
def get_priors():
    folders = os.walk('/home/amanda/bigDisk/Twitter/random_streams/')
    try:
        load_dotenv('/home/amanda/bigDisk/Twitter/creds/.env')
        username = os.getenv('DATABASE_USER')
        password = os.getenv('DATABASE_PASSWORD')
        conn_string = "dbname='twitter' user="******" password = "******"too long"
                    print gf.features
                features_list.append(gf.features)
    pickle.dump(features_list, open('priors_feature_list.p', 'wb'))
    def GetFeatureMaps(self):
        """Stores feature maps of the stored faces obtained using a pretrained vgg16 network"""

        # Make feature model
        FeatureModel = MakeFeatureModel(modelName='vgg16')

        # Get list of list of feature maps
        FeatureMaps = []
        for Face in self.Faces:
            Face = Face.unsqueeze(0)
            Face = nn.functional.interpolate(Face,
                                             size=(256, 256),
                                             mode='bilinear')
            FeatureMaps.append(GetFeatures(Face, FeatureModel))
        assert (len(FeatureMaps) == len(self.Faces)
                ), 'Feature maps obtained not equal to number of stored faces'

        # Concatenate feature maps in batch size dimension
        self.FeatureMaps = []
        for i in range(len(FeatureMaps[0])):
            self.FeatureMaps.append(
                torch.cat([Map[i] for Map in FeatureMaps], dim=0))
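
MakeFeatureModel and GetFeatures are not shown in this excerpt; a common pattern for "feature maps from a pretrained vgg16" is to grab intermediate activations from torchvision's VGG16, roughly as in this sketch (the helper names and layer indices here are assumptions, not the project's actual API):

import torch
import torch.nn as nn
from torchvision import models

def make_vgg16_feature_model(layers=(3, 8, 15, 22)):
    # hypothetical stand-in for MakeFeatureModel: frozen pretrained VGG16 feature stack
    vgg = models.vgg16(pretrained=True).features.eval()
    for p in vgg.parameters():
        p.requires_grad = False
    return vgg, layers

def get_vgg_features(x, model_and_layers):
    # hypothetical stand-in for GetFeatures: return activations at the chosen layers
    vgg, layers = model_and_layers
    feats = []
    for i, layer in enumerate(vgg):
        x = layer(x)
        if i in layers:
            feats.append(x)
    return feats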
def TrainLatentsEnc(Model, FaceLatents, Epochs_n, LearnRate, LearnRateW, BatchSize=2,\
                    Weight_feature=1e2, Weight_pixel=1, Weight_tp=1e4, \
                    TrainZ=True, StoredTargetFeatures=True, PrintInterval=20, MaxBatchPrint=10, UseCuda=True):
    """Training function to train a StyleGAN encoder network and optionally an input Zspace to map the Zspace 
    to an extended Wspace (input to StyleGAN synthesis network) corresponding to target faces stored in FaceLatents.
    """

    # Initialisation of model
    if UseCuda:
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        if device == 'cpu':
            # raising Warning here would abort training; just report the fallback to CPU
            print('UseCuda was set true but cuda is not available, using cpu')
    else:
        device = 'cpu'

    if device != FaceLatents.device:
        print(
            'Training function and FaceLatents class are using different devices'
        )
    Model = Model.to(device)
    for param in Model.parameters():
        param.requires_grad = False

    # Initialise learnable parameters
    OptimParams = []
    # Input Zspace
    if TrainZ:
        OptimParams = OptimParams + [FaceLatents.Zspace]
    # Parameters of encoder network
    DecompParams = list(Model.g_all.g_decompress.parameters())
    for params in DecompParams:
        params.requires_grad = True
    OptimParams = OptimParams + DecompParams
    # Initialise optimiser
    Optimiser = torch.optim.Adam(OptimParams, lr=LearnRate)

    # Initialise optimiser for W+ for target propagation
    WOptimiser = torch.optim.Adam([FaceLatents.Wspace], lr=LearnRateW)

    # Initialise loss variables
    Loss = torch.tensor(0, device=device).float()
    Loss_feature = torch.tensor(0, device=device).float()
    Loss_pixel = torch.tensor(0, device=device).float()
    Loss_tp = torch.tensor(0, device=device).float()

    # Initialise feature model to get feature maps from
    if Weight_feature > 0:
        FeatureModel = MakeFeatureModel(modelName='vgg16')

    # Calculate batches
    Batch_n = FaceLatents.Size // BatchSize
    if FaceLatents.Size % BatchSize != 0:
        Batch_n += 1

    # Training loop
    TimeStart = time.time()
    for Epoch_i in range(1, Epochs_n + 1):
        # Shuffle batch indices
        RandIdx = list(np.random.permutation(FaceLatents.Size))

        for Batch_i in range(Batch_n):
            # Reset gradients
            Optimiser.zero_grad()
            WOptimiser.zero_grad()

            # Get a batch
            StartIdx = Batch_i * BatchSize
            EndIdx = StartIdx + BatchSize
            BatchIdx = RandIdx[StartIdx:EndIdx]
            Zspace = FaceLatents.Zspace[BatchIdx].to(device)
            Wspace = FaceLatents.Wspace[BatchIdx].to(device)
            Faces = FaceLatents.Faces[BatchIdx].to(device)

            # Get target features for the batch
            if StoredTargetFeatures:
                TargetFeatures = [
                    Maps[BatchIdx].to(device)
                    for Maps in FaceLatents.FeatureMaps
                ]
            else:
                Faces_ds = nn.functional.interpolate(Faces,
                                                     size=(256, 256),
                                                     mode='bilinear')
                TargetFeatures = GetFeatures(Faces_ds,
                                             FeatureModel,
                                             UseCuda=UseCuda)

            # Learn target W+ and compute target propagation loss
            if Weight_tp > 0:
                # Get synthesis output of W+ target
                ModelOut_tgt = Model.g_all.g_synthesis(Wspace)

                # Standardise pixel values to [0,1]
                ModelOut_tgt = ModelOut_tgt.clone().clamp_(-1,
                                                           1).add_(1).div_(2.0)

                # Target feature loss
                if Weight_feature > 0:
                    # Get feature maps
                    ModelOut_ds_tgt = nn.functional.interpolate(
                        ModelOut_tgt, size=(256, 256), mode='bilinear')
                    OutFeatures_tgt = GetFeatures(ModelOut_ds_tgt,
                                                  FeatureModel)
                    Loss_feature_tgt = functools.reduce(
                                            lambda x, y : x + y ,
                                            [nn.functional.mse_loss(Features[0], Features[1])\
                                            for Features in zip(OutFeatures_tgt, TargetFeatures)])

                # Target pixel loss
                Loss_pixel_tgt = nn.functional.mse_loss(ModelOut_tgt, Faces)
                # Aggregate pixel and feature loss
                Loss_W_ex_tgt = Weight_feature * Loss_feature_tgt + Weight_pixel * Loss_pixel_tgt
                # Learn W+ target
                Loss_W_ex_tgt.backward()
                WOptimiser.step()

                # Calculate target propagation loss
                Wspace_Z = Model.g_all.g_decompress(Zspace)
                Loss_tp = nn.functional.mse_loss(Wspace_Z, Wspace.detach())

            # Get model output from Zspace
            ModelOut = Model(Zspace)

            # Standardise output pixel values to [0,1]
            ModelOut = ModelOut.clone().clamp_(-1, 1).add_(1).div_(2.0)

            # Compute feature (perceptual) loss
            Loss_feature = torch.tensor(0, device=device).float()
            if Weight_feature > 0:
                ModelOut_ds = nn.functional.interpolate(ModelOut,
                                                        size=(256, 256),
                                                        mode='bilinear')
                OutFeatures = GetFeatures(ModelOut_ds,
                                          FeatureModel,
                                          UseCuda=UseCuda)
                Loss_feature = functools.reduce(
                                    lambda x, y : x + y ,
                                    [nn.functional.mse_loss(Features[0], Features[1])\
                                    for Features in zip(OutFeatures, TargetFeatures)])

            # Compute pixel loss
            Loss_pixel = nn.functional.mse_loss(ModelOut, Faces)

            # Aggregate losses
            Loss = Weight_feature * Loss_feature + Weight_pixel * Loss_pixel + Weight_tp * Loss_tp

            # Print metrics
            if (Epoch_i == 1 or Epoch_i == Epochs_n or Epoch_i % PrintInterval == 0) \
                and Batch_i < MaxBatchPrint:
                print('Epoch: ', Epoch_i)
                print('Batch: ', Batch_i + 1)
                print('This batch: ',
                      [FaceLatents.FaceNames[Idx] for Idx in BatchIdx])
                print('Total time elapsed: ', time.time() - TimeStart, ' s')
                print('Feature loss: ', Loss_feature.item(), \
                      ' | Pixel loss: ', Loss_pixel.item(), \
                      ' | Target prop loss: ', Loss_tp.item())
                print('Weighted - Feature loss: ', Weight_feature*Loss_feature.item(), \
                      ' | Pixel loss: ', Weight_pixel*Loss_pixel.item(), \
                      ' | Target prop loss: ', Weight_tp*Loss_tp.item())
                print('Total loss: ', Loss.item())
                ShowModelOutput(ModelOut)
                if Weight_tp > 0:
                    print('Target W+ output')
                    print('W+ loss: ', Loss_W_ex_tgt.item())
                    ShowModelOutput(Model.g_all.g_synthesis(Wspace))

            # Perform gradient descent and backprop
            Loss.backward()
            Optimiser.step()
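
A hedged example of how TrainLatentsEnc is presumably invoked; Model (the StyleGAN wrapper exposing g_all.g_decompress / g_all.g_synthesis) and the FaceLatents container come from this project and are assumed to already be built. The epoch count and learning rates below are illustrative values, not the author's settings:

# hypothetical call using the weight defaults shown above
TrainLatentsEnc(Model, FaceLatents,
                Epochs_n=200, LearnRate=1e-4, LearnRateW=1e-2,
                BatchSize=2, Weight_feature=1e2, Weight_pixel=1, Weight_tp=1e4,
                TrainZ=True, StoredTargetFeatures=True, PrintInterval=20)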
Example No. 6
            # special case with Tel Aviv
            city = 'tel_aviv'
            place = place['geometry'][0]
            output = os.path.join('networks', 'tel_aviv.shp')

        if 'network' in keys_parameters:
            print(' network')
            Network(place,
                    output,
                    centrality=True,
                    useful_tags_path=['highway'])

        if 'get_features' in keys_parameters:
            print(' get_features')
            gdb = gpd.read_file(output)
            GetFeatures(gdb, place, 'output/' + city + '.shp')

# merge labels and features
if 'merge_labels_features' in keys_parameters:
    print(' merge_labels_features')

    if parameters['country'] == 'Germany':
        features_folder = 'output'
        features_file = MergeLabelFeatures.merge_location_to_features(
            features_folder)
        os.chdir(os.path.dirname(__file__))
        # for Germany the feature file type and encoding are different
        features_file = pd.read_csv('germany/features.csv')
        labels_file = pd.read_csv('germany/labels.csv')
        encoding = 'utf-8-sig'
    else:
Example No. 7
    def find_bots(self, priors):
        self.users_to_query = set()
        user_features = {}
        followers_set = set(self.followers)
        if self.level > 0:
            print "Number of followers: " + str(len(self.followers))
            follower_counts = Counter(self.followers).most_common()
            # should fix this to be a more precise measure
            size_to_keep = int(.08*len(self.followers))
            connectedness_threshold = floor(0.3*self.n)
            print size_to_keep
            print connectedness_threshold
            tmp_followers = [f[0] for f in follower_counts if f[1] > connectedness_threshold]
            print "NUmber of followers over threshold = " + str(len(tmp_followers))
            if len(tmp_followers) < size_to_keep:
                tmp_followers.extend([f[0] for f in follower_counts[:size_to_keep] if f[1] > 1])
            followers_set = set(tmp_followers)
            print "Number of connected followers: " + str(len(followers_set))
        print "Getting all user info..."
        for follower in followers_set:
            user_info = None
            follower = str(follower)
            if follower not in self.users and follower not in self.ignore_users:
                self.cur.execute('SELECT suspended, deleted, other_error, user_info, user_info_json FROM followers WHERE user_id = %s', (follower,))
                record = self.cur.fetchone()
                if record:
                    if record[0] or record[1] or record[2]:
                        self.ignore_users.add(follower)
                        continue
                    if record[3] and not record[4]:
                        self.ignore_users.add(follower)
                        continue
                    if record[3] and record[4]:
                        try:
                            self.user_info[follower] = ast.literal_eval(record[4])
                            continue
                        except:
                            self.ignore_users.add(follower)
                            continue
                self.users_to_query.add(follower)
        get_user_info(self)
        print "Getting all timeline info and extracting features"
        for follower in followers_set:
            timeline = None
            follower = str(follower)
            if follower not in self.users and follower not in self.ignore_users:
                self.users.add(follower)
                self.cur.execute('SELECT suspended, deleted, other_error, timeline FROM followers WHERE user_id = %s', (follower,))
                record = self.cur.fetchone()
                if record:
                    if record[0] or record[1] or record[2]:
                        self.ignore_users.add(follower)
                        # print "User is suspended or deleted"
                        continue
                    if record[3]:
                        # print "Already have timeline information for user number " + follower
                        # Have to read in file to get timeline info
                        timeline = get_timeline_from_file(self, follower)
                    else:
                        timeline = get_user_timeline(self, follower)
                else:
                    timeline = get_user_timeline(self, follower)
                if timeline and self.user_info.get(follower) and len(timeline) > 50:
                    gf = GetFeatures(follower, self.user_info[follower], timeline)
                    try:
                        gf.user_features()
                        gf.collect_tweets()
                        gf.content_features()
                        gf.temporal_features()
                    except Exception as e:
                        print "ERROR GETTING FEATURES"
                        print e
                        print follower
                        print self.user_info[follower]
                    # need to incorporate other network features
                    #gf.features['num_shared_edges'] = follower_counts[user]
                    user_features[follower] = gf.features
                    self.current_level_users.append(follower)
        # we can look at the out-degree of the collapsed ego network. We also calculate the average out degree,
        # which is the average number of followers per follower.
        # need to get the followers for all these
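        # e.g. with follower_counts = Counter(self.followers).most_common(), the average
        # out-degree would be sum(c for _, c in follower_counts) / float(len(follower_counts))
        # (an illustration only; it is not computed in this excerpt)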
        len_priors = len(priors)
        current_features = priors
        current_features.extend(user_features.values())
        print "Performing anomaly detection"
        #json.dump(priors, open('test.json', 'w'), indent=4, separators=(',', ': '))
        X = self.vec.fit_transform(current_features).toarray()
        current_features = {}
        X_norm = normalize(X)
        #print np.any(np.isnan(X))
        #print np.all(np.isfinite(X))
        outliers = self.perform_outlier_detection(X, len_priors)

        #How do I add back in the outliers to the anomaly detection? Mueen said not to so I will leave for now
        self.level += 1
        # Add highly connected followers to the clique and to_check
        clique_features = {}
        for follower in outliers:
            self.clique.add((follower, self.level))
            self.to_check.add(follower)
            self.clique_features[follower] = user_features[follower]
        user_features = {}
        print self.clique
        self.n = float(len(self.clique))
        print "Current size of cluster: " + str(self.n)
Example No. 8
def get_bot_features(users_file, output):
    folders = os.walk('/home/amanda/bigDisk/Twitter/Debot2/stream/')
    try:
        load_dotenv('/home/amanda/bigDisk/Twitter/creds/.env')
        username = os.getenv('DATABASE_USER')
        password = os.getenv('DATABASE_PASSWORD')
        conn_string = "dbname='twitter' user="******" password = "******"got filenames"
    i = 0
    for file_path, file_name in file_names:

        if i >= 9000:
            break

        try:
            f = json.load(open(file_path + '/' + file_name, 'r'))
        except:
            print file_path
            print file_name
            continue
        if len(f) > 150:
            i += 1
            user_id = file_name.split('_')[0]
            cur.execute(
                'SELECT user_info_json FROM followers WHERE user_id = %s',
                (user_id, ))
            record = cur.fetchone()
            if record:
                if record[0]:
                    user_info = ast.literal_eval(record[0])
                else:
                    continue
                gf = GetFeatures(user_id, user_info, f)
                gf.user_features()
                gf.collect_tweets()
                gf.content_features()
                gf.temporal_features()
                features.append(gf.features)
    pd.DataFrame(features).to_csv(output)
    print "dumped file"
Example No. 9
 def find_bots(self, priors):
     print "Getting all user info..."
     self.users_to_query = set()
     followers_set = set(self.followers)
     print "Number of followers: " + str(len(self.followers))
     follower_counts = Counter(self.followers).most_common()
     # should fix this to be a more precise measure
     size_to_keep = int(.15*len(self.followers))
     connectedness_threshold = floor(0.3*self.n)
     tmp_followers = [f[0] for f in follower_counts if f[1] >= connectedness_threshold]
     if len(tmp_followers) < size_to_keep:
         tmp_followers.extend([f[0] for f in follower_counts[:size_to_keep] if f[1] > 1])
     followers_set = set(tmp_followers)
     print "Number of connected followers: " + str(len(followers_set))
     for follower in followers_set:
         user_info = None
         follower = str(follower)
         if follower not in self.users and follower not in self.ignore_users:
             self.cur.execute('SELECT suspended, deleted, other_error, user_info_json FROM followers WHERE user_id = %s', (follower,))
             record = self.cur.fetchone()
             if record:
                 if record[0] or record[1] or record[2]:
                     self.ignore_users.add(follower)
                     # print "User is suspended or deleted"
                     continue
                 if record[3]:
                     # print "Already have profile information for user number " + follower
                     self.user_info[follower] = ast.literal_eval(record[3])
                     continue
             self.users_to_query.add(follower)
     get_user_info(self)
     print "Getting all timeline info and extracting features"
     for follower in followers_set:
         timeline = None
         follower = str(follower)
         if follower not in self.users and follower not in self.ignore_users:
             self.users.add(follower)
             self.cur.execute('SELECT suspended, deleted, other_error, timeline FROM followers WHERE user_id = %s', (follower,))
             record = self.cur.fetchone()
             if record:
                 if record[0] or record[1] or record[2]:
                     self.ignore_users.add(follower)
                     # print "User is suspended or deleted"
                     continue
                 if record[3]:
                     # print "Already have timeline information for user number " + follower
                     # Have to read in file to get timeline info
                     timeline = get_timeline_from_file(self, follower)
                 else:
                     timeline = get_user_timeline(self, follower)
             else:
                 timeline = get_user_timeline(self, follower)
             if timeline and self.user_info.get(follower) and len(timeline) > 50:
                 gf = GetFeatures(follower, self.user_info[follower], timeline)
                 try:
                     gf.user_features()
                     gf.collect_tweets()
                     gf.content_features()
                     gf.temporal_features()
                 except Exception as e:
                     print "ERROR GETTING FEATURES"
                     print e
                     print follower
                     print self.user_info[follower]
                 # need to incorporate other network features
                 #gf.features['num_shared_edges'] = follower_counts[user]
                 #self.user_features[user] = gf.features
                 self.current_level_users.append(follower)
                 self.features_list.append(gf.features)
     # Axis=0 should be vertical
     len_priors = len(priors)
     current_features = priors
     current_features.extend(self.features_list)
     print "Performing anomaly detection"
     #json.dump(priors, open('test.json', 'w'), indent=4, separators=(',', ': '))
     X = self.vec.fit_transform(current_features).toarray()
     current_features = {}
     X_norm = normalize(X)
     #print np.any(np.isnan(X))
     #print np.all(np.isfinite(X))
     print X.shape
      # X = np.stack([current_features, priors], axis=0)
      # Every round will find outliers; how do we stop exploring?
     clf = LocalOutlierFactor(n_neighbors=20)
     clf.fit(X)
     check_is_fitted(clf, ["threshold_", "negative_outlier_factor_", "n_neighbors_", "_distances_fit_X_"])
     if X is not None:
         X = check_array(X, accept_sparse='csr')
         y_pred = clf._decision_function(X)
     else:
         y_pred = clf.negative_outlier_factor_
     #y_pred = clf.fit_predict(X)
     y_pred_new = y_pred[len_priors:]
     # Do anomaly detection and set connected followers to certain outliers
     # this line is a stand-in
     users_scores = zip(self.current_level_users, y_pred_new)
     connected_followers = [u[0] for u in users_scores if u[1] <= clf.threshold_]
     #How do I add back in the outliers to the anomaly detection? Mueen said not to so I will leave for now
     self.level += 1
     # Add highly connected followers to the clique and to_check
     for follower in connected_followers:
         self.clique.add((follower, self.level))
         self.to_check.add(follower)
     print self.clique
     self.n = float(len(self.clique))
     print "Current size of cluster: " + str(self.n)