Пример #1
0
def main():
    """Generate per-node CSV files from collected cfstats, merge them into a
    single CSV for the region/environment, and pivot the merged data.

    Relies on module-level ``args`` (CLI options), ``config``, ``progress``,
    ``transform_cfstats`` and ``cassandra_cfstatsCsvAnalyze``.
    """
    print("Generating CSV file From Stats")
    fileNames = []

    # Per-environment settings come from the config module (the older
    # commented-out nodetool version probe was removed as dead code).
    version = config.get_keys(args.region, args.environ, "version")
    keys = config.get_keys(args.region, args.environ, "key")
    hosts = config.get_keys(args.region, args.environ, args.db)

    if args.debug: print("Total No. of Hosts", len(hosts))
    progress(0, len(hosts), "Generating CSV File from Stats")

    for i, x in enumerate(hosts):
        if args.debug: print("Processing Stats From host", (i + 1))
        path = transform_cfstats(keys, args.region, args.environ, x, version)
        if args.debug: print(path)
        if path:
            fileNames.append(path)
        sys.stdout.flush()
        progress((i + 1), len(hosts), "Generating CSV File from Stats")
        if args.debug: print("Done processing Stats", (i + 1))

    print("\nFinished, CSV File/s Created")
    print("\nNow Combining CSV Files From Different Nodes")

    # Build the shared data-directory prefix once instead of repeating the
    # concatenation for every path below.
    base = "data/" + args.region + "/" + args.environ + "/"
    merged = base + args.region + "." + args.environ + ".cfstats.csv"
    # NOTE(review): if `merged` survives from a previous run, the shell glob
    # may feed it back into itself — consider removing it before merging.
    command = "cat " + base + "*.csv > " + merged
    subprocess.check_output(command,
                            stderr=subprocess.STDOUT,
                            shell=True).decode()

    print("\nMerging Complete.")
    print("\nPivoting The Data.")

    save = base + args.region + "." + args.environ + ".cfstats"
    cassandra_cfstatsCsvAnalyze.endStats(merged, save)

    print("\nPivoting Complete.")
def main():
    """Push the pivoted cfstats JSON for this region/environment to the
    New Relic Insights API via a shell-piped curl call.

    Exits the process with a message when the insert key, the Relic URL or
    the stats file is missing, or when the API response reports an error.
    Relies on module-level ``args`` and ``config``.
    """
    print("Pushing The Stats To NewRelic")

    # NOTE(review): every other block in this file prints progress only when
    # args.debug is set; the original used `if not args.debug:` here, which
    # looks inverted, so it is normalised to match the rest of the file.
    if args.debug: print("Getting x_key From Environments")
    x_key = config.get_keys(args.region, args.environ, "x_key")[0]

    if not x_key:
        print("Failed To Get x_key From Environments Terminating The Code")
        sys.exit()

    if args.debug: print("Getting Relic URL From Environments")
    relic = config.get_keys(args.region, args.environ, "relicDB")[0]

    if not relic:
        print("Failed To Get Relic URL From Environments Terminating The Code")
        sys.exit()

    # Renamed from `file` to avoid shadowing the builtin.
    stats_file = ("data/" + args.region + "/" + args.environ + "/" +
                  args.region + "." + args.environ + ".cfstats.pivot.json")

    if not os.path.exists(stats_file):
        print("Stats File not Exists Terminating The Code")
        sys.exit()

    # BUG FIX: the header values must be quoted — without quotes the shell
    # split "Content-Type: application/json" into two separate curl args.
    command = ("cat " + stats_file +
               " | curl -d @- -X POST"
               " -H 'Content-Type: application/json'"
               " -H 'X-Insert-Key: " + str(x_key) + "' " + str(relic))
    output = subprocess.check_output(command,
                                     stderr=subprocess.STDOUT,
                                     shell=True).decode()

    # BUG FIX: str.find returns -1 when the needle is absent, which is
    # truthy, so the original `if output.find("error"):` treated *success*
    # as failure. Use a membership test instead.
    if "error" in output:
        print("Failed To Push The Data Terminating The Code")
        print("Error From Command")
        error = (output.split("{")[1]).split("}")[0]
        print(error)
        sys.exit()

    print("Stats Pushed To New Relic Insights")
def _copy_node_stats(src_path, dest_path):
    # Helper: copy one nodetool stats dump line by line, normalising line
    # endings. `with` guarantees both files are closed (the original leaked
    # both handles).
    with open(src_path) as fr, open(dest_path, 'w') as fw:
        for line in fr:
            fw.write(line.rstrip("\n"))
            fw.write('\n')


def main():
    """Collect per-node nodetool stats files from a diagnostics bundle (or
    from configured hosts) into data/<region>/<environ>/<host>.txt.

    Relies on module-level ``args``, ``config`` and ``util``.
    """
    location = args.location
    region = args.region
    if args.get_all:
        # Every directory under <location>/nodes is treated as a host.
        hosts = os.listdir(location + '/nodes')
    else:
        hosts = config.get_keys(args.region, args.environ, args.db)
    # nodetool v3 renamed "cfstats" to "tablestats".
    # BUG FIX: the original compared with `is 3` — identity, not equality,
    # which relies on CPython's small-int caching and warns on 3.8+.
    stats = 'tablestats' if args.version == 3 else 'cfstats'
    if len(hosts) > 1:
        for i, host in enumerate(hosts):
            if args.debug: print("Processing files", (i + 1))
            store_path = ('data' + '/' + region + '/' + args.environ + "/" +
                          host + '.txt')
            path = location + '/nodes/' + host + '/nodetool/' + stats
            _copy_node_stats(path, store_path)
            sys.stdout.flush()
            util.progress((i + 1), len(hosts), args.debug, "collecting files")
            if args.debug: print("Done collecting files", (i + 1))
        if args.debug: print("Finish collect all files")
    elif len(hosts) == 1:
        if args.debug: print("Processing file")
        store_path = ('data' + '/' + region + '/' + args.environ + "/" +
                      hosts[0] + '.txt')
        path = location + '/nodes/' + hosts[0] + "/nodetool/" + stats
        _copy_node_stats(path, store_path)
        sys.stdout.flush()
        util.progress(1, 1, args.debug, "collecting file")
        if args.debug: print("Finish collect all file")
    else:
        print("no files available")
Пример #4
0
from tempfile import mkdtemp

# helpers dependencies
import string
import random

# scraper dependencies
from bs4 import BeautifulSoup
import re

# Own directory
from helpers import Helpers
from config import get_keys
from scraper import ScheduleScraper

# NOTE(review): `os`, `requests` and `Flask` are used below but not imported
# in this section — presumably imported earlier in the file; verify.
# NOTE(review): passing the `os` module itself to get_keys looks unusual —
# confirm against config.get_keys' expected argument.
settings = get_keys(os)

# Wire up Helpers and ScheduleScraper with their dependencies injected
# explicitly (dependency-injection style, eases testing).
helpers_dependencies = dict(random=random, string=string)
helpers = Helpers(settings, **helpers_dependencies)

scraper_dependencies = dict(get=requests.get,
                            BeautifulSoup=BeautifulSoup,
                            re=re)
scraper = ScheduleScraper(**scraper_dependencies)

# Use SECRET_KEY from the environment when set; otherwise fall back to a
# freshly generated 25-character salt.
settings['SECRET_KEY'] = os.environ.get('SECRET_KEY', helpers.get_salt(25))

app = Flask(__name__)

# required for datad url_for
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
Пример #5
0
        p_tweet_id_list = pickle.load(pickle_in)
    for tweet_id in tweet_id_list:
        if tweet_id in p_tweet_id_list:
            p_tweet_id_list.remove(tweet_id)
    print '* Removed %i IDs from tweet ID list.' % len(tweet_id_list)
    print '* %i tweet IDs remaining in source.' % len(p_tweet_id_list)
    with open(pickled_f, 'wb') as pickle_out:
        print '* Storing pickled file at:', pickle_out
        pickle.dump(p_tweet_id_list, pickle_out)


if __name__ == "__main__":
    t0 = time.time()
    files = ['yourfilesgohere']
    print '\nAuthorizing with Twitter...'
    APP_KEY, APP_SECRET = get_keys()
    # building Twitter client
    twitter = Twython(APP_KEY, APP_SECRET, oauth_version=2)
    ACCESS_TOKEN = twitter.obtain_access_token()
    try:
        twitter = Twython(APP_KEY, access_token=ACCESS_TOKEN)
        print '* Cleared!'
    except TwythonAuthError:
        sys.exit("Authorization failed.")
    for f in files:
        ids = loadIds(f)
        json_f = f + '-hydrated.json'
        pickled_f = f + '.p'
        removals = []
        for tweet_id in ids:
            t1 = time.time()
Пример #6
0
def main():
    """Detect the nodetool major version and cluster topology, then collect
    cfstats/tablestats from every configured host.

    Relies on module-level ``args``, ``config``, ``progress`` and
    ``receive_cfstats``, plus ``subprocess``, ``re``, ``json``, ``pd``.
    """
    useSSH = False
    detectTopology = True
    gotHeader = False
    nodeArray = []
    nodeTable = ''
    keys = []
    nodeToolVer = 0

    # ---- Determine the nodetool major version ------------------------------
    if args.debug: print("Checking " + args.db.title() + " Version")
    command = str("nodetool -h `hostname -i` version")
    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True).decode()
        if args.debug: print("command output")
        if args.debug: print(output.split("\n"))
    except subprocess.CalledProcessError as e:
        result = str(e.output)
        if result.find("Connection refused") >= 0:
            print("Cannot Connect To " + args.db.title() + ", Terminating code")
        else:
            print("error: " + result)
        # BUG FIX (resolves old TODO): without version output the code
        # below dereferenced an unbound `output` and died with a NameError,
        # so exit cleanly in both failure cases.
        sys.exit()
    if args.debug: print(args.db + " Version : " + output)

    # nodetool v2 uses "cfstats"; v3/v4 renamed the command to "tablestats".
    if int((output.split(": ")[1])[0]) == 2:
        if args.debug: print("NodeTool Version v2 using nodetool cfstats")
        stats = str("cfstats")
        nodeToolVer = 2
    elif int((output.split(": ")[1])[0]) == 3:
        if args.debug: print("NodeTool Version v3 using nodetool tablestats")
        stats = str("tablestats")
        nodeToolVer = 3
    elif (int((output.split(": ")[1])[0]) == 4) or (int(output.split("\n")[1].split(": ")[1][0]) == 4):
        if args.debug: print("NodeTool Version v4 using nodetool tablestats")
        stats = str("tablestats")
        nodeToolVer = 4
    else:
        # BUG FIX: previously `stats` stayed unbound for any other version
        # and the collection loop crashed with a NameError.
        print("Unsupported NodeTool Version, Terminating code")
        sys.exit()

    # A table filter only makes sense inside a keyspace.
    # (`and` replaces the bitwise `&`; message typo "--keySapace" fixed.)
    if (args.keySpace == "") and (args.table != ""):
        print("Please Provide the Key Space Before Table Using -k / --keySpace")
        sys.exit()

    print("Collecting config variables from Config")

    try:
        with open('config/settings.json', 'r') as f:
            setting = json.load(f)
            useSSH = setting['connection']['useSSH']
            detectTopology = setting['connection']['detectTopology']
    except FileNotFoundError:
        print("settings.json File Not Found OR Error While Getting Setting Parameters, Applying Default Settings")

    if detectTopology:
        # ---- Discover node addresses from `nodetool status` ----------------
        if args.debug: print("Detecting Nodes")
        command = str("nodetool -h `hostname -i` status")
        try:
            output = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True).decode()
            dataArray = output.split("\n")
            for i in dataArray:
                if i != '':
                    if i.find("Note:") != -1:
                        continue
                    if i.find("--  Address") != -1:
                        # Header row: collapse runs of whitespace into commas,
                        # then re-join the column titles that contain spaces
                        # (their position differs per nodetool version) so the
                        # column count matches the data rows.
                        i = re.sub(r'\s+$', '', i)
                        header = re.sub(r'\s+', ',', i).split(",")
                        if nodeToolVer == 2:
                            if args.debug: print("Nodetool Version 2")
                            header[4] = str(str(header[4]) + str(header[5]))
                            del header[5]
                            header[5] = str(str(header[5]) + str(header[6]))
                            del header[6]
                        elif nodeToolVer == 3:
                            if args.debug: print("NodeTool Version 3")
                            header[5] = str(str(header[5]) + str(header[6]))
                            del header[6]
                        elif nodeToolVer == 4:
                            if args.debug: print("Nodetool Version 4")
                            header[4] = str(str(header[4]) + str(header[5]))
                            del header[5]
                            header[5] = str(str(header[5]) + str(header[6]))
                            del header[6]
                        gotHeader = True
                        continue
                    if gotHeader:
                        # Data row: re-join the load value with its unit
                        # (split apart by the whitespace collapse above).
                        temp = re.sub(r'\s+', ',', i).split(",")
                        temp[2] = str(str(temp[2]) + str(temp[3]))
                        del temp[3]
                        nodeArray.append(temp)
            nodeTable = pd.DataFrame(nodeArray, columns=header)
            for i in range(0, len(nodeTable["Address"])):
                keys.append(nodeTable["Address"].iloc[i])
        except subprocess.CalledProcessError as e:
            result = str(e.output)
            if result.find("Connection refused") >= 0:
                print("Cannot Connect To " + args.db.title() + ", Terminating code")
                sys.exit()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            print(sys.exc_info())
            print("Something Went Wrong While Getting The Node/s, Terminating code (You can also provide nodes via config)")
            sys.exit()
    else:
        keys = config.get_keys(args.region, args.environ, "key")

    hosts = config.get_keys(args.region, args.environ, args.db)

    if args.debug: print("Total No. of Hosts", len(hosts))
    progress(0, len(hosts), "Getting Data From Nodes")

    for i, x in enumerate(hosts):
        if args.debug: print("Processing host", (i + 1))
        receive_cfstats(keys, args.region, args.environ, x, args.keySpace, args.table, useSSH, stats)
        sys.stdout.flush()
        progress((i + 1), len(hosts), "Getting Data From Nodes")
        if args.debug: print("Done processing host", (i + 1))

    print("\nFinished Getting Data")
Пример #7
0
# Total number of annotation rows expected across the full training run.
num_batches_per_epoch = exp_config.num_train_samples // exp_config.BATCH_SIZE
number_of_evaluation_per_epoch = num_batches_per_epoch // exp_config.eval_interval

num_val_images = 2
max_epoch = 5
num_rows = max_epoch * number_of_evaluation_per_epoch * NUMBER_OF_ROWS * num_val_images

# Read all the individual data frames into a dictionary of format {"annotator_id"}
base_path = get_base_path(exp_config.root_path,
                          exp_config.Z_DIM,
                          exp_config.num_units[2],
                          exp_config.num_units[1],
                          exp_config.num_cluster_config,
                          run_id=run_id
                          )
keys = get_keys(base_path, "manual_annotation_set_")
print("keys", keys)
number_of_keys = len(keys)

# Drop annotation directories that contain no csv files.
# BUG FIX: the original called keys.remove(key) while iterating over
# `keys`, which silently skips the element following each removal —
# build a filtered list instead.
non_empty_keys = []
for key in keys:
    if os.listdir(base_path + key):
        non_empty_keys.append(key)
    else:
        print("No csv files found in directory. Skipping the directory")
keys = non_empty_keys

data_dict = combine_annotation_sessions(keys=keys,
                                        base_path=base_path,
                                        max_epoch=max_epoch)

# Verify if there is duplicate annotations for the same combination of ( batch, image_no, row_number_with_image )
data_dict = combine_multiple_annotations(data_dict, exp_config, num_rows, run_id)
keys = [k for k in data_dict.keys()]