def main(): print("Generating CSV file From Stats") fileNames = [] #if args.debug: print("Checking " + args.db.title() + " Version") #command = str("nodetool version") #try: # output = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True).decode() #except subprocess.CalledProcessError as e: # result = str(e.output) # if result.find("Connection refused") >= 0: # print("Cannot Connect To " + args.db.title() + ", Terminating code") # sys.exit(); #if args.debug: print(args.db + " Version : " + output) #if int((output.split(": ")[1])[0]) == 2: # if args.debug: print("Cassandra Version v2") # version = 2 #else: # if args.debug: print("Cassandra Version v3") # version = 3 version = config.get_keys(args.region, args.environ, "version") keys = config.get_keys(args.region, args.environ, "key") hosts = config.get_keys(args.region, args.environ, args.db) if args.debug: print("Total No. of Hosts", len(hosts)) progress(0, len(hosts), "Generating CSV File form Stats") for i, x in enumerate(hosts): if args.debug: print("Processing Stats From host", (i + 1)) path = transform_cfstats(keys, args.region, args.environ, x, version) if args.debug: print(path) if path: fileNames.append(path) sys.stdout.flush() progress((i + 1), len(hosts), "Generating CSV File form Stats") if args.debug: print("Done processing Stats", (i + 1)) print("\nFinished, CSV File/s Created") print("\nNow Combining CSV Files Form Diffrent Nodes") command = str("cat data/" + args.region + "/" + args.environ + "/*.csv > data/" + args.region + "/" + args.environ + "/" + args.region + "." + args.environ + ".cfstats.csv") output = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True).decode() print("\nMerging Complete.") print("\nPivoting The Data.") path = "data/" + args.region + "/" + args.environ + "/" + args.region + "." + args.environ + ".cfstats.csv" save = "data/" + args.region + "/" + args.environ + "/" + args.region + "." + args.environ + ".cfstats" cassandra_cfstatsCsvAnalyze.endStats(path, save) print("\nPivoting Complete.")
def main(): print("Pushing The Stats To NewRelic") if not args.debug: print("Getting x_key From Environments") x_key = config.get_keys(args.region, args.environ, "x_key")[0] if not x_key: print("Failed To Get x_key From Environments Terminating The Code") sys.exit() if not args.debug: print("Getting Relic URL From Environments") relic = config.get_keys(args.region, args.environ, "relicDB")[0] if not relic: print("Failed To Get Relic URL From Environments Terminating The Code") sys.exit() file = str("data/" + args.region + "/" + args.environ + "/" + args.region + "." + args.environ + ".cfstats.pivot.json") if not os.path.exists(file): print("Stats File not Exists Terminating The Code") sys.exit() command = str( 'cat ' + file + ' | curl -d @- -X POST -H Content-Type: application/json -H X-Insert-Key: ' + str(x_key) + ' ' + str(relic)) output = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True).decode() if output.find("error"): print("Failed To Push The Data Terminating The Code") print("Error From Command") error = (output.split("{")[1]).split("}")[0] print(error) sys.exit() print("Stats Pushed To New Relic Insights")
def main():
    location = args.location
    region = args.region
    if args.get_all:
        file = str(location + '/nodes')
        hosts = os.listdir(file)
    else:
        hosts = config.get_keys(args.region, args.environ, args.db)
    stats = str('cfstats')
    if args.version == 3:  # `is` compares identity, not value; use == for ints
        stats = str('tablestats')
    if len(hosts) > 1:
        for i, host in enumerate(hosts):
            if args.debug: print("Processing files", (i + 1))
            store_path = str('data' + '/' + region + '/' + args.environ + "/" + host + '.txt')
            path = str(location + '/nodes/' + host + '/nodetool/' + stats)
            # Copy the stats file, normalizing line endings along the way
            with open(store_path, 'w') as fw:
                for line in open(path):
                    fw.write(line.rstrip("\n"))
                    fw.write('\n')
            sys.stdout.flush()
            util.progress((i + 1), len(hosts), args.debug, "collecting files")
            if args.debug: print("Done collecting files", (i + 1))
        if args.debug: print("Finished collecting all files")
    elif len(hosts) == 1:
        if args.debug: print("Processing file")
        store_path = str('data' + '/' + region + '/' + args.environ + "/" + hosts[0] + '.txt')
        path = str(location + '/nodes/' + hosts[0] + "/nodetool/" + stats)
        with open(store_path, 'w') as fw:
            for line in open(path):
                fw.write(line.rstrip("\n"))
                fw.write('\n')
        sys.stdout.flush()
        util.progress(1, 1, args.debug, "collecting file")
        if args.debug: print("Finished collecting the file")
    else:
        print("no files available")
import os
from tempfile import mkdtemp

# helpers dependencies
import string
import random

# scraper dependencies
import requests
from bs4 import BeautifulSoup
import re

from flask import Flask

# Own directory
from helpers import Helpers
from config import get_keys
from scraper import ScheduleScraper

settings = get_keys(os)
helpers_dependencies = dict(random=random, string=string)
helpers = Helpers(settings, **helpers_dependencies)
scraper_dependencies = dict(get=requests.get, BeautifulSoup=BeautifulSoup, re=re)
scraper = ScheduleScraper(**scraper_dependencies)
settings['SECRET_KEY'] = os.environ.get('SECRET_KEY', helpers.get_salt(25))

app = Flask(__name__)

# required for dated url_for (static-asset cache busting)
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
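# `Helpers` lives in the repo's own helpers module and is not shown here. A
# hypothetical sketch of how the injected `random` and `string` dependencies
# might back the `get_salt(25)` call above:

class Helpers(object):
    """Hypothetical sketch; the real class lives in helpers.py."""

    def __init__(self, settings, random, string):
        self.settings = settings
        self.random = random
        self.string = string

    def get_salt(self, length):
        # Draw `length` characters from letters and digits.
        alphabet = self.string.ascii_letters + self.string.digits
        return ''.join(self.random.choice(alphabet) for _ in range(length))

# For a real SECRET_KEY, random.SystemRandom or the secrets module would be the
# safer choice, since the plain random module is not cryptographically secure.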
    p_tweet_id_list = pickle.load(pickle_in)
    for tweet_id in tweet_id_list:
        if tweet_id in p_tweet_id_list:
            p_tweet_id_list.remove(tweet_id)
    print '* Removed %i IDs from tweet ID list.' % len(tweet_id_list)
    print '* %i tweet IDs remaining in source.' % len(p_tweet_id_list)
    with open(pickled_f, 'wb') as pickle_out:
        # Print the path rather than the file object
        print '* Storing pickled file at:', pickled_f
        pickle.dump(p_tweet_id_list, pickle_out)


if __name__ == "__main__":
    t0 = time.time()
    files = ['yourfilesgohere']
    print '\nAuthorizing with Twitter...'
    APP_KEY, APP_SECRET = get_keys()
    # building Twitter client
    twitter = Twython(APP_KEY, APP_SECRET, oauth_version=2)
    ACCESS_TOKEN = twitter.obtain_access_token()
    try:
        twitter = Twython(APP_KEY, access_token=ACCESS_TOKEN)
        print '* Cleared!'
    except TwythonAuthError:
        sys.exit("Authorization failed.")
    for f in files:
        ids = loadIds(f)
        json_f = f + '-hydrated.json'
        pickled_f = f + '.p'
        removals = []
        for tweet_id in ids:
            t1 = time.time()
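# get_keys() is defined elsewhere in this script. A hypothetical sketch,
# assuming the app key and secret sit on the first two lines of a local
# 'twitter.keys' file (the filename is an assumption):

def get_keys(path='twitter.keys'):
    """Hypothetical sketch: read APP_KEY and APP_SECRET from a local file."""
    with open(path) as key_file:
        app_key = key_file.readline().strip()
        app_secret = key_file.readline().strip()
    return app_key, app_secret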
def main():
    useSSH = False
    detectTopology = True
    gotHeader = False
    nodeArray = []
    nodeTable = ''
    keys = []
    nodeToolVer = 0

    if args.debug: print("Checking " + args.db.title() + " Version")
    command = str("nodetool -h `hostname -i` version")
    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True).decode()
        if args.debug: print("command output")
        if args.debug: print(output.split("\n"))
    except subprocess.CalledProcessError as e:
        result = str(e.output)
        if result.find("Connection refused") >= 0:
            print("Cannot Connect To " + args.db.title() + ", Terminating code")
            sys.exit()
        else:
            print("error: " + result)
            # TODO exit here as well?

    if args.debug: print(args.db + " Version : " + output)
    if int((output.split(": ")[1])[0]) == 2:
        if args.debug: print("NodeTool Version v2 using nodetool cfstats")
        stats = str("cfstats")
        nodeToolVer = 2
    elif int((output.split(": ")[1])[0]) == 3:
        if args.debug: print("NodeTool Version v3 using nodetool tablestats")
        stats = str("tablestats")
        nodeToolVer = 3
    elif (int((output.split(": ")[1])[0]) == 4) or (int(output.split("\n")[1].split(": ")[1][0]) == 4):
        if args.debug: print("NodeTool Version v4 using nodetool tablestats")
        stats = str("tablestats")
        nodeToolVer = 4
    else:
        # `stats` would be unbound below, so bail out on an unrecognised version
        print("Unsupported NodeTool Version, Terminating code")
        sys.exit()

    if (args.keySpace == "") and (args.table != ""):
        print("Please Provide the Key Space Before Table Using -k / --keySpace")
        sys.exit()

    print("Collecting config variables from Config")
    try:
        with open('config/settings.json', 'r') as f:
            setting = json.load(f)
        useSSH = setting['connection']['useSSH']
        detectTopology = setting['connection']['detectTopology']
    except FileNotFoundError:
        print("settings.json File Not Found OR Error While Getting Setting Parameters, Applying Default Settings")

    if detectTopology:
        if args.debug: print("Detecting Nodes")
        command = str("nodetool -h `hostname -i` status")
        try:
            output = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True).decode()
            dataArray = output.split("\n")
            for i in dataArray:
                if i != '':
                    if i.find("Note:") != -1:
                        continue
                    if i.find("-- Address") != -1:
                        # Collapse the whitespace-separated header row into a list,
                        # then re-join the column names that contain spaces; the
                        # column layout differs between nodetool versions.
                        i = re.sub(r'\s+$', '', i)
                        header = re.sub(r'\s+', ',', i).split(",")
                        if nodeToolVer == 2:
                            if args.debug: print("Nodetool Version 2")
                            header[4] = str(str(header[4]) + str(header[5]))
                            del header[5]
                            header[5] = str(str(header[5]) + str(header[6]))
                            del header[6]
                        elif nodeToolVer == 3:
                            if args.debug: print("NodeTool Version 3")
                            header[5] = str(str(header[5]) + str(header[6]))
                            del header[6]
                        elif nodeToolVer == 4:
                            if args.debug: print("Nodetool Version 4")
                            header[4] = str(str(header[4]) + str(header[5]))
                            del header[5]
                            header[5] = str(str(header[5]) + str(header[6]))
                            del header[6]
                        gotHeader = True
                        continue
                    if gotHeader:
                        temp = re.sub(r'\s+', ',', i).split(",")
                        # Re-join the load value with its unit
                        temp[2] = str(str(temp[2]) + str(temp[3]))
                        del temp[3]
                        nodeArray.append(temp)
            nodeTable = pd.DataFrame(nodeArray, columns=header)
            for i in range(0, len(nodeTable["Address"])):
                keys.append(nodeTable["Address"].iloc[i])
        except subprocess.CalledProcessError as e:
            result = str(e.output)
            if result.find("Connection refused") >= 0:
                print("Cannot Connect To " + args.db.title() + ", Terminating code")
                sys.exit()
        except:
            print(sys.exc_info())
            print("Something Went Wrong While Getting The Node/s, Terminating code (You can also provide nodes via config)")
            sys.exit()
    else:
        keys = config.get_keys(args.region, args.environ, "key")

    hosts = config.get_keys(args.region, args.environ, args.db)
    if args.debug: print("Total No. of Hosts", len(hosts))
    progress(0, len(hosts), "Getting Data From Nodes")
    for i, x in enumerate(hosts):
        if args.debug: print("Processing host", (i + 1))
        receive_cfstats(keys, args.region, args.environ, x, args.keySpace, args.table, useSSH, stats)
        sys.stdout.flush()
        progress((i + 1), len(hosts), "Getting Data From Nodes")
        if args.debug: print("Done processing host", (i + 1))
    print("\nFinished Getting Data")
num_batches_per_epoch = exp_config.num_train_samples // exp_config.BATCH_SIZE
number_of_evaluation_per_epoch = num_batches_per_epoch // exp_config.eval_interval
num_val_images = 2
max_epoch = 5
num_rows = max_epoch * number_of_evaluation_per_epoch * NUMBER_OF_ROWS * num_val_images

# Read all the individual data frames into a dictionary keyed by annotator id
base_path = get_base_path(exp_config.root_path,
                          exp_config.Z_DIM,
                          exp_config.num_units[2],
                          exp_config.num_units[1],
                          exp_config.num_cluster_config,
                          run_id=run_id
                          )
keys = get_keys(base_path, "manual_annotation_set_")
print("keys", keys)
number_of_keys = len(keys)

# Iterate over a copy: removing from `keys` while iterating over it skips elements
for key in list(keys):
    annotation_path = base_path + key
    if not os.listdir(annotation_path):
        print(f"No csv files found in directory {annotation_path}. Skipping the directory")
        keys.remove(key)

data_dict = combine_annotation_sessions(keys=keys,
                                        base_path=base_path,
                                        max_epoch=max_epoch)

# Verify whether there are duplicate annotations for the same combination of
# (batch, image_no, row_number_with_image)
data_dict = combine_multiple_annotations(data_dict, exp_config, num_rows, run_id)
keys = list(data_dict.keys())
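# The duplicate check performed by combine_multiple_annotations is not shown
# here. A minimal pandas sketch of flagging repeated
# (batch, image_no, row_number_with_image) combinations in one annotator's
# frame; the function name and frame layout are assumptions:

import pandas as pd

def find_duplicate_annotations(df):
    """Hypothetical sketch: return rows annotating the same image row twice."""
    key_cols = ["batch", "image_no", "row_number_with_image"]
    # keep=False marks every member of a duplicated group, not just the repeats
    return df[df.duplicated(subset=key_cols, keep=False)]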