def main():
    # first create the directory for the image files
    root = os.path.abspath(os.path.dirname(__file__))
    imgDir = os.path.join(root, settings.images_directory)

    # make sure the images directory exists
    settings.makeDir(settings.images_directory)

    for url in url_list:
        response_info = requests.get(url)
        tree = html.fromstring(response_info.text)

        # get the first occurrence of the data within the following tag (should be 100 entries)
        script = tree.xpath('//script[@language="javascript"]/text()')[0]

        # get what the first oi= points to (~100 entries)
        json_string = regex.findall(script)[0]

        # load the 100 entries into a json object
        json_data = json.loads(json_string)

        # get the list of other pages: all anchors in the footer tag, take the href attribute
        next_page_url = tree.xpath('//footer/a/@href')

        # get the page of data and the complete FS image for a particular case
        links = [settings.domain + "/" + x['nodeRef'] for x in json_data]
        for link in links:
            extract(link, imgDir)
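# Hedged sketch (assumption, not part of the original source): the `regex` used
# in main() above is defined elsewhere in this module. A pattern along these
# lines would capture the JSON array that the page assigns to "oi=" inside the
# inline <script> block; the exact markup of the target page may differ.
#
#   import re
#   regex = re.compile(r"oi\s*=\s*(\[.*?\]);", re.DOTALL)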
# os.rename(os.path.join(prefix + "_train.lst"), os.path.join(root, prefix + ".lst"))


def makeRecFile(prefix, imageloc):
    """
    :param prefix: name of the list file
    :param imageloc: where images are located, both positive and negative samples
    :return:
    """
    subprocess.call("python ~/mxnet/tools/im2rec.py --encoding .png " + prefix + " " + imageloc, shell=True)


# first create the directory
settings.makeDir(settings.record_IO_directory)

# get the absolute path
root = os.path.abspath(os.path.dirname(__file__))
recordIODir = os.path.join(root, settings.record_IO_directory)

# some locations
val = os.path.join(recordIODir, "Val")
test = os.path.join(recordIODir, "Test")
train = os.path.join(recordIODir, "Train")

valdir = os.path.join(root, settings.mxnet_images_val_dir)
testdir = os.path.join(root, settings.mxnet_images_test_dir)
traindir = os.path.join(root, settings.mxnet_images_train_dir)

# first let's create the list files
makeLstFile(val, valdir)
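# Hedged sketch (assumption, not part of the original source): makeLstFile is
# called above but not shown in this excerpt. Mirroring makeRecFile, it would
# presumably shell out to im2rec.py with its list-generation option, e.g.:
#
#   def makeLstFile(prefix, imageloc):
#       subprocess.call("python ~/mxnet/tools/im2rec.py --list --recursive "
#                       + prefix + " " + imageloc, shell=True)
#
# (the exact im2rec.py flags differ between mxnet versions)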
import os
import re

from PySide.QtGui import QApplication, QMessageBox, QFileDialog, \
    QInputDialog, QLineEdit
from PySide.QtCore import QObject, Slot, Signal

from sumokoin.address import Address
from utils.common import print_money, print_money2
from settings import APP_NAME, VERSION, DATA_DIR, COIN, makeDir, seed_languages
from utils.logger import log, LEVEL_ERROR, LEVEL_INFO

tray_icon_tooltip = "%s v%d.%d" % (APP_NAME, VERSION[0], VERSION[1])

wallet_dir_path = os.path.join(DATA_DIR, 'wallets')
makeDir(wallet_dir_path)

wallet_log_dir_path = os.path.join(DATA_DIR, 'logs')
makeDir(wallet_log_dir_path)

# passwords: 1-256 characters drawn from letters, digits and !@#$%^&*
password_regex = re.compile(r"^([a-zA-Z0-9!@#\$%\^&\*]{1,256})$")
wallet_file_regex = re.compile(r"wallet_(\d+)")

from webui import LogViewer


class Hub(QObject):
    current_block_height = 0

    def __init__(self, app):
        super(Hub, self).__init__()
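        # Hedged note (assumption, not part of the original excerpt): the rest
        # of __init__ is not shown here; a hub object like this would normally
        # at least keep a reference to the owning application, e.g.:
        #   self.app = app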
def main():
    # =======================================================================================================================
    # create a list of all the diagnoses that are one of the above values, otherwise "missing"
    # also figure out how many are nodule and normal, as these are the images_all to train on
    # =======================================================================================================================
    all_diagnosis = {}
    with open(settings.json_data_file) as data_file:
        alldata = json.load(data_file)
        all_diagnosis = {k: get_items(v) for k, v in alldata.items()}

    total_images = len(all_diagnosis)
    print("number of initial records " + str(total_images))

    # =======================================================================================================================
    # calculate unique combos of the above
    # =======================================================================================================================
    unique_combos = Counter(all_diagnosis.values())
    unique_combos = sorted(unique_combos.items())
    # unique_combos.items().sort(key=lambda x: x[0])  # python 2.7
    unique_combos = dict(unique_combos)
    print("number of unique combos " + str(len(unique_combos)))

    # sort by number of cases, show the last 4
    sort = sorted(unique_combos.items(), key=itemgetter(1))
    print(sort[-40:])
    labels = sorted([i for i in sort[-4:] if i[0] != "missing"], key=lambda x: x[1])
    print(labels)

    # =======================================================================================================================
    # find all indexes where a particular disease occurs, of the form {'diagnosis': [1111, 2222, 3345, ...images_all]}
    # =======================================================================================================================
    from collections import defaultdict
    index_list = defaultdict(list)
    for key, value in all_diagnosis.items():
        index_list[value].append(key)

    # =======================================================================================================================
    # Training and testing lists of the form
    # [normal indices]
    # [nodule indices]
    # =======================================================================================================================
    normal_images_list = []
    nodule_images_list = []
    for i in index_list:
        if "normal" in i and "nodule" in i:
            print(" WARNING - throwing out " + str(len(index_list[i])) + " values, has both nodule and normal in " + i)
        elif "normal" in i:
            normal_images_list += index_list[i]
            # print(" normal in :" + i + " numb:" + str(len(index_list[i])))
        # elif i == "opacity":
        #     train_images_dict["abnormal"] = index_list[i][:354]
        #     test_images_dict["abnormal"] = index_list[i][354:374]
        # elif i == "cardiomegaly":
        #     train_images_dict["abnormal"] = index_list[i][:251]
        #     test_images_dict["abnormal"] = index_list[i][251:266]
        # elif i == "lung/hypoinflation":
        #     train_images_dict["abnormal"] = index_list[i][:229]
        #     test_images_dict["abnormal"] = index_list[i][229:249]
        # elif i == "calcified granuloma":
        #     train_images_dict["abnormal"] += index_list[i][:243]
        #     test_images_dict["abnormal"] += index_list[i][243:263]
        # elif i == "thoracic vertebrae/degenerative":
        #     train_images_dict["abnormal"] += index_list[i][:218]
        #     test_images_dict["abnormal"] += index_list[i][218:238]
        # elif i == "lung/hyperdistention":
        #     train_images_dict["abnormal"] += index_list[i][:190]
        #     test_images_dict["abnormal"] += index_list[i][190:210]
        # elif i == "surgical instruments":
        #     train_images_dict["abnormal"] += index_list[i][:71]
        #     test_images_dict["abnormal"] += index_list[i][71:86]
        # elif i == "catheters, indwelling":
        #     train_images_dict["abnormal"] += index_list[i][:100]
        #     test_images_dict["abnormal"] += index_list[i][100:112]
        # elif i == "calcinosis":
        #     train_images_dict["abnormal"] += index_list[i][:146]
        #     test_images_dict["abnormal"] += index_list[i][146:166]
        # elif i == "nodule" or i == "calcinosis nodule":
        elif "nodule" in i:
            nodule_images_list += index_list[i]
            # print(" nodule in :" + i + " numb:" + str(len(index_list[i])))

    numb_nodule = len(nodule_images_list)
    numb_normal = len(normal_images_list)
    print("Number nodule: " + str(numb_nodule))
    print("Number normal: " + str(numb_normal))
    print("if normal and nodule are not approximately equal then the dataset is unbalanced")

    # the following assumes we have at least twice as many normal as nodule, so use the nodule count as the total
    if settings.test_run == True:
        numb_nodule = settings.test_images  # use just 20 images
        numb_normal = numb_nodule * 2  # use all original normal, we will double nodule by flipping each nodule image
        nodule_images_list = nodule_images_list[:numb_nodule]
        normal_images_list = normal_images_list[:numb_normal]
        print("Running test on " + str(numb_nodule) + " images")

    # =======================================================================================================================
    # create separate directories for the above classes and clear them if needed
    # part of making the .rec file for mxnet
    # ../images_rec/nodule and normal
    # =======================================================================================================================
    settings.makeDir(settings.mxnet_images_train_dir)
    settings.makeDir(settings.mxnet_images_train_nodule_dir)
    settings.makeDir(settings.mxnet_images_train_normal_dir)

    settings.makeDir(settings.mxnet_images_val_dir)
    settings.makeDir(settings.mxnet_images_val_nodule_dir)
    settings.makeDir(settings.mxnet_images_val_normal_dir)

    settings.makeDir(settings.mxnet_images_test_dir)
    settings.makeDir(settings.mxnet_images_test_nodule_dir)
    settings.makeDir(settings.mxnet_images_test_normal_dir)

    # clear existing image data if requested
    if (settings.clear_subdirs == True):
        settings.clear_folder_files(settings.mxnet_images_train_nodule_dir)
        settings.clear_folder_files(settings.mxnet_images_train_normal_dir)
        settings.clear_folder_files(settings.mxnet_images_val_nodule_dir)
        settings.clear_folder_files(settings.mxnet_images_val_normal_dir)
        settings.clear_folder_files(settings.mxnet_images_test_nodule_dir)
        settings.clear_folder_files(settings.mxnet_images_test_normal_dir)

    # images_in_noduledir = []

    # =================================================================
    # figure out how many images go in each split
    # =================================================================
    normal_train = int(settings.trainpercent * numb_normal)
    normal_test = int(settings.testpercent * numb_normal)
    normal_val = int(settings.valpercent * numb_normal)
    # if there is any leftover, add it back to train
    leftover = numb_normal - normal_train - normal_test - normal_val
    normal_train += leftover

    nodule_train = int(settings.trainpercent * numb_nodule)
    nodule_test = int(settings.testpercent * numb_nodule)
    nodule_val = int(settings.valpercent * numb_nodule)
    # if there is any leftover, add it back to train
    leftover = numb_nodule - nodule_train - nodule_test - nodule_val
    nodule_train += leftover
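    # Worked example (illustrative numbers, not from the original data): with
    # numb_normal = 1003 and a 70/15/15 percent split, int() truncation gives
    # 702/150/150 and leftover = 1, so normal_train becomes 703 and every
    # image ends up in exactly one split.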
    # =================================================================
    # create the train / test / val lists
    # =================================================================
    normal_train_list = normal_images_list[0:normal_train]
    normal_test_list = normal_images_list[normal_train:normal_train + normal_test]
    normal_val_list = normal_images_list[normal_train + normal_test:]

    nodule_train_list = nodule_images_list[0:nodule_train]
    nodule_test_list = nodule_images_list[nodule_train:nodule_test + nodule_train]
    nodule_val_list = nodule_images_list[nodule_test + nodule_train:]

    # =======================================================================================================================
    # copy the nodule and normal files into ../images_rec/nodule and normal
    # note that this is an unbalanced dataset (10 to 1) with 211 nodules and 2706 normals
    # =======================================================================================================================
    next_image_number = total_images + 1
    next_image_number = copyfiles(settings.images_directory, settings.mxnet_images_test_nodule_dir, nodule_test_list,
                                  next_image_number, flip=True, crop=True, resize=True, newsize=(224, 224))
    next_image_number = copyfiles(settings.images_directory, settings.mxnet_images_val_nodule_dir, nodule_val_list,
                                  next_image_number, flip=True, crop=True, resize=True, newsize=(224, 224))
    next_image_number = copyfiles(settings.images_directory, settings.mxnet_images_train_nodule_dir, nodule_train_list,
                                  next_image_number, flip=True, crop=True, resize=True, newsize=(224, 224))

    next_image_number = copyfiles(settings.images_directory, settings.mxnet_images_test_normal_dir, normal_test_list,
                                  next_image_number, flip=False, crop=True, resize=True, newsize=(224, 224))
    next_image_number = copyfiles(settings.images_directory, settings.mxnet_images_val_normal_dir, normal_val_list,
                                  next_image_number, flip=False, crop=True, resize=True, newsize=(224, 224))
    next_image_number = copyfiles(settings.images_directory, settings.mxnet_images_train_normal_dir, normal_train_list,
                                  next_image_number, flip=False, crop=True, resize=True, newsize=(224, 224))

    # =================================================================
    # if desired, balance normal and nodule, i.e. work out how many more
    # to add to nodule by DUPLICATING existing files
    # =================================================================
    if (settings.force_balance == True):
        diff = numb_normal - numb_nodule
        if diff > 0:
            diff_train = int(settings.trainpercent * diff)
            diff_test = int(settings.testpercent * diff)
            diff_val = int(settings.valpercent * diff)
            # if there is any leftover, add it back to train
            leftover = diff - diff_train - diff_test - diff_val
            diff_train += leftover

            next_image_number = duplicatefiles(settings.mxnet_images_test_nodule_dir, nodule_test + diff_test,
                                               next_image_number)
            next_image_number = duplicatefiles(settings.mxnet_images_val_nodule_dir, nodule_val + diff_val,
                                               next_image_number)
            next_image_number = duplicatefiles(settings.mxnet_images_train_nodule_dir, nodule_train + diff_train,
                                               next_image_number)

    # =======================================================================================================================
    # convert normal and nodule to 0 and 1
    # IS THIS EVEN USED?
    # =======================================================================================================================
    label_dict = {"normal": np.array([1.0, 0.0]), "nodule": np.array([0.0, 1.0])}

    # new_image_dict = {}
    # for i in all_diagnosis:
    #     if all_diagnosis[i] in label_index.keys():
    #         new_image_dict[i] = label_index[all_diagnosis[i]]

    # new_image_dict = {}
    # for i in all_diagnosis:
    #     if all_diagnosis[i] == "normal":
    #         new_image_dict[i] = np.array([1, 0])
    #     else:
    #         new_image_dict[i] = np.array([0, 1])

    print("number of bogus images")
    for image in invalid_images:
        print(image)
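
# Hedged addition (assumption, not shown in the original excerpt): run the
# dataset preparation when this module is executed directly.
if __name__ == "__main__":
    main()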