Example #1
from datetime import datetime
from logging import debug

from isic_api import ISICApi


def download_images_meta(api: ISICApi, images_info: list, from_idx: int,
                         num: int, output: list, thread_num: int):
    for image in images_info[from_idx:from_idx + num]:
        start = datetime.now()
        image_detail: dict = api.getJson(f'image/{image["_id"]}')
        image_segmentation_data = api.getJson(
            f'segmentation?imageId={image["_id"]}')
        image_detail.update({'segmentation': image_segmentation_data})
        output.append(image_detail)
        # rough ETA: the last request's latency times the images left
        debug(
            f'Thread#{thread_num}  Fetching image details: {len(output)} of {num}\t'
            f'({round(len(output) / num * 100, 1)}%)\t'
            f'time remaining: {(datetime.now() - start) * (num - len(output))}')
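# A minimal sketch of driving the worker above from two threads (api and
# images_info are assumed to come from ISICApi and download_images_info
# below; the thread count is illustrative):
from threading import Thread

outputs = [[], []]
half = len(images_info) // 2
bounds = [(0, half), (half, len(images_info) - half)]
threads = [
    Thread(target=download_images_meta,
           args=(api, images_info, start, count, outputs[idx], idx),
           daemon=True)
    for idx, (start, count) in enumerate(bounds)
]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()
images_meta = outputs[0] + outputs[1]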
Example #2
from logging import info

from isic_api import ISICApi


def download_images_info(api: ISICApi) -> list:
    all_images = list()
    images_per_time = 30000
    images_downloaded = 0
    info('Fetching image info started')
    while True:
        images: list = api.getJson(
            f'image?limit={images_per_time}&offset={images_downloaded}&sort=name'
        )
        images_received = len(images)
        images_downloaded += images_received
        all_images.extend(images)
        info(f'Fetching image info\tDownloaded: {images_downloaded}')
        # a short page means the archive has nothing more to return
        if images_received != images_per_time:
            break
    return all_images
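# The same limit/offset pattern, distilled into a reusable generator
# (an illustrative sketch; the endpoint string and page size are assumptions):
def paginate(api: ISICApi, endpoint: str, page_size: int = 300):
    offset = 0
    while True:
        page = api.getJson(
            f'{endpoint}?limit={page_size}&offset={offset}&sort=name')
        yield from page
        if len(page) < page_size:
            return  # short page: nothing left to fetch
        offset += page_size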
Example #3
import os
from datetime import datetime
from logging import debug

from isic_api import ISICApi


def download_images(api: ISICApi, images_info: list, from_idx: int, num: int,
                    path: str, thread_num: int):
    downloaded = 0
    for image in images_info[from_idx:from_idx + num]:
        image_file_output_path = os.path.join(path, f'{image["_id"]}.jpg')
        start = datetime.now()
        image_file_resp = api.get(f'image/{image["_id"]}/download')
        image_file_resp.raise_for_status()
        with open(image_file_output_path, 'wb') as image_file_output_stream:
            # iterating the response yields the body in small chunks,
            # so large images are streamed to disk rather than buffered
            for chunk in image_file_resp:
                image_file_output_stream.write(chunk)
        downloaded += 1

        debug(
            f'Thread#{thread_num}  Downloading images: {downloaded} of {num}\t'
            f'({round(downloaded / num * 100, 1)}%)\t'
            f'time remaining: {(datetime.now() - start) * (num - downloaded)}')
Example #4
import os
from datetime import datetime
from logging import debug

from isic_api import ISICApi


def download_segmentation(api: ISICApi, images_info: list, from_idx: int,
                          num: int, path: str, thread_num: int):
    downloaded = 0
    for image in images_info[from_idx:from_idx + num]:
        start = datetime.now()
        # 'segmentation' is attached to each record by download_images_meta
        for segmentation in image['segmentation']:
            segmentation_file_output_path = os.path.join(
                path, f'{segmentation["_id"]}.jpg')
            if os.path.exists(segmentation_file_output_path):
                continue
            image_file_resp = api.get(
                f'segmentation/{segmentation["_id"]}/mask')
            image_file_resp.raise_for_status()
            with open(segmentation_file_output_path,
                      'wb') as image_file_output_stream:
                for chunk in image_file_resp:
                    image_file_output_stream.write(chunk)
        downloaded += 1

        debug(
            f'Thread#{thread_num}  Downloading segmentation masks: {downloaded} of {num}\t'
            f'({round(downloaded / num * 100, 1)}%)\t'
            f'time remaining: {(datetime.now() - start) * (num - downloaded)}')
Example #5
from isic_api import ISICApi
import os
import json
import csv
from tqdm import tqdm
with open('config.json') as json_file:
    data = json.load(json_file)

api = ISICApi(username=data["user"], password=data["pw"])
data_path = data["data_folder"]
num_imgs = data["num_imgs"]
if not os.path.exists(data_path):
    os.makedirs(data_path)
imageList = api.getJson(f'image?limit={num_imgs}&offset=0&sort=name')

#%%            
print('Fetching metadata for %s images' % len(imageList))
imageDetails = []
for image in tqdm(imageList):
    # Fetch the full image details
    imageDetail = api.getJson('image/%s' % image['_id'])
    imageDetails.append(imageDetail)

# Determine the union of all image metadata fields
metadataFields = set(
    field
    for imageDetail in imageDetails
    for field in imageDetail['meta']['clinical'].keys()
)
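# csv is imported above but never used in this example; a sketch of the
# natural next step, mirroring the CSV-writing code in Example #7 (the
# output filename is illustrative):
metadataFields = ['isic_id'] + sorted(metadataFields)
with open('metadata.csv', 'w', newline='') as outputStream:
    csvWriter = csv.DictWriter(outputStream, metadataFields)
    csvWriter.writeheader()
    for imageDetail in imageDetails:
        rowDict = imageDetail['meta']['clinical'].copy()
        rowDict['isic_id'] = imageDetail['name']
        csvWriter.writerow(rowDict)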
Example #6
from isic_api import ISICApi
import os
import json
with open('config.json') as json_file:
    data = json.load(json_file)

api = ISICApi(username=data["user"], password=data["pw"])
data_path = data["data_folder"]
num_imgs = data["num_imgs"]
#%%
savePath = os.path.join(data_path, 'raw')

if not os.path.exists(savePath):
    os.makedirs(savePath)
start_offset = 0
#%%

for i in range(int(num_imgs / 50) + 1):

    imageList = api.getJson(f'image?limit=50&offset={start_offset}&sort=name')

    print('Downloading %s images' % len(imageList))

    for image in imageList:
        print(image['_id'])
        imageFileResp = api.get('image/%s/download' % image['_id'])
        imageFileResp.raise_for_status()
        imageFileOutputPath = os.path.join(savePath, '%s.jpg' % image['name'])
        with open(imageFileOutputPath, 'wb') as imageFileOutputStream:
            for chunk in imageFileResp:
                imageFileOutputStream.write(chunk)

    # advance to the next page of 50 images
    start_offset += 50

Example #7
import csv
import os

import requests

from isic_api import ISICApi


def main(offset, count, meta=True):
    api = ISICApi()

    savePath = '../../ISICArchive/'

    if not os.path.exists(savePath):
        os.makedirs(savePath)

    imageList = api.getJson(f'image?limit={count}&offset={offset}&sort=name')

    i = count - 1
    if meta:
        print('Fetching metadata for %s images' % len(imageList))
        imageDetails = []
        for ind, image in enumerate(imageList):
            print(' ', image['name'])
            # Fetch the full image details
            try:
                imageDetail = api.getJson('image/%s' % image['_id'])
                imageDetails.append(imageDetail)
            except requests.exceptions.ConnectionError:
                imageList = api.getJson(
                    f'image?limit={count}&offset={offset}&sort=name')
                # i = ind
                # break

        # Determine the union of all image metadata fields
        metadataFields = set(
            field for imageDetail in imageDetails
            for field in imageDetail['meta']['clinical'].keys())
        metadataFields = ['isic_id'] + sorted(metadataFields)

        # Write the metadata to a CSV
        outputFileName = f"metadata_{offset}_{offset+i}"
        print('Writing metadata to CSV: %s' % outputFileName + '.csv')
        with open(savePath + outputFileName + '.csv', 'w',
                  newline='') as outputStream:
            csvWriter = csv.DictWriter(outputStream, metadataFields)
            csvWriter.writeheader()
            for imageDetail in imageDetails:
                rowDict = imageDetail['meta']['clinical'].copy()
                rowDict['isic_id'] = imageDetail['name']
                csvWriter.writerow(rowDict)

    print('Downloading %s images' % len(imageList))
    imageDetails = []
    for ind, image in enumerate(imageList):
        if ind > i:
            break
        print(image['name'])
        try:
            imageFileResp = api.get('image/%s/download' % image['_id'])
            imageFileResp.raise_for_status()
            imageFileOutputPath = os.path.join(savePath,
                                               '%s.jpg' % image['name'])
            with open(imageFileOutputPath, 'wb') as imageFileOutputStream:
                for chunk in imageFileResp:
                    imageFileOutputStream.write(chunk)
        except requests.exceptions.ConnectionError:
            # imageList = api.getJson(
            #     f'image?limit={count-ind}&offset={offset+ind}&sort=name')
            print(ind, "FAILED.")
            break
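# Hypothetical invocation; the offset/count values are illustrative:
if __name__ == '__main__':
    main(offset=0, count=1000, meta=True)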
Example #8
import argparse
import multiprocessing as mp
from os import mkdir
from os.path import join
from shutil import copyfile

import numpy as np
import pandas as pd
from tqdm import tqdm

from isic_api import ISICApi
# download_image (and the commented-out load_meta_data) are project helpers
# defined elsewhere


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--account_user',
        required=True,
        help="Create an account at "
        "https://www.isic-archive.com/#!/topWithHeader/wideContentTop/main")
    parser.add_argument('--account_password', required=True)
    parser.add_argument('--out_dir',
                        required=True,
                        help="directory where dataset will be stored")
    options = parser.parse_args()

    ### API authentication
    api = ISICApi(username=options.account_user,
                  password=options.account_password)

    ### get list of images
    imageList = api.getJson('image?limit=100000&offset=0&sort=name')
    print('Got list of {} images'.format(len(imageList)))

    ### download images and save metadata
    out_dir = options.out_dir
    for subfolder in ['images', 'dataset', 'meta']:
        try:
            mkdir(join(out_dir, subfolder))
        except FileExistsError:
            print('{} already exists'.format(subfolder))
    for subfolder in ['case', 'control']:
        try:
            mkdir(join(out_dir, 'dataset', subfolder))
        except FileExistsError:
            print('{} already exists'.format(subfolder))

    ### download images
    pool = mp.Pool(processes=mp.cpu_count())
    f_list = []
    for image in tqdm(imageList, desc="downloading dataset"):
        # pass the function and its arguments separately; calling
        # download_image(...) here would run it synchronously
        f_list.append(
            pool.apply_async(download_image,
                             (image, join(out_dir, 'images'), api)))
    pool.close()
    pool.join()

    ### get metadata of images
    # meta_data = load_meta_data(imageList, api)
    # meta_data.to_pickle(join(out_dir, 'meta', 'metadata.pkl'))
    # meta_data.to_csv(join(out_dir, 'meta', 'metadata.csv'), index=False)
    meta_data = pd.read_pickle(join(out_dir, 'meta', 'metadata.pkl'))

    ### Identify malignant images and download 1x malignant and 2x benign images
    malignant = meta_data[meta_data.meta_clinical_benign_malignant ==
                          'malignant']._id.values
    benign = meta_data[meta_data.meta_clinical_benign_malignant ==
                       'benign']._id.values
    np.random.seed(0)
    benign_sample = np.random.choice(benign,
                                     2 * malignant.shape[0],
                                     replace=False)
    accept_ids = np.concatenate([malignant, benign_sample])

    ### get list of images to be used as case/controls
    imageList = [var for var in imageList if var['_id'] in accept_ids]
    for image in tqdm(imageList,
                      desc="splitting images in case/control groups"):
        f_name = "%s.jpg" % image['name']
        f_path = join(out_dir, 'images', f_name)
        is_malignant = image['_id'] in malignant
        save_to = join(out_dir, 'dataset',
                       'case' if is_malignant else 'control')
        copyfile(f_path, join(save_to, f_name))
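# load_meta_data is referenced only in the commented-out lines above; a
# hypothetical sketch of such a helper (its name, the sep argument, and the
# resulting columns are assumptions). pd.json_normalize flattens
# meta -> clinical -> benign_malignant into the meta_clinical_benign_malignant
# column that the filtering code uses.
def load_meta_data(imageList, api):
    details = [api.getJson('image/%s' % image['_id']) for image in imageList]
    return pd.json_normalize(details, sep='_')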
Example #9
import os

import pandas as pd
from isic_api import ISICApi

#9841 - 9920
## ISIC_557 - 9867

api = ISICApi()
savePath = 'dataset_images/'

if not os.path.exists(savePath):
    os.makedirs(savePath)

data = pd.read_csv('metadata.csv')
id_data = pd.read_csv('name_id.csv')

# collect the unique diagnosis labels from the metadata
diseases = set(data['diagnosis'])

# Create one folder per diagnosis; str() guards against NaN labels
for diagnosis in diseases:
    os.makedirs(os.path.join(savePath, str(diagnosis)), exist_ok=True)
Example #10
import json
import os
import sys
from logging import info
from threading import Thread

from isic_api import ISICApi
# Config and the download_* workers are defined earlier in this project
# (see Examples #1-#4)


def main():
    username = sys.argv[1]
    password = sys.argv[2]

    info(f'Username: {username}')  # avoid logging the password
    api = ISICApi(username=username, password=password)

    if not os.path.exists(Config.WORKSPACE_PATH):
        os.mkdir(Config.WORKSPACE_PATH)

    path_to_images_meta = os.path.join(Config.WORKSPACE_PATH,
                                       Config.IMAGES_META)
    if not os.path.exists(path_to_images_meta):
        all_images = download_images_info(api)
        outputs = list()
        for _ in range(Config.NUM_THREADS):
            outputs.append(list())
        threads = list()
        for thread_idx in range(Config.NUM_THREADS):
            from_idx = thread_idx * len(all_images) // Config.NUM_THREADS
            to_idx = (thread_idx + 1) * len(all_images) // Config.NUM_THREADS
            num_images = to_idx - from_idx
            thread = Thread(target=download_images_meta,
                            args=(api, all_images, from_idx, num_images,
                                  outputs[thread_idx], thread_idx))
            thread.daemon = True
            threads.append(thread)

        for thread in threads:
            thread.start()

        for thread in threads:
            thread.join()

        images_meta = list()
        for output in outputs:
            images_meta.extend(output)
        if len(images_meta) != 0:
            with open(path_to_images_meta, "w") as write_file:
                json.dump(images_meta, write_file, indent=4)
    else:
        with open(path_to_images_meta, "r") as read_file:
            images_meta = json.load(read_file)

    info(f'Number of images before script execution: {len(images_meta)}')

    # The blocks below are one-off maintenance steps, toggled by hand
    if False:
        segmentation_path = os.path.join(Config.WORKSPACE_PATH,
                                         Config.IMAGES_PATH)
        if not os.path.exists(segmentation_path):
            os.mkdir(segmentation_path)
        # keep only images whose file is not on disk yet
        images_meta = [
            image for image in images_meta if not os.path.exists(
                os.path.join(segmentation_path, f'{image["_id"]}.jpg'))
        ]
        threads = list()
        for thread_idx in range(Config.NUM_THREADS):
            from_idx = thread_idx * len(images_meta) // Config.NUM_THREADS
            to_idx = (thread_idx + 1) * len(images_meta) // Config.NUM_THREADS
            num_images = to_idx - from_idx
            thread = Thread(target=download_images,
                            args=(api, images_meta, from_idx, num_images,
                                  segmentation_path, thread_idx))
            thread.daemon = True
            threads.append(thread)

        for thread in threads:
            thread.start()

        for thread in threads:
            thread.join()

    if False:
        segmentation_path = os.path.join(Config.WORKSPACE_PATH,
                                         Config.SEGMENTATION_PATH)
        if not os.path.exists(segmentation_path):
            os.mkdir(segmentation_path)
        threads = list()
        for thread_idx in range(Config.NUM_THREADS):
            from_idx = thread_idx * len(images_meta) // Config.NUM_THREADS
            to_idx = (thread_idx + 1) * len(images_meta) // Config.NUM_THREADS
            num_images = to_idx - from_idx
            thread = Thread(target=download_segmentation,
                            args=(api, images_meta, from_idx, num_images,
                                  segmentation_path, thread_idx))
            thread.daemon = True
            threads.append(thread)

        for thread in threads:
            thread.start()

        for thread in threads:
            thread.join()

    if False:
        unfound_masks = [
            "584727129fc3c10f04859aad", "58470b479fc3c10f04859672"
        ]
        with open(path_to_images_meta, "r") as read_file:
            images_meta = json.load(read_file)
        for image in images_meta:
            # rebuild the list rather than calling remove() while iterating,
            # which silently skips elements
            kept = []
            for segmentation in image["segmentation"]:
                if segmentation["_id"] in unfound_masks:
                    info(f'Segmentation {segmentation["_id"]} will be removed')
                else:
                    kept.append(segmentation)
            image["segmentation"] = kept
        if len(images_meta) != 0:
            with open(path_to_images_meta, "w") as write_file:
                json.dump(images_meta, write_file, indent=4)
                info("File written")

    if False:
        with open(path_to_images_meta, "r") as read_file:
            images_meta = json.load(read_file)
        # rebuild the list rather than calling remove() while iterating
        kept_images = []
        for image in images_meta:
            if len(image["segmentation"]) == 0:
                info(f'Image {image["_id"]} will be removed')
            else:
                kept_images.append(image)
        images_meta = kept_images
        if len(images_meta) != 0:
            with open(path_to_images_meta, "w") as write_file:
                json.dump(images_meta, write_file, indent=4)
                info("File written")

    if False:
        with open(path_to_images_meta, "r") as read_file:
            images_meta = json.load(read_file)
            images_ids = []
            for image in images_meta:
                images_ids.append(image["_id"] + ".jpg")
            images_path = os.path.join(Config.WORKSPACE_PATH,
                                       Config.IMAGES_PATH)
            for image in os.listdir(images_path):
                if image not in images_ids:
                    os.remove(os.path.join(images_path, image))
                    info(f'{image} deleted')

    if True:
        with open(path_to_images_meta, "r") as read_file:
            images_meta = json.load(read_file)
            num = 0
            for image in images_meta:
                if "diagnosis" in image["meta"]["clinical"].keys() or \
                        "benign_malignant" in image["meta"]["clinical"].keys():
                    num += 1
            print("Num:", num)

    with open(path_to_images_meta, "r") as read_file:
        images_meta = json.load(read_file)
        info(f'Number of images after script execution: {len(images_meta)}')
        # .get() avoids a KeyError for records without a benign_malignant label
        print(
            "Benign:",
            len([
                image for image in images_meta
                if image["meta"]["clinical"].get("benign_malignant") ==
                "benign"
            ]))
        print(
            "Malignant:",
            len([
                image for image in images_meta
                if image["meta"]["clinical"].get("benign_malignant") ==
                "malignant"
            ]))
Example #11
import pandas as pd
from isic_api import ISICApi

# Initialize the API; credentials are only needed for non-public data
api = ISICApi(username="******", password="******")
outputFileName = 'imagedata'

imageList = api.getJson('image?limit=25000&offset=0&sort=name')

print('Fetching metadata for %s images' % len(imageList))
imageDetails = []
for image in imageList:
    print(' ', image['name'])
    # Pull image details
    imageDetail = api.getJson('image/%s' % image['_id'])
    imageDetails.append(imageDetail)
"""
# Testing Parameters 
print("****************************")
print(imageDetails[0]['meta']['clinical']['anatom_site_general'])
print("****************************")
data = json_normalize(imageDetails[0])
print(data.loc[0])

data = json_normalize(imageDetails[0])
print(data.loc[0])
print("========================================================")