Example #1
def index_study_path(args):
    logger = logging.getLogger(__toolname__ + '.index_study_path_aecg')
    startmsg = f"Indexing: '{args.dir}' to: '{args.oxlsx}'"
    print(f"{startmsg}")
    logger.info(
        f",,{startmsg}")
    studyindex_info = aecg.tools.indexer.StudyInfo()
    studyindex_info.StudyDir = os.path.normpath(args.dir)
    studyindex_info.IndexFile = os.path.normpath(args.oxlsx)
    studyindex_info.Description = args.description
    studyindex_info.Version = aecg.__version__
    studyindex_info.AppType = args.apptype
    studyindex_info.AppNum = f"{int(args.appnum):06d}"
    studyindex_info.StudyID = args.studyid
    studyindex_info.NumSubj = args.numsubj
    studyindex_info.NECGSubj = args.necgsubj
    studyindex_info.TotalECGs = args.totalecgs
    studyindex_info.AnMethod = aecg.tools.indexer.AnnotationMethod[
        args.annmethod].name
    studyindex_info.AnLead = args.annlead
    studyindex_info.AnNbeats = args.nbeatsann
    studyindex_info.StudyDir = args.dir
    studyindex_info.Sponsor = args.sponsor

    n_cores = args.nprocs
    pbar = tqdm(desc=f"Indexing {studyindex_info.StudyDir} directory")
    mycb = aecg.tools.indexer.IndexingProgressCallBack(pbar)
    studyindex_df = aecg.tools.indexer.index_study(
        studyindex_info,
        args.allintervals == "Y",
        n_cores, mycb)
    pbar.close()

    return studyindex_df
Example #2
def validation(model, device, criterion, metrics, dataloader, pbar_desc="validation phase"):
    model.eval()
    val_loss = 0.0
    val_metrics = {k: 0.0 for k in metrics}
    
    for origins, masks in tqdm(dataloader, desc=pbar_desc):
        num = origins.size(0)

        origins = origins.to(device)
        masks = masks.to(device)

        with torch.no_grad():
            outs = model(origins)
        
            val_loss += criterion(outs, masks).item() * num
            val_metrics = {k: v + metrics[k](masks, outs).item() * num for k, v in val_metrics.items()}
        

    val_loss = val_loss / len(dataloader.sampler)
    val_metrics = {k: v / len(dataloader.sampler) for k, v in val_metrics.items()}
    
    return {
        "val_loss": val_loss,
        **val_metrics,
    }
Example #3
def train(model, device, criterion, optimizer, dataloader, accumulation_steps, pbar_desc="train phase"):
    model.train()
    train_loss = 0.0
    
    for i, (origins, masks) in enumerate(tqdm(dataloader, desc=pbar_desc)):
        
        num = origins.size(0)

        origins = origins.to(device)
        masks = masks.to(device)
                
        outs = model(origins)
        loss = criterion(outs, masks)
        
        train_loss += loss.item() * num
        
        loss = loss / accumulation_steps
        loss.backward() 
        
        if (i + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()

    train_loss = train_loss / len(dataloader.sampler)
    return {
        "train_loss": train_loss,
    }
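
The train and validation helpers above (Examples #2 and #3) are usually driven by an outer epoch loop. Below is a minimal sketch of such a loop; the model, criterion, optimizer, metrics dict and the two dataloaders are assumed to be built elsewhere, and the epoch count and accumulation_steps values are placeholders.

# Sketch of an epoch loop wiring train() and validation() together.
# All objects passed in (model, criterion, optimizer, metrics, loaders)
# are assumed to be constructed elsewhere.
def fit(model, device, criterion, optimizer, metrics,
        train_loader, val_loader, epochs=10, accumulation_steps=2):
    history = []
    for epoch in range(epochs):
        train_stats = train(model, device, criterion, optimizer,
                            train_loader, accumulation_steps,
                            pbar_desc=f"train {epoch + 1}/{epochs}")
        val_stats = validation(model, device, criterion, metrics,
                               val_loader,
                               pbar_desc=f"val {epoch + 1}/{epochs}")
        history.append({**train_stats, **val_stats})
    return history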
Example #4
def main():
    req = get(URL, stream=True)
    with open("data.zip", "wb") as file:
        for chunk in tqdm(
                req.iter_content(chunk_size=CHUNK_SIZE),
                total=ceil(int(req.headers['content-length']) / CHUNK_SIZE)):
            file.write(chunk)

    system("unzip data.zip")
    system("rm data.zip")
    system("rm -rf __MACOSX")
Example #5
    def parse(self, progress_bar: bool = True):
        w = walk(self.path)
        # os.path.join builds the file path (the pathlib module has no join()).
        self.data = np.array([[[r, os.path.join(r, f_)] for f_ in f]
                              for r, _, f in list(w)[1:]]).reshape(-1, 2)
        self.y = np.array(self.data[:, 0])
        self.x = np.array([
            cv2.cvtColor(cv2.resize(cv2.imread(i), (self.resize, self.resize)),
                         cv2.COLOR_BGR2RGB) / 255
            for i in (tqdm(self.data[:, 1]) if progress_bar else self.data[:, 1])
        ]).astype(np.float32)
        return self
Example #6
    def wait(self, query={}):
        task_stat = self.task_stat(query)
        total = self._get_total(task_stat)
        last_undone_n = self._get_undone_n(task_stat)
        with tqdm(total=total, initial=total - last_undone_n) as pbar:
            while True:
                time.sleep(10)
                undone_n = self._get_undone_n(self.task_stat(query))
                pbar.update(last_undone_n - undone_n)
                last_undone_n = undone_n
                if undone_n == 0:
                    break
Example #7
def extract_content_list_wise(arxiv_url, url):
    content, status_code = get_content(url)
    if status_code != 200:
        return
    urls = content.find_all('a')
    urls = [x.get('href') for x in urls if '/list/' in str(x)]
    urls = [url for url in urls if 'recent' not in url]
    urls = [url for url in urls if '?' not in url]
    urls = [arxiv_url + x for x in urls]
    urls = list(set(urls))
    for url in tqdm(urls, "Years"):
        extract_arxiv_links(arxiv_url, url)
Example #8
def download_file(path):
    with open(path, "r") as file:
        image_urls = file.read().split("\n")
    broken = []
    for line in tqdm(image_urls):
        name, url = line.split("\t")
        with open(f"./images/{name}.jpg", "wb") as image:
            with requests.get(url) as response:
                if response.status_code == 200:
                    image.write(response.content)
                else:
                    broken.append([name, url])  # record which image failed
    return broken
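
download_file expects a plain-text input where each line is a tab-separated "name<TAB>url" pair (that is what line.split("\t") implies). A hypothetical call could look like this; the "urls.txt" filename is a placeholder, and the ./images directory is assumed to exist.

# Hypothetical usage of download_file(); "urls.txt" is a placeholder name.
failed = download_file("urls.txt")
if failed:
    print(f"{len(failed)} downloads failed")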
Example #9
def main(config):
    """
    Responsible for the whole webscrape of the ARXIV website.

    :param config:  YAML config file content
    :return: None
    """
    url_list = extract_subjects(config['Arxiv_Website'], config['Subjects'])
    logging.info("Number of subjects found is {}".format(len(url_list)))
    if len(url_list) != 0:
        if 'all' not in config['Subjects']:
            keys = list(config['Subjects'].keys())
            # enumerate keeps idx aligned with url_list so each subject
            # is paired with its own entry in config['Subjects']
            for idx, url in enumerate(tqdm(url_list, desc="Subjects")):
                extract_content_year_wise(config['Arxiv_Website'], url,
                                          config['Subjects'][keys[idx]])
                logging.info("Done with {}".format(url))
        else:
            for url in tqdm(url_list, desc="Subjects"):
                extract_content_year_wise(config['Arxiv_Website'], url, None)
                logging.info("Done with {}".format(url))
    logging.info("Done")
Example #10
def extract_content_year_wise(arxiv_url, url, years):
    content, status_code = get_content(url)
    if status_code != 200:
        return
    urls = content.find_all('a')
    if years is None:
        urls = [x.get('href') for x in urls if '/year/' in str(x)]
    else:
        years = [str(x) for x in years]
        urls = [
            x.get('href') for x in urls
            if str(x.text) in years and '/year/' in str(x)
        ]
    logging.info("Number of years found for {} is {}".format(url, len(urls)))
    urls = [arxiv_url + x for x in urls]
    for link in tqdm(urls, desc="Year for {}".format(url)):
        extract_content_list_wise(arxiv_url, link)
Example #11
def run_inferece(
        config_filename,
        checkpoint_filename,
        output_folder,
        use_tta=False,
        out_shape=(640, 400),
):
    set_global_seed(42)
    output_folder = Path(output_folder)

    config_parser = ConfigParser(
        config_filename, False, **{
            "checkpoint.filename": checkpoint_filename,
            "checkpoint.model": True,
        })
    config = config_parser()
    dataloader = config.dataloaders.test
    device = config.device
    model = config.model
    model.eval()

    model = tta.TTAWrapper(model, tta.fliplr_image2mask) if use_tta else model

    print("Inference stage")
    filenames = []
    with torch.no_grad():
        for imgs, pos in tqdm(dataloader):
            imgs = imgs.to(device)
            outs = model(imgs)
            outs = outs.argmax(1).cpu()

            seqs = pos[0].long().tolist()
            orders = pos[1].long().tolist()

            for out, seq, order in zip(outs, seqs, orders):
                out = out.numpy().astype(np.uint8)
                out = cv2.resize(out, out_shape)
                filename = f"S_{seq}/{order}.npy"
                path = output_folder / filename
                path.parent.mkdir(parents=True, exist_ok=True)
                np.save(path, out)
                filenames.append(filename)
    with open(output_folder / "output.txt", "w") as output_file:
        output_file.write("\n".join(filenames))
Example #12
def main():
    system("mkdir annotations")
    system("mkdir images")

    print("[+] Downloading ")
    for i in tqdm(range(1, 411)):
        i = str(i)
        file = f"BloodImage_{(5-len(i))*'0'}{i}"
        url = f"{URL}/Annotations/{file}.xml"
        req = get(url)

        if req.status_code != 200:
            continue

        with open(f"annotations/{file}.xml", "wb") as xml:
            xml.write(req.content)

        file = f"BloodImage_{(5-len(i))*'0'}{i}"
        url = f"{URL}/JPEGImages/{file}.jpg"
        req = get(url)
        with open(f"images/{file}.jpg", "wb") as jpg:
            jpg.write(req.content)
Example #13
    def calculatePageBreaks(self, lines: List[Line]):
        badness = np.full((len(lines) + 1, len(lines) + 1), inf)
        for i in irange(0, len(lines) - 1):
            for j in irange(i, len(lines)):
                badness[i, j] = (self.params.page_height -
                                 sum(l.height
                                     for l in stripGaps(lines[i:j + 1])))**3

                if badness[i, j] < 0:
                    badness[i, j] = inf

                elif lines[i].no_page_break:
                    badness[i, j] += 1e50

                elif j == len(lines):
                    badness[i, j] = 0

        scores = np.full((len(lines) + 1, len(lines)), inf)
        bps = {}
        j = len(lines)
        for n in tqdm(irange(0, len(lines) - 1)):
            for i in irange(len(lines) - 1, 0, -1):
                if n == 0:
                    scores[i, n] = inf
                    bps[(i, n)] = []

                else:
                    min_score = badness[i, j]
                    min_bps = []
                    for x in irange(i + 1, j):
                        score = scores[x, n - 1] + badness[i, x - 1]
                        if score < min_score:
                            min_score = score
                            min_bps = [x] + bps[(x, n - 1)]

                    scores[i, n] = min_score
                    bps[(i, n)] = min_bps

        return bps[(0, len(lines) - 1)]
Example #14
    def wait(self, query={}):
        """
        When multiprocessing, the main process may fetch nothing from TaskManager because there are still some running tasks.
        So the main process should wait until all tasks are trained well by other processes or machines.

        Args:
            query (dict, optional): the query dict. Defaults to {}.
        """
        task_stat = self.task_stat(query)
        total = self._get_total(task_stat)
        last_undone_n = self._get_undone_n(task_stat)
        if last_undone_n == 0:
            return
        self.logger.warning(f"Waiting for {last_undone_n} undone tasks. Please make sure they are running.")
        with tqdm(total=total, initial=total - last_undone_n) as pbar:
            while True:
                time.sleep(10)
                undone_n = self._get_undone_n(self.task_stat(query))
                pbar.update(last_undone_n - undone_n)
                last_undone_n = undone_n
                if undone_n == 0:
                    break
Example #15
import dropbox

from tqdm.cli import tqdm
from pathlib import Path

data_folder = Path("data")
archive_folder = Path("archive")
sparse_segm_folder = data_folder / "sparse-segm"

token_filename = archive_folder / "dropbox_token.txt"
with open(token_filename) as token_file:
    TOKEN = token_file.read()
dbx = dropbox.Dropbox(TOKEN)

folder_name = "/Openedsdata2020/openEDS2020-SparseSegmentation/participant/"

for entry in tqdm(dbx.files_list_folder(folder_name).entries):
    output_filename = str(sparse_segm_folder /
                          entry.path_display.replace(folder_name, ""))
    if isinstance(entry, dropbox.files.FolderMetadata):
        dbx.files_download_zip_to_file(output_filename + ".zip",
                                       entry.path_lower)
    else:
        dbx.files_download_to_file(output_filename, entry.path_lower)
Example #16
img = []  # rows of the puzzle image parsed from input.txt
with open('input.txt') as file:
    enhancer = np.array(list(file.readline().strip())) == '#'
    file.readline()
    while line := file.readline().strip():
        img.append(np.array(list(line)) == '#')

img = np.array(img)

for line in img:
    print(''.join(['.', '#'][int(d)] for d in line))
print()

# it takes around 7 seconds
from tqdm.cli import tqdm
# outer_value (the state of the infinite border) is assumed to be defined
# and updated elsewhere in the original script.
for i in tqdm(range(50)):
    points_that_might_change_plus_border = np.pad(img,
                                                  2,
                                                  mode='constant',
                                                  constant_values=outer_value)
    output = np.zeros(points_that_might_change_plus_border.shape)

    for x in range(1, points_that_might_change_plus_border.shape[0] - 1):
        for y in range(1, points_that_might_change_plus_border.shape[1] - 1):
            data = points_that_might_change_plus_border[x - 1:x + 2,
                                                        y - 1:y + 2]
            binary = ''.join(['0', '1'][int(d)] for d in data.flatten())
            index = int(binary, 2)
            output[x, y] = enhancer[index]

    img = output[1:-1, 1:-1]  # trim the padding once per enhancement step
Example #17
          'pixelphase': xy_pairs,
          'fitshape': [(31, 31)],
          'σ': [0, 50],
          'λ': np.linspace(30_000, 200_000, 3),
          'model_oversampling': [2],
          'model_degree': [3],
          'model_mode': ['grid'],
          'fit_accuracy': [1.49012e-08],
          'use_weights': [False, True],
          'return_imgs': [True]
          }

    from thesis_lib.util import DebugPool, dictoflists_to_listofdicts

    with DebugPool() as p:
    #with mp.Pool() as p:
        results = pd.DataFrame.from_records(
            p.map(fit_models_dictarg, tqdm(create_arg_list(dl))))
    results = transform_dataframe(results)
    print(results.dev.describe())
    #results = results[['noise', 'residual', 'use_weights', 'pixelphase']]
    #results.noise = results.noise.transform(lambda n: (n.gauss_std, n.poisson_std))

    #plot_fitshape(results[results.n_sources1d == 1])
    plot_fitshape(results)
    plot_xy_deviation(results)
    plot_phase_vs_deviation(results)
    plot_phase_vs_deviation3d(results)
    plot_noise_vs_weights(results)
    plt.show()
Example #18
import json

from io import BytesIO
from rh import _get_info, _rgetv
from tqdm.cli import tqdm
from copy import copy

if __name__ == "__main__":

    i = 0
    next_url = "https://api.robinhood.com/orders/"
    orders = []

    while next_url is not None:
        resp = _get_info(next_url)
        next_url = resp["next"]
        orders.extend(resp["results"])
        i += 1

    with tqdm(total=len(orders)) as pbar:
        for order in orders:
            for key in ["instrument", "position", "instrument.splits"]:
                try:
                    _keys = key.split(".")
                    _url = _rgetv(order, _keys)
                    _rgetv(order,
                           _keys[:-1])[_keys[-1]] = copy(_get_info(_url))
                except KeyError as e:
                    continue
            pbar.update(1)

with open("summary.json", "w") as fp:
    json.dump(orders, fp)