
kits19-challenge's People

Contributors: nitsaick

kits19-challenge's Issues

Pause training

Is there any way to pause the training and resume later?
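A common approach (not specific to this repo) is to save a checkpoint containing both the model and optimizer state, then reload it to continue training. A minimal sketch, where the model, optimizer, epoch number, and file name are all placeholders:

```python
import torch
import torch.nn as nn

model = nn.Linear(4, 2)  # placeholder model
optimizer = torch.optim.Adam(model.parameters())

# "Pause": save everything needed to resume training later.
torch.save({
    'epoch': 5,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}, 'checkpoint.pth')

# "Resume": restore state and continue from the saved epoch.
ckpt = torch.load('checkpoint.pth', weights_only=False)
model.load_state_dict(ckpt['model_state_dict'])
optimizer.load_state_dict(ckpt['optimizer_state_dict'])
start_epoch = ckpt['epoch'] + 1
```

Saving the optimizer state matters for optimizers like Adam, whose moment estimates would otherwise reset on resume.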

data set

I would like to ask whether the dataset used by your program is the standard dataset from the kits19 git repository or the interpolated version.

Data_Aug

Hi,
I have a question: after using MedicalTransform to augment an image, I printed the result and found that the label did not change. The first row shows the image and label without augmentation, and the second row shows the augmented image and label. Is this correct?
[image]
Best,
Jiang
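The underlying question is whether the augmentation applies the same spatial transform to the label as to the image: for segmentation, every geometric transform must be mirrored on the mask, otherwise they stop lining up. A minimal NumPy sketch of the idea (not this repo's MedicalTransform implementation):

```python
import numpy as np

def augment_pair(image, label, rng):
    """Apply the *same* random geometric transform to image and label.
    If the label does not follow each spatial transform, masks no
    longer align with the augmented image."""
    if rng.random() < 0.5:  # random horizontal flip, shared by both
        image = np.flip(image, axis=1)
        label = np.flip(label, axis=1)
    k = rng.integers(0, 4)  # random 90-degree rotation, shared by both
    image = np.rot90(image, k)
    label = np.rot90(label, k)
    return image, label

rng = np.random.default_rng(0)
img = np.arange(16.0).reshape(4, 4)
msk = (img > 7).astype(np.uint8)
aug_img, aug_msk = augment_pair(img, msk, rng)
# The mask still marks exactly the pixels whose value is > 7.
assert np.array_equal(aug_msk, (aug_img > 7).astype(np.uint8))
```

If the printed labels look unchanged after augmentation while the images changed, that alignment property is worth verifying first.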

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <U14

I am new to PyTorch, so I am running into errors I haven't seen before.

Error occurs on trainer.fit(model, dm).

The lines marked `# HERE` show where my function is called and where the PyTorch error occurs.

Note: the for loop runs training 10 times so I can average the training time; I want to document training and prediction times across different datasets.

Invocation:

# re-train cell
args = """
      --max_epochs 20
      --progress_bar_refresh_rate 2
      --gradient_clip_val 0.5
      --log_gpu_memory True
      --gpus 1
    """.split()

for i in range(10):
  start_time = time.time()
  run_training(args) # HERE
  print("--- %s seconds ---" % (time.time() - start_time))

run_training():

def run_training(input=None):
    args = parse_args(input)
    pl.seed_everything(args.seed)
    module = importlib.import_module('pytorch_lightning.loggers')
    logger = getattr(module, args.logging)(save_dir='logs')
    csv_logger = pl.loggers.CSVLogger(save_dir=f'{args.modeldir}/csv_logs')
    loggers = [logger, csv_logger]
    dm = OntologyTaggerDataModule.from_argparse_args(args)
    if args.model_uri and len(args.checkpointfile) > 1:
        local_model_uri = os.environ.get('SM_CHANNEL_MODEL', '.')
        tar_path = os.path.join(local_model_uri,  'model.tar.gz')
        tar = tarfile.open(tar_path, "r:gz")
        tar.extractall(local_model_uri)
        tar.close()
        model_path = os.path.join(local_model_uri, args.checkpointfile)
        model = OntologyTaggerModel.load_from_checkpoint(model_path)        
    elif os.path.isfile(os.path.join(args.traindir, args.checkpointfile)):
        file_path = os.path.join(args.traindir, args.checkpointfile)
        model = OntologyTaggerModel.load_from_checkpoint(file_path)
    else:    
        model = OntologyTaggerModel(**vars(args), num_classes=dm.num_classes, class_map=dm.class_map)
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        args.checkpointdir, save_last=True, save_weights_only=True)

    checkpoint_dir = os.environ.get('SM_HP_CHECKPOINTDIR', './')
    if checkpoint_dir != './':
        labels_file_orig = os.path.join(checkpoint_dir, args.labels)
        labels_file_cp = os.path.join(args.modeldir, os.path.basename(args.labels))
        shutil.copyfile(labels_file_orig, labels_file_cp)
        
    trainer = pl.Trainer.from_argparse_args(args, callbacks=[checkpoint_callback], logger=loggers)
    trainer.fit(model, dm) # HERE
    model_file = os.path.join(args.modeldir, 'last.ckpt')
    trainer.save_checkpoint(model_file, weights_only=True)

Error Traceback:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-14-847851467cf0> in <module>()
     10 for i in range(10):
     11   start_time = time.time()
---> 12   run_training(args)
     13   print("--- %s seconds ---" % (time.time() - start_time))

<ipython-input-6-7f8e9eed480d> in run_training(input)
     68 
     69     trainer = pl.Trainer.from_argparse_args(args, callbacks=[checkpoint_callback], logger=loggers)
---> 70     trainer.fit(model, dm)
     71     model_file = os.path.join(args.modeldir, 'last.ckpt')
     72     trainer.save_checkpoint(model_file, weights_only=True)

/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
    497 
    498         # dispath `start_training` or `start_testing` or `start_predicting`
--> 499         self.dispatch()
    500 
    501         # plugin will finalized fitting (e.g. ddp_spawn will load trained model)

/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in dispatch(self)
    544 
    545         else:
--> 546             self.accelerator.start_training(self)
    547 
    548     def train_or_test_or_predict(self):

/usr/local/lib/python3.7/dist-packages/pytorch_lightning/accelerators/accelerator.py in start_training(self, trainer)
     71 
     72     def start_training(self, trainer):
---> 73         self.training_type_plugin.start_training(trainer)
     74 
     75     def start_testing(self, trainer):

/usr/local/lib/python3.7/dist-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in start_training(self, trainer)
    112     def start_training(self, trainer: 'Trainer') -> None:
    113         # double dispatch to initiate the training loop
--> 114         self._results = trainer.run_train()
    115 
    116     def start_testing(self, trainer: 'Trainer') -> None:

/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in run_train(self)
    605             self.progress_bar_callback.disable()
    606 
--> 607         self.run_sanity_check(self.lightning_module)
    608 
    609         # set stage for logging

/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in run_sanity_check(self, ref_model)
    858 
    859             # run eval step
--> 860             _, eval_results = self.run_evaluation(max_batches=self.num_sanity_val_batches)
    861 
    862             self.on_sanity_check_end()

/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in run_evaluation(self, max_batches, on_epoch)
    710             dl_max_batches = self.evaluation_loop.max_batches[dataloader_idx]
    711 
--> 712             for batch_idx, batch in enumerate(dataloader):
    713                 if batch is None:
    714                     continue

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in __next__(self)
    519             if self._sampler_iter is None:
    520                 self._reset()
--> 521             data = self._next_data()
    522             self._num_yielded += 1
    523             if self._dataset_kind == _DatasetKind.Iterable and \

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
    559     def _next_data(self):
    560         index = self._next_index()  # may raise StopIteration
--> 561         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    562         if self._pin_memory:
    563             data = _utils.pin_memory.pin_memory(data)

/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     45         else:
     46             data = self.dataset[possibly_batched_index]
---> 47         return self.collate_fn(data)

/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
     82             raise RuntimeError('each element in list of batch should be of equal size')
     83         transposed = zip(*batch)
---> 84         return [default_collate(samples) for samples in transposed]
     85 
     86     raise TypeError(default_collate_err_msg_format.format(elem_type))

/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/collate.py in <listcomp>(.0)
     82             raise RuntimeError('each element in list of batch should be of equal size')
     83         transposed = zip(*batch)
---> 84         return [default_collate(samples) for samples in transposed]
     85 
     86     raise TypeError(default_collate_err_msg_format.format(elem_type))

/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
     60             # array of string classes and object
     61             if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
---> 62                 raise TypeError(default_collate_err_msg_format.format(elem.dtype))
     63 
     64             return default_collate([torch.as_tensor(b) for b in batch])

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <U14
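For what it's worth, `<U14` is NumPy's dtype for a fixed-width 14-character unicode string: some field your dataset returns is a NumPy string array, which `default_collate` cannot turn into a tensor. One common fix is a custom `collate_fn` that keeps string fields as plain Python lists. A minimal sketch (the dataset and field names here are hypothetical, not from the code above):

```python
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset, default_collate

class ToyDataset(Dataset):
    """Hypothetical dataset: each item has a tensor and a string label."""
    def __init__(self):
        self.names = np.array(['kidney', 'tumor', 'background'])
    def __len__(self):
        return 3
    def __getitem__(self, i):
        return {'x': torch.zeros(2), 'name': self.names[i]}

def collate_keep_strings(batch):
    # Collate tensor fields normally; keep string fields as Python lists.
    out = {}
    for key in batch[0]:
        vals = [b[key] for b in batch]
        if isinstance(vals[0], str):  # np.str_ subclasses str
            out[key] = [str(v) for v in vals]
        else:
            out[key] = default_collate(vals)
    return out

loader = DataLoader(ToyDataset(), batch_size=3,
                    collate_fn=collate_keep_strings)
batch = next(iter(loader))
```

Alternatively, if the string field isn't needed during training, dropping it from `__getitem__` avoids the problem entirely.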

Aborted!

When I try to run get_roi.py:
python get_roi.py -b 1 -g 1 -s 128 128 --org_data "kits19/data" --data "data" -r "runs/ResUNet/checkpoint/best.pth" -o "data/roi.json"

It does not show any explicit error, just Aborted!:

0%| | 0/300 [00:00<?, ?it/s]
Aborted!

[screenshot: issue_aborted]

I wonder if you could help me with this?
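As a general note (not specific to this repo): `Aborted!` with no traceback usually means the process died at the native level, commonly from running out of memory or from a crash inside a C extension or CUDA, so Python never got to print an exception. Enabling `faulthandler` before the failing call can sometimes surface where it died; a generic sketch:

```python
import faulthandler
import sys

# Print a Python traceback even when the process dies from a fatal
# signal (SIGSEGV, SIGABRT, ...) instead of a Python exception.
faulthandler.enable(file=sys.stderr)

# Run the failing code after this point, e.g.:
# get_roi()
```

Reducing the batch size or image size is also a quick way to rule out an out-of-memory abort.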

result

The training result is below, but I can't tell what the issue is:
------------- Epoch 1/100 --------------
Learning rate: 0.0001
train: 100%|###########################################################################################| 8128/8128 [1:33:07<00:00, 1.49it/s, loss=nan]
Best epoch: 1
Best score: 0.00000
------------- Epoch 2/100 --------------
Learning rate: 0.0001
train: 100%|###########################################################################################| 8128/8128 [1:31:55<00:00, 1.48it/s, loss=nan]
Best epoch: 1
Best score: 0.00000
------------- Epoch 3/100 --------------
Learning rate: 0.0001
train: 100%|###########################################################################################| 8128/8128 [1:32:11<00:00, 1.46it/s, loss=nan]
Best epoch: 1
Best score: 0.00000
------------- Epoch 4/100 --------------
Learning rate: 0.0001
train: 100%|###########################################################################################| 8128/8128 [1:32:33<00:00, 1.47it/s, loss=nan]
Best epoch: 1
Best score: 0.00000
------------- Epoch 5/100 --------------
Learning rate: 0.0001
train: 100%|###########################################################################################| 8128/8128 [1:32:54<00:00, 1.46it/s, loss=nan]
eval/train: 100%|####################################################################################################| 147/147 [56:08<00:00, 13.30s/it]
train/dc_global_0: 0.99566
train/dc_global_1: 0.00000
train/dc_per_case_0: 0.99520
train/dc_per_case_1: 0.00000
eval/valid: 100%|######################################################################################################| 63/63 [22:02<00:00, 16.13s/it]
valid/dc_global_0: 0.99598
valid/dc_global_1: 0.00000
valid/dc_per_case_0: 0.99486
valid/dc_per_case_1: 0.00000
Train data score: 0.00000
Valid data score: 0.00000
Best epoch: 1
Best score: 0.00000
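A `loss=nan` from the very first epoch (with dice near 1.0 for background and 0.0 for the kidney/tumor classes, i.e. the model predicting everything as background) usually means the loss diverged immediately. A few generic checks worth running on a batch, with placeholder variable names, are sketched below:

```python
import torch

def check_batch(images, labels, loss):
    """Debug helper: report the usual suspects when loss is NaN.
    Illustrative only; thresholds and names are assumptions."""
    problems = []
    if torch.isnan(images).any():
        problems.append('NaN in input images (check normalization / HU clipping)')
    if torch.isinf(images).any():
        problems.append('Inf in input images')
    if labels.min() < 0 or labels.max() > 2:  # kits19 has classes 0, 1, 2
        problems.append('label values outside expected range')
    if torch.isnan(loss):
        problems.append('loss is NaN -> try a lower learning rate or grad clipping')
    return problems

imgs = torch.tensor([[0.1, float('nan')]])
lbls = torch.tensor([0, 1])
loss = torch.tensor(float('nan'))
print(check_batch(imgs, lbls, loss))
```

If the inputs are clean, lowering the learning rate or adding gradient clipping are the usual next steps.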

cc3d module

Sorry, I cannot find the cc3d module. Did you create this module yourself? Thanks
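For reference, `cc3d` is most likely the third-party connected-components-3d package (`pip install connected-components-3d`), which labels connected components in 3D volumes. The same operation can be done with SciPy; a small sketch on a toy volume:

```python
import numpy as np
from scipy import ndimage

# Toy binary volume with two separate blobs.
vol = np.zeros((4, 4, 4), dtype=np.uint8)
vol[0, 0, 0] = 1        # blob 1: a single voxel
vol[3, 2:4, 2:4] = 1    # blob 2: a 1x2x2 block

# Label connected components (6-connectivity by default in 3D).
labels, num = ndimage.label(vol)
print(num)  # 2 components
```

In a kidney-segmentation pipeline, such labeling is typically used to keep the largest connected components of a predicted mask and discard small spurious blobs.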

Dataset

Hi @nitsaick ,
Is the dataset you used the master kits19 data or the interpolated version?

Thank you!

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>

When I run

python get_roi.py -b 32 -s 512 512 --org_data "../../../kits19/data" --data "data" -r "runs/ResUNet/checkpoint/best.pth" -o "data/roi.json"

it raises this error:

------subset----------- <torch.utils.data.dataset.Subset object at 0x7f7c289d7048>
------sampler----------- <torch.utils.data.sampler.SequentialSampler object at 0x7f7c269081d0>
------data_loader----------- <torch.utils.data.dataloader.DataLoader object at 0x7f7c26908080>
eval/test:   0%|                                                       | 0/90 [00:00<?, ?it/s]
Traceback (most recent call last):
  File "get_roi.py", line 185, in <module>
    get_roi()
  File "/root/anaconda3/envs/kits19-challenge/lib/python3.6/site-packages/click/core.py", line 764, in __call__
    return self.main(*args, **kwargs)
  File "/root/anaconda3/envs/kits19-challenge/lib/python3.6/site-packages/click/core.py", line 717, in main
    rv = self.invoke(ctx)
  File "/root/anaconda3/envs/kits19-challenge/lib/python3.6/site-packages/click/core.py", line 956, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/root/anaconda3/envs/kits19-challenge/lib/python3.6/site-packages/click/core.py", line 555, in invoke
    return callback(*args, **kwargs)
  File "get_roi.py", line 181, in get_roi
    get_roi_from_resunet(batch_size, num_gpu, img_size, data_path, resume, roi_file, vis_intvl, num_workers)
  File "get_roi.py", line 120, in get_roi_from_resunet
    for batch_idx, data in enumerate(data_loader):
  File "/root/anaconda3/envs/kits19-challenge/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 346, in __next__
    data = self.dataset_fetcher.fetch(index)  # may raise StopIteration
  File "/root/anaconda3/envs/kits19-challenge/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
    return self.collate_fn(data)
  File "/root/anaconda3/envs/kits19-challenge/lib/python3.6/site-packages/torch/utils/data/_utils/collate.py", line 75, in default_collate
    return {key: default_collate([d[key] for d in batch]) for key in elem}
  File "/root/anaconda3/envs/kits19-challenge/lib/python3.6/site-packages/torch/utils/data/_utils/collate.py", line 75, in <dictcomp>
    return {key: default_collate([d[key] for d in batch]) for key in elem}
  File "/root/anaconda3/envs/kits19-challenge/lib/python3.6/site-packages/torch/utils/data/_utils/collate.py", line 82, in default_collate
    raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>

The related code is:

  # get_roi.py   get_roi_from_resunet method
   ......
    subset = dataset.test_dataset
    print('------subset-----------', subset)
    case_slice_indices = dataset.test_case_slice_indices
    
    sampler = SequentialSampler(subset)
    print('------sampler-----------', sampler)
    data_loader = DataLoader(subset, batch_size=batch_size, sampler=sampler,
                             num_workers=num_workers, pin_memory=True)
    print('------data_loader-----------', data_loader)
    case = 0
    vol_output = []
    
    with tqdm(total=len(case_slice_indices) - 1, ascii=True, desc=f'eval/test', dynamic_ncols=True) as pbar:
        for batch_idx, data in enumerate(data_loader):
            print('-----------batch_idx------------', batch_idx)
            print('---------data-------------', data)
            imgs, idx = data['image'].cuda(), data['index']

Could you give me some help?
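The `<class 'NoneType'>` in that message means some item returned by the dataset (or one of its dict fields) is `None`, often because an image or label file failed to load. Two common fixes are making `__getitem__` raise instead of returning `None`, or filtering `None` items in a custom `collate_fn`. A generic sketch (the dataset here is hypothetical, not this repo's):

```python
import torch
from torch.utils.data import DataLoader, Dataset, default_collate

class MaybeBrokenDataset(Dataset):
    """Hypothetical dataset where one item fails to load and comes back None."""
    def __init__(self):
        self.items = [{'image': torch.zeros(2), 'index': 0},
                      None,                                   # a failed load
                      {'image': torch.ones(2), 'index': 2}]
    def __len__(self):
        return len(self.items)
    def __getitem__(self, i):
        return self.items[i]

def collate_skip_none(batch):
    # Drop failed items before collating; an all-None batch becomes None.
    batch = [b for b in batch if b is not None]
    return default_collate(batch) if batch else None

loader = DataLoader(MaybeBrokenDataset(), batch_size=3,
                    collate_fn=collate_skip_none)
batch = next(iter(loader))
```

Printing which case index produced `None` inside `__getitem__` usually pinpoints the corrupt or missing file quickly.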
