import openslide
import random
import sys
import numpy as np
class library(object):
    """Indexable collection of square RGB tiles read from OpenSlide slides.

    Item ``k`` is read from the slide handle ``sl[idx[k]]`` with ``grid[k]``
    passed as the location argument of ``read_region``.

    Attributes:
        size: Edge length in pixels used for both tile dimensions.
        slides: The slide paths exactly as supplied by the caller.
        tup: List of ``(location, slide_index)`` pairs, one per tile.
        sl: List of open ``openslide.OpenSlide`` handles, parallel to ``slides``.
    """

    def __init__(self, slides, idx, grid, size):
        """Open every slide and pair each location with its slide index.

        Args:
            slides: Sequence of paths handed to ``openslide.OpenSlide``.
            idx: Iterable of indices into ``slides``, one per tile.
            grid: Iterable of locations, one per tile.
            size: Tile edge length in pixels.
        """
        self.size = size
        self.slides = slides
        # zip() returns a one-shot iterator on Python 3; materialise it so
        # that __getitem__ can index it and __len__ can measure it.
        self.tup = list(zip(grid, idx))
        self.make_list_of_objs()

    def make_list_of_objs(self):
        """(Re)open one ``OpenSlide`` handle per entry of ``self.slides``.

        Progress is written to stdout as ``opened/total`` followed by a
        carriage return.  Handles left over from a previous call are closed
        explicitly once the new list has been installed.
        """
        total = len(self.slides)
        handles = []
        for count, path in enumerate(self.slides, 1):
            handles.append(openslide.OpenSlide(path))
            sys.stdout.write("{}/{} \r".format(count, total))
            sys.stdout.flush()
        # Swap first, close afterwards: if opening fails part-way the
        # previously opened handles are still usable.
        stale = getattr(self, "sl", ())
        self.sl = handles
        for handle in stale:
            handle.close()

    def __getitem__(self, index):
        """Return tile ``index`` converted to ``'RGB'``.

        The region is read with level argument 0 and a size of
        ``(self.size, self.size)``.
        """
        location, slide_index = self.tup[index]
        region = self.sl[slide_index].read_region(
            location, 0, (self.size, self.size))
        return region.convert('RGB')

    def __len__(self):
        """Return the number of tiles."""
        return len(self.tup)
class batch_loader(object):
    """Split the index range of a dataset into consecutive batches.

    Attributes:
        batches: List of ``range`` objects; each covers at most ``n``
            consecutive indices and together they cover ``0 .. len(dset)-1``.
    """

    def __init__(self, dset, n):
        """Build the batch list for ``dset`` using a batch size of ``n``."""
        total = len(dset)
        # The final batch is clamped to the dataset length.
        self.batches = [
            range(start, min(start + n, total))
            for start in range(0, total, n)
        ]
#MAKE DATA
slides = ['/scratch/gabriele/prostate_dict/389973.svs']*2000 #CHANGE SLIDE NAME
# One slide index and one (x, y) pair per sample, drawn with replacement.
idx = list(np.random.choice(len(slides),2000000,replace=True))
gridx = list(np.random.choice(10000,2000000,replace=True))
gridy = list(np.random.choice(10000,2000000,replace=True))
# Materialise the pairs: on Python 3 a bare zip() is exhausted after one pass.
grid = list(zip(gridx,gridy))
#SET UP LOADER
dset = library(slides,idx,grid,224)
loader = batch_loader(dset,512)
#LEAKING LOOP
n_batches = len(loader.batches)
for ii, batch in enumerate(loader.batches):
    # A dedicated loop name keeps the module-level `idx` list from being
    # overwritten by the last sample index.
    imgs = [np.array(dset[sample]) for sample in batch]
    # Progress is reported once per batch on a carriage-return-refreshed line.
    sys.stdout.write("{}/{} \r".format(ii+1, n_batches))
    sys.stdout.flush()
To circumvent this, one can delete the list of OpenSlide objects and recreate it every few iterations. This stabilizes the memory footprint, but it is an unfortunate workaround that wastes a lot of time reopening OpenSlide objects:
import openslide
import random
import sys
import numpy as np
class library(object):
    """Indexable collection of RGB tiles that periodically reopens its slides.

    Item ``k`` is read from the slide handle ``sl[idx[k]]`` with ``grid[k]``
    passed as the location argument of ``read_region``.  Once more than
    ``REOPEN_AFTER`` tiles have been read since the handles were last
    opened, every handle is reopened before the next tile is served.

    Attributes:
        size: Edge length in pixels used for both tile dimensions.
        slides: The slide paths exactly as supplied by the caller.
        tup: List of ``(location, slide_index)`` pairs, one per tile.
        sl: List of open ``openslide.OpenSlide`` handles, parallel to ``slides``.
        idx: Number of tiles read since the handles were last (re)opened.
    """

    # Read count that must be exceeded before the handles are reopened.
    REOPEN_AFTER = 5120

    def __init__(self, slides, idx, grid, size):
        """Open every slide and pair each location with its slide index.

        Args:
            slides: Sequence of paths handed to ``openslide.OpenSlide``.
            idx: Iterable of indices into ``slides``, one per tile.
            grid: Iterable of locations, one per tile.
            size: Tile edge length in pixels.
        """
        self.size = size
        self.slides = slides
        # zip() returns a one-shot iterator on Python 3; materialise it so
        # that __getitem__ can index it and __len__ can measure it.
        self.tup = list(zip(grid, idx))
        self.idx = 0
        self.make_list_of_objs()

    def make_list_of_objs(self):
        """(Re)open one ``OpenSlide`` handle per entry of ``self.slides``.

        Progress is written to stdout as ``opened/total`` followed by a
        carriage return.  Handles left over from a previous call are closed
        explicitly once the new list has been installed.
        """
        total = len(self.slides)
        fresh = []
        for count, path in enumerate(self.slides, 1):
            fresh.append(openslide.OpenSlide(path))
            sys.stdout.write("{}/{} \r".format(count, total))
            sys.stdout.flush()
        # Swap first, close afterwards: if opening fails part-way the
        # previously opened handles are still usable.
        stale = getattr(self, "sl", ())
        self.sl = fresh
        for handle in stale:
            handle.close()

    def __getitem__(self, index):
        """Return tile ``index`` converted to ``'RGB'``.

        Reopens all slide handles first when the read counter has passed
        ``REOPEN_AFTER``, then counts this read.  The region is read with
        level argument 0 and a size of ``(self.size, self.size)``.
        """
        if self.idx > self.REOPEN_AFTER:
            self.idx = 0
            self.make_list_of_objs()
        self.idx += 1
        location, slide_index = self.tup[index]
        region = self.sl[slide_index].read_region(
            location, 0, (self.size, self.size))
        return region.convert('RGB')

    def __len__(self):
        """Return the number of tiles."""
        return len(self.tup)
class batch_loader(object):
    """Partition ``0 .. len(dset)-1`` into runs of at most ``n`` indices.

    Attributes:
        batches: List of ``range`` objects, in ascending order of start index.
    """

    def __init__(self, dset, n):
        """Compute the batches for ``dset`` with batch size ``n``."""
        count = len(dset)
        chunks = []
        for lo in range(0, count, n):
            hi = lo + n
            if hi > count:
                # Last chunk: stop at the end of the dataset.
                hi = count
            chunks.append(range(lo, hi))
        self.batches = chunks
#MAKE DATA
slides = ['/scratch/gabriele/prostate_dict/389973.svs']*2000 #CHANGE SLIDE NAME
# Per-sample slide index and (x, y) pair, each drawn with replacement.
idx = list(np.random.choice(len(slides),2000000,replace=True))
gridx = list(np.random.choice(10000,2000000,replace=True))
gridy = list(np.random.choice(10000,2000000,replace=True))
# A list rather than a bare zip(): the Python 3 iterator can be consumed only once.
grid = list(zip(gridx,gridy))
#SET UP LOADER
dset = library(slides,idx,grid,224)
loader = batch_loader(dset,512)
#LEAKING LOOP
total_batches = len(loader.batches)
for ii, batch in enumerate(loader.batches):
    imgs = []
    # `tile_index` instead of `idx`, so the module-level sample list survives.
    for tile_index in batch:
        imgs.append(np.array(dset[tile_index]))
    # One progress update per batch, overwriting the same console line.
    sys.stdout.write("{}/{} \r".format(ii+1, total_batches))
    sys.stdout.flush()
Thanks for your patience.