In [1]:
# imports
import pathlib
import random
import concurrent.futures
import tifffile
import napari
import zarr
import numpy as np
import dask.array as da
import cv2
from numcodecs import Blosc
from OpenGL.GL import glGetIntegerv, GL_MAX_TEXTURE_SIZE
from coda_visual.datasets.coda_dataset import CODADataset
from coda_visual.datasets.coda_raw_file import CODARawFile

In [2]:
# The test data directory ("testing_python") sits two levels above the
# resolved current working directory of the kernel.
notebook_dir = pathlib.Path().resolve()
test_data_path = notebook_dir.parent.parent / "testing_python"

In [3]:
# Instantiate the CODADataset for the test data directory; later cells read its
# raw files, pyramid levels and classification-labels directory from it.
dataset = CODADataset(test_data_path)

In [4]:
# Location of the training-labels Zarr store, inside the dataset's
# classification-labels directory.
labels_dir_path = dataset.classification_labels_dir_path
training_labels_zarr_store_path = labels_dir_path / "training_labels.zarr"

In [5]:
# Open the training-labels store for writing. Mode "w" creates the group and
# replaces anything already stored at that path.
root = zarr.open_group(
    str(training_labels_zarr_store_path),
    mode="w",
)

# Compressor shared by every label array written by write_group.
compressor = Blosc(
    cname="zstd",
    clevel=5,
    shuffle=Blosc.BITSHUFFLE,
)

def write_group(raw_file: CODARawFile) -> None:
    """Create an all-zero label pyramid for one raw file in the training labels Zarr store.

    A group named after the raw file's stem is (re)created under the
    module-level ``root`` group. For every pyramid level of the raw file it
    receives one 2-D ``uint8`` array, keyed by the level number as a string,
    compressed with the module-level ``compressor`` and filled with zeros.

    Args:
        raw_file: The CODARawFile instance whose pyramid levels define the
            array shapes and whose default chunk size defines the chunking.
    """
    file_group = root.create_group(raw_file.filepath.stem, overwrite=True)

    # The chunk size is swapped in the same way as the image dimensions below
    # (presumably (width, height) -> (rows, columns); confirm against CODARawFile).
    chunk_shape = (
        raw_file.default_chunk_size[1],
        raw_file.default_chunk_size[0],
    )

    for level in raw_file.pyramid_level_numbers:
        level_width, level_height = raw_file.get_image_dimensions_at_level(level)
        level_array = file_group.create_dataset(
            name=str(level),
            shape=(level_height, level_width),
            chunks=chunk_shape,
            dtype=np.uint8,
            overwrite=True,
            compressor=compressor,
        )
        # Explicitly write zeros into the whole array.
        level_array[:] = 0

# Write every raw file's group on a thread pool.
# Executor.map only re-raises a worker's exception when that result is pulled
# from the returned iterator, so the iterator is drained here. Without this, a
# failing write_group call would pass silently and leave the store incomplete.
with concurrent.futures.ThreadPoolExecutor() as executor:
    for _ in executor.map(write_group, dataset.raw_files):
        pass

In [None]:
# Pick the pyramid level the dataset reports for a target dimension of 1500 and
# the largest image dimensions at that level; pad_to is used as (width, height).
downsample_level = dataset.get_best_downsample_level_for_target_dim(1500)
pad_to = dataset.get_largest_image_dimensions_for_level(downsample_level)

img_layer_list = []
training_label_layer_list = []

# Read-only access to the label store while building the display layers.
root = zarr.open_group(str(training_labels_zarr_store_path), mode="r")

for raw_file in dataset.raw_files:
    # Image at the chosen level, already padded to the common size by the raw file.
    image_level = raw_file.get_dask_image_level(downsample_level, pad_to=pad_to)
    img_layer_list.append(image_level)

    raw_label_layer = da.from_zarr(
        root[raw_file.filepath.stem][str(downsample_level)]
    )
    label_height = raw_label_layer.shape[0]
    label_width = raw_label_layer.shape[1]

    # Centre the labels inside the padded canvas: the missing rows/columns are
    # split evenly and any odd remainder goes to the bottom/right edge.
    left_pad, width_remainder = divmod(pad_to[0] - label_width, 2)
    top_pad, height_remainder = divmod(pad_to[1] - label_height, 2)
    pad_widths = (
        (top_pad, top_pad + height_remainder),
        (left_pad, left_pad + width_remainder),
    )

    padded_label_layer = da.pad(
        raw_label_layer,
        pad_widths,
        mode="constant",
        constant_values=0,
    )
    # Match the chunking of the image's first two axes.
    training_label_layer_list.append(
        padded_label_layer.rechunk(image_level.chunks[:2])
    )

In [None]:
# Show the whole dataset as one stack: one image and one label plane per raw
# file along axis 0.
viewer = napari.Viewer()

stacked_images = da.stack(img_layer_list, axis=0)
image_layer = viewer.add_image(
    stacked_images,
    name="dataset",
    contrast_limits=[0, 255],
    rgb=True,
)

stacked_training_labels = da.stack(training_label_layer_list, axis=0)
training_label_layer = viewer.add_labels(
    stacked_training_labels,
    name="training labels",
)

napari.run()

In [None]:
# Pick one raw file at random and load both its image pyramid and the matching
# label pyramid from the training-labels store.
example_image_index = random.choice(range(dataset.n_raw_files))
example_raw_file = dataset.raw_files[example_image_index]

im_pyramid = example_raw_file.get_dask_image_pyramid()

root = zarr.open_group(str(training_labels_zarr_store_path), mode="r")
example_label_group = root[example_raw_file.filepath.stem]

# The label arrays are created with name=str(level_number), so the group must
# be indexed with the string form of each level number, not the raw value.
label_pyramid = [
    da.from_zarr(example_label_group[str(level)])
    for level in example_raw_file.pyramid_level_numbers
]

In [None]:
# Show the randomly chosen raw file with its image and label pyramids as
# multiscale layers.
viewer = napari.Viewer()

example_layer_name = dataset.raw_files[example_image_index].filepath.stem
image_layer = viewer.add_image(
    im_pyramid,
    name=example_layer_name,
    multiscale=True,
    contrast_limits=[0, 255],
    rgb=True,
)
training_label_layer = viewer.add_labels(
    label_pyramid,
    name="training labels",
    multiscale=True,
)

napari.run()

#### This works: loading the labels, fully computed, at a scale that doesn't exceed the max texture size

In [6]:
# Pick one raw file at random and build its multiscale image pyramid.
example_image_index = random.choice(range(dataset.n_raw_files))
example_raw_file = dataset.raw_files[example_image_index]
im_pyramid = example_raw_file.get_dask_image_pyramid()

In [None]:
# Show the example image as a multiscale layer, with its labels as a single,
# fully computed array taken from the finest pyramid level that fits within
# the OpenGL maximum texture size.
viewer = napari.Viewer()
example_raw_file_stem = dataset.raw_files[example_image_index].filepath.stem
image_layer = viewer.add_image(
    im_pyramid,
    name=example_raw_file_stem,
    multiscale=True,
    contrast_limits=[0, 255],
    rgb=True,
)

# NOTE(review): queried after the viewer is created, presumably because an
# OpenGL context must exist for this call to return a value - confirm.
max_texture_size = glGetIntegerv(GL_MAX_TEXTURE_SIZE)

root = zarr.open_group(str(training_labels_zarr_store_path), mode="r")
example_label_group = root[example_raw_file_stem]

# array_keys() yields the level names as strings. Sort them numerically so the
# levels are visited from finest (0) upwards; a plain string ordering would
# put "10" before "2" and could select a coarser level than necessary.
level_keys = sorted(example_label_group.array_keys(), key=int)

# If no level fits, fall back to the highest-numbered level rather than level 0.
# The 2**level scale used below implies higher numbers are smaller arrays.
labels_level = int(level_keys[-1]) if level_keys else 0
for level_key in level_keys:
    label_shape = example_label_group[level_key].shape
    if label_shape[1] <= max_texture_size and label_shape[0] <= max_texture_size:
        labels_level = int(level_key)
        break

# Load the chosen level fully into memory.
labels = da.from_zarr(example_label_group[str(labels_level)]).compute()

# Scale the labels back up to level-0 coordinates so they overlay the image.
# Assumes each level halves the previous one - TODO confirm.
level_scale = 1.0 * 2**labels_level
training_label_layer = viewer.add_labels(
    labels,
    name="training labels",
    scale=(level_scale, level_scale),
)
napari.run()

#### Writing out the raw TIFF results as multiscale Zarr arrays for testing

In [9]:


# Re-open the training-labels store for writing. Mode "w" replaces anything
# already stored at that path, including groups written by earlier cells.
root = zarr.open_group(
    str(training_labels_zarr_store_path),
    mode="w",
)

# Compressor shared by every label array written by write_group_from_tiff.
compressor = Blosc(
    cname="zstd",
    clevel=5,
    shuffle=Blosc.BITSHUFFLE,
)

def write_group_from_tiff(raw_file: CODARawFile) -> None:
    """Fill one raw file's group in the training labels Zarr store from its label TIFF.

    The first page of ``<test_data_path>/classification_labels/<stem>.tif`` is
    read into memory. A group named after the raw file's stem is (re)created
    under the module-level ``root`` group, and for every pyramid level of the
    raw file the label image is resized with nearest-neighbour interpolation
    to that level's dimensions and stored as a ``uint8`` array keyed by the
    level number as a string.

    Args:
        raw_file: The CODARawFile instance whose stem names the TIFF and the
            group, and whose pyramid levels define the array shapes.
    """
    stem = raw_file.filepath.stem
    label_tiff_path = test_data_path / "classification_labels" / f"{stem}.tif"
    with tifffile.TiffFile(label_tiff_path) as label_tiff:
        full_res_labels = label_tiff.pages[0].asarray()

    file_group = root.create_group(stem, overwrite=True)

    # The chunk size is swapped in the same way as the image dimensions below
    # (presumably (width, height) -> (rows, columns); confirm against CODARawFile).
    chunk_shape = (
        raw_file.default_chunk_size[1],
        raw_file.default_chunk_size[0],
    )

    for level in raw_file.pyramid_level_numbers:
        level_width, level_height = raw_file.get_image_dimensions_at_level(level)
        level_array = file_group.create_dataset(
            name=str(level),
            shape=(level_height, level_width),
            chunks=chunk_shape,
            dtype=np.uint8,
            overwrite=True,
            compressor=compressor,
        )
        # cv2.resize takes the target size as (width, height). Every level is
        # resampled directly from the full-resolution label image.
        level_array[:] = cv2.resize(
            full_res_labels,
            (level_width, level_height),
            interpolation=cv2.INTER_NEAREST,
        )

# NOTE(review): the threaded variant is left disabled here; the reason is not
# recorded in the notebook. The files are processed one at a time instead.
# with concurrent.futures.ThreadPoolExecutor() as executor:
#     executor.map(write_group_from_tiff, dataset.raw_files)

# Convert each raw file's label TIFF in turn, announcing the file first.
for raw_file in dataset.raw_files:
    print(f"writing out arrays for {raw_file.filename}")
    write_group_from_tiff(raw_file)

writing out arrays for lungs_001.ndpi
writing out arrays for lungs_003.ndpi
writing out arrays for lungs_005.ndpi
writing out arrays for lungs_007.ndpi
writing out arrays for lungs_009.ndpi
writing out arrays for lungs_011.ndpi
writing out arrays for lungs_013.ndpi
writing out arrays for lungs_015.ndpi
writing out arrays for lungs_017.ndpi
writing out arrays for lungs_019.ndpi
writing out arrays for lungs_021.ndpi
writing out arrays for lungs_023.ndpi
writing out arrays for lungs_025.ndpi
writing out arrays for lungs_027.ndpi
writing out arrays for lungs_029.ndpi
writing out arrays for lungs_031.ndpi
writing out arrays for lungs_033.ndpi
writing out arrays for lungs_035.ndpi
writing out arrays for lungs_037.ndpi
writing out arrays for lungs_039.ndpi


In [11]:
# RGBA colour (0-255 per channel) for every tissue class, in label order:
# the first entry becomes label 1, the second label 2, and so on.
# An alpha of 0 makes a class fully transparent. Only "whitespace" is hidden
# here; switch another class's alpha to 0 to hide it as well.
class_names_colors = {
    "bronchioles": [150, 99, 23, 255],  # brown
    "alveoli": [23, 80, 150, 255],  # dark blue
    "vasculature": [150, 31, 23, 255],  # dark red
    # NOTE(review): originally commented "v dark purple", but this RGB value
    # is a pale khaki - confirm the intended colour.
    "cancer": [199, 196, 147, 255],
    "nonexpanded": [23, 80, 150, 255],  # dark blue (same colour as "alveoli")
    "whitespace": [255, 255, 255, 0],  # white, fully transparent
    "collagen": [242, 167, 227, 255],  # light pink
}

# Label value (starting at 1) -> class name.
class_names = dict(enumerate(class_names_colors, start=1))

# Label value (starting at 1) -> RGBA tuple with each channel scaled to 0.0-1.0.
class_colors = {
    label: tuple(channel / 255.0 for channel in rgba)
    for label, rgba in enumerate(class_names_colors.values(), start=1)
}

In [12]:
# Re-open the existing store in append mode ("a" keeps the arrays already
# written) and attach the class metadata to the root group.
# The path is passed as str(), matching every other open_group call in this
# notebook, so the cell does not depend on zarr accepting pathlib.Path objects.
root = zarr.open_group(str(training_labels_zarr_store_path), mode="a")

# NOTE(review): zarr attributes are stored as JSON, so on read-back the integer
# label keys presumably come back as strings and the colour tuples as lists.
# Convert them when loading.
root.attrs["class_names"] = class_names
root.attrs["class_colors"] = class_colors