#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# Segment_Stack_without_gpu.py (ProjetRaoul)
# Author : Raoul MISSODEY
import pandas as pd
import imageio.v2 as iio
import pydicom as dicom
import os
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.preprocessing import minmax_scale, MinMaxScaler
from pydicom.dataset import FileDataset, FileMetaDataset
from skimage.filters import threshold_multiotsu, gabor
from scipy.ndimage import gaussian_filter, sobel, prewitt
from skimage.segmentation import find_boundaries
from skimage.measure import label, regionprops
from skimage import morphology, color, feature, restoration
from functools import partial
import skimage.util
from skimage import (exposure, filters, io, measure, future,
morphology, restoration, segmentation, transform,
util)
from sklearn.ensemble import RandomForestClassifier
import scipy.ndimage as nd
from skimage.restoration import (denoise_tv_chambolle, denoise_bilateral,
denoise_wavelet, estimate_sigma)
from tqdm import tqdm
from skimage import filters, feature
from concurrent.futures import ThreadPoolExecutor
from sklearn.preprocessing import LabelEncoder
import matplotlib as mpl
from scipy.spatial.distance import cdist
from joblib import Parallel, delayed
import time
from scipy.ndimage import median_filter
from sklearn import decomposition
from skimage.filters import threshold_triangle, threshold_li
import skimage as ski
import pybaselines
# In[ ]:
class SegShrinkBScan(object) :
"""
This class segments all images along the third dimension (in the LC-OCT
case it processes the 1232 images).
First, the data is preprocessed (normalization, denoising, and each batch
of 5 images is averaged).
Then the eigenvalues of the Hessian matrix are fed to a KMeans algorithm
for a first segmentation.
After that, a RandomForest is used to reassign pixels belonging to the
wrong classes.
Then, with parallel computing, we segment all the images obtained after
preprocessing.
Finally, the results are post-processed using binary morphology operations
(closing, dilation, ...).
input :
my_data : the 3D DICOM pixel array.
return :
mean of the stratum corneum thickness calculated for each segmented image.
"""
def __init__(self, my_data,remove_num = 50, disk_num = 2, disk_num1 = None,
disk_num2 = None, octagon_num1=None, octagon_num2=None, **kwargs) :
self.my_data = my_data.copy()
self.shape = self.my_data.shape
if remove_num is None :
self.remove_num = 50
else :
self.remove_num = remove_num
if disk_num is None :
self.disk_num = 2
else :
self.disk_num = disk_num
self.disk_num1 = disk_num1
self.disk_num2 = disk_num2
self.octagon_num1 = octagon_num1
self.octagon_num2 = octagon_num2
# data normalization
self.da_norm = self.normalize(self.my_data)
self.vmin, self.vmax = np.percentile(self.da_norm, [1,99])
#stretching intensity levels.
self.stretched = exposure.rescale_intensity(
self.da_norm,
in_range=(self.vmin, self.vmax),
out_range=np.float32
)
self.denoised2 = median_filter((self.da_norm), size=3)
#self.shape1 = self.denoised2.shape
self.mean_data2 = self.Mean(self.denoised2, 5)
self.sigma_min = 0.5
self.sigma_max = 20
self.features_func2_pca = partial(feature.multiscale_basic_features,
intensity=False, edges=True, texture=True,
sigma_min=self.sigma_min, sigma_max=self.sigma_max,
channel_axis=None, num_workers=20)
self.features_func2 = partial(feature.multiscale_basic_features,
intensity=False, edges=False, texture=True,
sigma_min=self.sigma_min, sigma_max=self.sigma_max,
channel_axis=None, num_workers=20)
## hair ("poil") detection results
self.da_pca_red = self.pca_hair(self.stretched)
self.features2_pca = self.features_func2_pca((self.da_pca_red[:,:,0]))
self.lab_v2_pca = self.kmeans_seg_pca(self.features2_pca)
self.t1 = threshold_triangle(self.features2_pca[:,:,8])
self.ftr = self.features2_pca[:,:,8]<self.t1
self.lab_pca = self.label_hair(self.ftr, self.lab_v2_pca)
self.zero_data = self.replace_pixel_poil(self.stretched, self.lab_pca)
#####################################
# features extraction
self.features2 = self.features_func2((self.mean_data2[:,:,60]))
self.shape2 = self.mean_data2.shape
self.lab_v2 = self.kmeans_seg(self.features2)
self.foret2 = RandomForestClassifier(n_estimators=100, n_jobs=-1,
max_depth=None, max_samples=2500)
self.foret2 = future.fit_segmenter(self.lab_v2,self.features2, self.foret2)
self.All_features2 = np.asarray(Parallel(n_jobs=1)(delayed(self.allfea2)((self.mean_data2[:,:,i]))
for i in range(self.shape2[2])))
self.All_seg2 = np.asarray(Parallel(n_jobs=1)(delayed(self.allseg2)(self.All_features2[j]) for j in range(self.shape2[2])))
self.All_seg2 = np.transpose(self.All_seg2, (1, 0, 2))
self.zero_seg2 = self.All_seg2.copy()
for self.i in range(self.zero_seg2.shape[1]) :
self.zero_seg2[:,self.i,:][np.where(self.zero_data[:,self.i,:]==0)] = 0
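# D_v2[:, 0] holds the per-image stratum corneum thickness (in pixels) and
# D_v2[:, 1] the fraction of image columns the measurement covers; values
# above 30 pixels or equal to 0 are dropped as outliers below.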
self.D_v2 = np.asarray(Parallel(n_jobs=1)(delayed(self.sc_th2)(self.All_seg2[:,i,:], self.zero_seg2[:,i,:],self.remove_num, self.disk_num, self.disk_num1, self.disk_num2, self.octagon_num1, self.octagon_num2)
for i in range( self.All_seg2.shape[1])))
self.conts = Parallel(n_jobs=1)(delayed(self.return_contours)(self.All_seg2[:,i,:], self.zero_seg2[:,i,:], self.remove_num, self.disk_num, self.disk_num1, self.disk_num2, self.octagon_num1, self.octagon_num2)
for i in range(self.All_seg2.shape[1]))
self.D_v2_sc = np.nanmean(np.delete(self.D_v2[:,0], np.where((self.D_v2[:,0] > 30) | (self.D_v2[:,0] == 0) )))
self.D_v2_std = np.nanstd(np.delete(self.D_v2[:,0], np.where((self.D_v2[:,0] > 30) | (self.D_v2[:,0] == 0) )))
self.D_v2_perc = np.nanmean(self.D_v2[:,1])
def sc_shrink(self) :
# return stratum thickness value
return self.D_v2_sc
def image_percentage(self) :
# return the fraction of image columns covered by the measurement
return self.D_v2_perc
def image_std(self) :
# return the standard deviation of the stratum thickness
return self.D_v2_std
def return_im_cont(self) :
return self.conts, self.mean_data2, self.D_v2[:,0]
def return_contours(self, one_seg, zer, remove_num, disk_num, disk_num1, disk_num2, octagon_num1, octagon_num2) :
self.i1 = (one_seg == 1).copy()
self.i1[110:,:] = 0
"""
self.i2 = morphology.binary_closing(morphology.dilation(morphology.remove_small_objects(self.i1, 50)
, footprint = morphology.disk(3))
, footprint = morphology.disk(6))
self.i2 = morphology.binary_closing(morphology.dilation(morphology.erosion(self.i1, footprint = morphology.octagon(0,1))
, footprint = morphology.disk(4))
, footprint = morphology.disk(6))
"""
if disk_num1 is None :
self.i2 = morphology.remove_small_objects(self.i1, remove_num)
self.i2[np.where(zer==0)] = 0
self.i2 = morphology.dilation(self.i2, footprint = morphology.disk(disk_num))
else :
self.i2 = morphology.erosion(self.i1, footprint = morphology.octagon(octagon_num1,octagon_num2))
self.i2[np.where(zer==0)] = 0
self.i2 = morphology.dilation(self.i2, footprint = morphology.disk(disk_num1))
self.i2 = morphology.binary_closing(self.i2, footprint = morphology.disk(disk_num2))
"""
self.i2 = morphology.binary_closing(morphology.dilation(morphology.erosion(self.i1, footprint = morphology.octagon(octagon_num1,octagon_num2))
, footprint = morphology.disk(disk_num1))
, footprint = morphology.disk(disk_num2))
"""
self.c = measure.find_contours(self.i2)
return self.c
# segmentation according to the region of interest
def sc_th2(self, one_seg, zer, remove_num, disk_num, disk_num1, disk_num2, octagon_num1, octagon_num2) :
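# Per-image measurement, step by step (descriptive sketch of the code below):
# 1) keep the class-1 (stratum corneum) band and zero everything below row 110,
# 2) clean it with remove_small_objects (or erosion), mask out hair pixels
#    (zer == 0), then dilate (and optionally close),
# 3) find contours, cropping the lateral margins until exactly two remain,
# 4) thickness = mean of the minimum distances between the two contours,
#    keeping only per-window values between 8 and 30 pixels.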
self.i1 = (one_seg == 1).copy()
self.i1[110:,:] = 0
"""
self.i2 = morphology.binary_closing(morphology.dilation(morphology.remove_small_objects(self.i1, 50)
, footprint = morphology.disk(3))
, footprint = morphology.disk(6))
self.i2 = morphology.binary_closing(morphology.dilation(morphology.erosion(self.i1, footprint = morphology.octagon(0,1))
, footprint = morphology.disk(4))
, footprint = morphology.disk(6))
"""
if disk_num1 is None :
self.i2 = morphology.remove_small_objects(self.i1, remove_num)
self.i2[np.where(zer==0)] = 0
self.i2 = morphology.dilation(self.i2, footprint = morphology.disk(disk_num))
else :
self.i2 = morphology.erosion(self.i1, footprint = morphology.octagon(octagon_num1,octagon_num2))
self.i2[np.where(zer==0)] = 0
self.i2 = morphology.dilation(self.i2, footprint = morphology.disk(disk_num1))
self.i2 = morphology.binary_closing(self.i2, footprint = morphology.disk(disk_num2))
"""
self.i2 = morphology.binary_closing(morphology.dilation(morphology.erosion(self.i1, footprint = morphology.octagon(octagon_num1,octagon_num2))
, footprint = morphology.disk(disk_num1))
, footprint = morphology.disk(disk_num2))
"""
self.c = measure.find_contours(self.i2)
self.c_up = []
self.n_up = []
self.l_up = []
#if (len(self.c) > 1) :
#self.ma = max(self.c, key=len)
if ((len(self.c)==2) and (self.c[0][0][1] != self.c[0][-1][1])) :
self.c_up.append(self.c)
self.n_up.append(0)
self.l_up.append(one_seg.shape[1])
else :
for ma in self.c :
if (len(ma)) > 200 :
for w in range(10, 17) :
self.n = int(np.round(ma[:,1].min() + w))
self.l = int(np.round(ma[:,1].max() - w))
if (self.i2[:,self.n:self.l].shape[0] < 2 or self.i2[:,self.n:self.l].shape[1] < 2) :
pass
else :
self.c = measure.find_contours(self.i2[:,self.n:self.l])
if (len(self.c) == 2) :
break
while (len(self.c) > 2) :
self.c.remove(min(self.c, key=len))
#if (self.i2[:,self.n:self.l].shape[0] < 2 or self.i2[:,self.n:self.l].shape[1] < 2) :
# pass
#else :
if (len(self.c[0])>20):
self.c_up.append(self.c)
self.n_up.append(self.n)
self.l_up.append(self.l)
else :
pass
if ((len(self.n_up)==2) and (self.n_up[0] == self.n_up[1]) and (self.l_up[0] == self.l_up[1])) :
self.c_up = [self.c_up[0]]
self.n_up = [self.n_up[0]]
self.l_up = [self.l_up[0]]
self.B = []
for pos in range(len(self.c_up)) :
self.base, self.p = pybaselines.whittaker.iasls(self.c_up[pos][0][:,0], lam=3*1e3,
p=0.95, lam_1=0.01, max_iter=100, tol=0.001)
self.B.append(self.base)
self.split_mean = [np.mean(np.min(cdist(self.c_up[pos][0],self.c_up[pos][1]),axis=1)) for pos in range(len(self.c_up))]
#self.split_mean = [np.mean(np.min(cdist(np.column_stack(( self.B[pos], self.c_up[pos][0][:, 1])),
# self.c_up[pos][1]),axis=1)) for pos in range(len(self.c_up))]
self.split_mean = list(filter(lambda x: x >8, self.split_mean))
self.split_mean = list(filter(lambda x: x <30, self.split_mean))
self.im_percentage = sum([one_seg[:,self.n_up[pos]:self.l_up[pos]].shape[1]/one_seg.shape[1]
for pos in range(len(self.l_up))])
#while (len(self.c)!=2) :
# self.c = measure.find_contours(self.i2[:,self.n:self.l])
# self.ma = max(self.c, key=len)
# self.n = int(np.round(self.ma[:,1].min() + 10))
#self.l = int(np.round(self.ma[:,1].max() - 10))
#else :
# return 0
return np.nanmean(np.array(self.split_mean)), self.im_percentage
"""
def sc_th2(self, one_seg) :
self.i1 = (one_seg == 1).copy()
self.i1[120:,:] = 0
self.i2 = morphology.binary_closing(morphology.dilation(morphology.remove_small_objects(self.i1, 50)
, footprint = morphology.disk(3))
, footprint = morphology.disk(6))
self.c = measure.find_contours(self.i2)
if (len(self.c) > 1) :
self.ma = max(self.c, key=len)
for w in range(10, 17) :
self.n = int(np.round(self.ma[:,1].min() + w))
self.l = int(np.round(self.ma[:,1].max() - w))
self.c = measure.find_contours(self.i2[:,self.n:self.l])
if (len(self.c) == 2) :
break
#while (len(self.c)!=2) :
# self.c = measure.find_contours(self.i2[:,self.n:self.l])
# self.ma = max(self.c, key=len)
# self.n = int(np.round(self.ma[:,1].min() + 10))
#self.l = int(np.round(self.ma[:,1].max() - 10))
else :
return 0
return np.mean(np.min(cdist(self.c[0],self.c[1]),axis=1))
"""
def allfea2(self, a) :
return self.features_func2(a)
def allseg2(self, a) :
return future.predict_segmenter(a, self.foret2)
def kmeans_seg(self, features) :
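# With intensity=False and edges=False, every channel returned by
# feature.multiscale_basic_features is a Hessian-eigenvalue texture feature
# (two per sigma); channels 7 and 9 pick two of those scales. Sorting the
# cluster centers by their sum makes the labels 1..3 consistently ordered.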
self.F = np.concatenate((features[:,:,9].reshape(-1,1).T, features[:,:,7].reshape(-1,1).T), axis=0).T
self.kmeans = MiniBatchKMeans(n_clusters = 3, batch_size = 5000,max_iter=500,
n_init='auto', random_state = 0).fit(self.F)
self.idx = np.argsort(self.kmeans.cluster_centers_.sum(axis=1))
self.L = np.zeros_like(self.idx)
self.L[self.idx] = np.arange(3)
#self.kmeans.fit(self.F.get())
#self.L = self.kmeans.labels_ +1
#self.label_encoder = LabelEncoder()
#self.label_encoder.fit(self.L)
#self.lab = self.label_encoder.transform(self.L) + 1
self.lab_v = self.L[self.kmeans.labels_].reshape(self.shape[0], self.shape[1]) + 1
return self.lab_v
def pca_hair(self, data) :
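# PCA over a fixed depth band (rows 50:70) flattened across slices; the
# first principal component is used downstream to detect hair pixels.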
self.da_pca = np.transpose(data[50:70,:,:].reshape((20,-1)), (1,0))
self.pca = decomposition.PCA(n_components=3, svd_solver='full')
self.pca_fit = self.pca.fit(self.da_pca)
self.da_pca_red = self.pca_fit.transform(self.da_pca).reshape(data[0,:,:].shape + (-1,))
return self.da_pca_red
def moyenne(self, images, stretched, axis) :
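# Masked average: boolean weights exclude zeroed (hair) pixels from the mean.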
return np.array(np.ma.average(images, axis=axis, weights=images.astype(bool)).tolist()).astype(stretched.dtype)
def replace_pixel_poil(self, stretched, lab3) :
self.stretched2 = stretched.copy()
for self.i in range(self.stretched2.shape[0]) :
self.stretched2[self.i,:,:][np.where(lab3==1)] = 0
self.stop = round(self.stretched2.shape[2]/5)*5 - 5
self.denoised22 = np.transpose(self.stretched2,(0,2,1))
self.consec_im2 = self.denoised22[:,:self.stop,:].reshape((self.stretched2.shape[0],-1,5, self.stretched2.shape[1]))
self.zero_data2 = np.array(np.ma.average(self.consec_im2, axis=2,
weights=self.consec_im2.astype(bool)).tolist()).astype(self.stretched2.dtype)
self.zero_data2 = np.concatenate((self.zero_data2,
np.transpose((self.moyenne(self.denoised22[:,self.stop+1:,:],
self.stretched2, axis=1)[..., np.newaxis]),(0,2,1))), axis=1)
self.zero_data2[np.isnan(self.zero_data2)] = 0
return self.zero_data2
def kmeans_seg_pca(self, features) :
self.F = np.concatenate((features[:,:,11].reshape(-1,1).T, features[:,:,8].reshape(-1,1).T), axis=0).T
self.kmeans = MiniBatchKMeans(n_clusters = 3, batch_size = 5000,max_iter=500,
n_init='auto', random_state = 0).fit(self.F)
#self.kmeans.fit(self.F.get())
self.idx = np.argsort(self.kmeans.cluster_centers_.sum(axis=1))
self.L = np.zeros_like(self.idx)
self.L[self.idx] = np.arange(3)
#self.L = self.kmeans.labels_ +1
#self.label_encoder = LabelEncoder()
#self.label_encoder.fit(self.L)
#self.lab = self.label_encoder.transform(self.L) + 1
self.lab_v = self.L[self.kmeans.labels_].reshape(self.shape[1], self.shape[2]) + 1
return self.lab_v
def label_hair(self,ftr,lab) :
#ftr is label from triangle threshold
#lab is label from kmeans
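# contours appearing in both masks (sharing column coordinates) are kept
# and filled as polygons to build the final hair mask.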
self.labr1 = morphology.binary_dilation(ftr, footprint=np.ones((3, 3)))
self.lab1 = morphology.remove_small_objects(self.labr1, min_size=500, connectivity=10)
self.contours1 = measure.find_contours(self.lab1)
self.labr = morphology.remove_small_objects(lab==1, min_size=500, connectivity=10)
self.lab2 = morphology.binary_dilation(self.labr)
self.contours2 = measure.find_contours(self.lab2)
self.contours3 = []
for self.c1 in self.contours1 :
for self.c2 in self.contours2 :
if bool(set(self.c1[:,1]) & set(self.c2[:,1])) :
self.contours3.append(self.c2)
self.lab3 = np.zeros_like(self.lab2)
for self.c3 in self.contours3 :
self.rr, self.cc = ski.draw.polygon(self.c3[:, 1], self.c3[:, 0])
self.lab3[self.cc, self.rr] = 1
return self.lab3
def Mean(self, data, m) :
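# Average consecutive batches of m frames along the third axis; the
# remaining trailing frames are averaged into one extra slice.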
self.stop = round(self.shape[2]/m)*m - m
self.data1 = np.transpose(data,(0,2,1))
self.consec_im = self.data1[:,:self.stop,:].reshape((self.shape[0],-1, m,self.shape[1]))
self.mean_data = np.mean(self.consec_im, axis=2)
self.mean_data = np.concatenate((self.mean_data,
np.transpose((np.mean(self.data1[:,self.stop+1:,:], axis=1)[..., np.newaxis]),(0,2,1))),
axis=1)
self.mean_data = np.transpose(self.mean_data,(0,2,1))
return self.mean_data
def normalize(self, data) :
self.scaler = MinMaxScaler()
self.da_nor = self.scaler.fit_transform(data.reshape(-1, data.shape[2])).reshape(data.shape)
return self.da_nor
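# Minimal usage sketch (illustrative only): `_demo_short_axis` and `path`
# are hypothetical names, not part of the pipeline. It mirrors what
# `segment()` below does for the short-axis pass.
def _demo_short_axis(path) :
    da = dicom.dcmread(path)
    seg = SegShrinkBScan(da.pixel_array)
    return seg.sc_shrink(), seg.image_std(), seg.image_percentage()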
# In[ ]:
class SegBScan(object) :
"""
This class segments all images along the second dimension (in the LC-OCT
case it processes the 500 images).
First, the data is preprocessed (normalization, denoising, and each batch
of 5 images is averaged).
Then the eigenvalues of the Hessian matrix are fed to a KMeans algorithm
for a first segmentation.
After that, a RandomForest is used to reassign pixels belonging to the
wrong classes.
Then, with parallel computing, we segment all the images obtained after
preprocessing.
Finally, the results are post-processed using binary morphology operations
(closing, dilation, ...).
input :
my_data : the 3D DICOM pixel array.
return :
mean of the stratum corneum thickness calculated for each segmented image.
"""
def __init__(self, my_data,remove_num = None, disk_num = None, disk_num1 = None,
disk_num2 = None, octagon_num1=None, octagon_num2=None, **kwargs) :
self.my_data = my_data.copy()
self.shape = self.my_data.shape
#self.remove_num = remove_num
#self.disk_num = disk_num
if remove_num is None :
self.remove_num = 50
else :
self.remove_num = remove_num
if disk_num is None :
self.disk_num = 3
else :
self.disk_num = disk_num
self.disk_num1 = disk_num1
self.disk_num2 = disk_num2
self.octagon_num1 = octagon_num1
self.octagon_num2 = octagon_num2
# data normalization
self.da_norm = self.normalize(self.my_data)
self.vmin, self.vmax = np.percentile(self.da_norm, [1,99])
#stretching intensity levels.
self.stretched = exposure.rescale_intensity(
self.da_norm,
in_range=(self.vmin, self.vmax),
out_range=np.float32
)
self.sigma_min = 0.5
self.sigma_max = 20
self.features_func2_pca = partial(feature.multiscale_basic_features,
intensity=False, edges=True, texture=True,
sigma_min=self.sigma_min, sigma_max=self.sigma_max,
channel_axis=None, num_workers=20)
self.features_func2 = partial(feature.multiscale_basic_features,
intensity=False, edges=False, texture=True,
sigma_min=self.sigma_min, sigma_max=self.sigma_max,
channel_axis=None, num_workers=20)
## hair ("poil") detection results
self.da_pca_red = self.pca_hair(self.stretched)
self.features2_pca = self.features_func2_pca((self.da_pca_red[:,:,0]))
self.lab_v2_pca = self.kmeans_seg_pca(self.features2_pca)
self.t1 = threshold_triangle(self.features2_pca[:,:,8])
self.ftr = self.features2_pca[:,:,8]<self.t1
self.lab_pca = self.label_hair(self.ftr, self.lab_v2_pca)
self.zero_data = self.replace_pixel_poil(self.stretched, self.lab_pca)
#####################################
#self.shape1 = self.denoised2.shape
self.mean_data2 = self.Mean(self.stretched, 5)
#self.mean_data2 = median_filter(cp.asarray(self.stretched), size=3)
self.denoised2 = median_filter((self.mean_data2), size=3)
#self.denoised2 = self.Mean(self.mean_data2, 5)
# features extraction
self.features2 = self.features_func2((self.denoised2[:,60,:]))
self.shape2 = self.denoised2.shape
self.lab_v2 = self.kmeans_seg(self.features2)
self.foret2 = RandomForestClassifier(n_estimators=100, n_jobs=-1,
max_depth=None, max_samples=2500)
self.foret2 = future.fit_segmenter(self.lab_v2,self.features2, self.foret2)
self.All_features2 = np.asarray(Parallel(n_jobs=1)(delayed(self.allfea2)((self.denoised2[:,i,:]))
for i in range(self.shape2[1])))
self.All_seg2 = np.asarray(Parallel(n_jobs=1)(delayed(self.allseg2)(self.All_features2[j])
for j in range(self.shape2[1])))
self.All_seg2 = np.transpose(self.All_seg2, (1, 0, 2))
self.zero_seg = self.All_seg2.copy()
for self.i in range(self.zero_seg.shape[1]) :
self.zero_seg[:,self.i,:][np.where(self.zero_data[:,self.i,:]==0)] = 0
self.D_v2 = np.asarray(Parallel(n_jobs=1)(delayed(self.sc_th2)(self.All_seg2[:,i,:], self.zero_seg[:,i,:],self.remove_num, self.disk_num, self.disk_num1, self.disk_num2, self.octagon_num1, self.octagon_num2)
for i in range(self.shape2[1])))
self.conts = Parallel(n_jobs=1)(delayed(self.return_contours)(self.All_seg2[:,i,:], self.zero_seg[:,i,:],self.remove_num, self.disk_num, self.disk_num1, self.disk_num2, self.octagon_num1, self.octagon_num2)
for i in range(self.shape2[1]))
self.D_v2_sc = np.nanmean(np.delete(self.D_v2[:,0], np.where((self.D_v2[:,0] > 30) | (self.D_v2[:,0] == 0) )))
self.D_v2_std = np.nanstd(np.delete(self.D_v2[:,0], np.where((self.D_v2[:,0] > 30) | (self.D_v2[:,0] == 0) )))
self.D_v2_perc = np.nanmean(self.D_v2[:,1])
def sc_shrink(self) :
# return stratum thickness value
return self.D_v2_sc
def image_percentage(self) :
# return the fraction of image columns covered by the measurement
return self.D_v2_perc
def image_std(self) :
# return the standard deviation of the stratum thickness
return self.D_v2_std
def all_metrics(self) :
# return the full stack of segmentation label maps
return self.All_seg2
def return_im_cont(self) :
return self.conts, self.denoised2, self.D_v2[:,0]
# pca for hair follicle detection
def pca_hair(self, data) :
self.da_pca = np.transpose(data[50:70,:,:].reshape((20,-1)), (1,0))
self.pca = decomposition.PCA(n_components=3, svd_solver='full')
self.pca_fit = self.pca.fit(self.da_pca)
self.da_pca_red = self.pca_fit.transform(self.da_pca).reshape(data[0,:,:].shape + (-1,))
return self.da_pca_red
def replace_pixel_poil(self, stretched, lab3) :
self.stretched2 = stretched.copy()
for self.i in range(self.stretched2.shape[0]) :
self.stretched2[self.i,:,:][np.where(lab3==1)] = 0
self.consec_im = self.stretched2.reshape((self.stretched2.shape[0],-1,5, self.stretched2.shape[2]))
self.zero_data = np.array(np.ma.average(self.consec_im, axis=2,
weights=self.consec_im.astype(bool)).tolist()).astype(self.stretched2.dtype)
self.zero_data[np.isnan(self.zero_data)] = 0
return self.zero_data
def return_contours(self, one_seg, zer, remove_num, disk_num, disk_num1, disk_num2, octagon_num1, octagon_num2) :
self.i1 = (one_seg == 1).copy()
self.i1[110:,:] = 0
"""
self.i2 = morphology.binary_closing(morphology.dilation(morphology.remove_small_objects(self.i1, 50)
, footprint = morphology.disk(2))
, footprint = morphology.disk(6))
self.i2 = morphology.binary_closing(morphology.dilation(morphology.erosion(self.i1, footprint = morphology.octagon(0,1))
, footprint = morphology.disk(4))
, footprint = morphology.disk(10))
"""
if disk_num1 is None :
self.i2 = morphology.remove_small_objects(self.i1, remove_num)
self.i2[np.where(zer==0)] = 0
self.i2 = morphology.dilation(self.i2, footprint = morphology.disk(disk_num))
else :
self.i2 = morphology.erosion(self.i1, footprint = morphology.octagon(octagon_num1,octagon_num2))
self.i2[np.where(zer==0)] = 0
self.i2 = morphology.dilation(self.i2, footprint = morphology.disk(disk_num1))
self.i2 = morphology.binary_closing(self.i2, footprint = morphology.disk(disk_num2))
"""
self.i2 = morphology.binary_closing(morphology.dilation(morphology.erosion(self.i1, footprint = morphology.octagon(octagon_num1,octagon_num2))
, footprint = morphology.disk(disk_num1))
, footprint = morphology.disk(disk_num2))
"""
self.c = measure.find_contours(self.i2)
return self.c
# segmentation according to the region of interest
def sc_th2(self, one_seg, zer, remove_num, disk_num, disk_num1, disk_num2, octagon_num1, octagon_num2) :
self.i1 = (one_seg == 1).copy()
self.i1[110:,:] = 0
"""
self.i2 = morphology.binary_closing(morphology.dilation(morphology.remove_small_objects(self.i1, 50)
, footprint = morphology.disk(2))
, footprint = morphology.disk(6))
self.i2 = morphology.binary_closing(morphology.dilation(morphology.erosion(self.i1, footprint = morphology.octagon(0,1))
, footprint = morphology.disk(4))
, footprint = morphology.disk(10))
"""
if disk_num1 is None :
self.i2 = morphology.remove_small_objects(self.i1, remove_num)
self.i2[np.where(zer==0)] = 0
self.i2 = morphology.dilation(self.i2, footprint = morphology.disk(disk_num))
else :
self.i2 = morphology.erosion(self.i1, footprint = morphology.octagon(octagon_num1,octagon_num2))
self.i2[np.where(zer==0)] = 0
self.i2 = morphology.dilation(self.i2, footprint = morphology.disk(disk_num1))
self.i2 = morphology.binary_closing(self.i2, footprint = morphology.disk(disk_num2))
"""
self.i2 = morphology.binary_closing(morphology.dilation(morphology.erosion(self.i1, footprint = morphology.octagon(octagon_num1,octagon_num2))
, footprint = morphology.disk(disk_num1))
, footprint = morphology.disk(disk_num2))
"""
self.c = measure.find_contours(self.i2)
self.c_up = []
self.n_up = []
self.l_up = []
#if (len(self.c) > 1) :
#self.ma = max(self.c, key=len)
if ((len(self.c)==2) and (self.c[0][0][1] != self.c[0][-1][1])) :
self.c_up.append(self.c)
self.n_up.append(0)
self.l_up.append(one_seg.shape[1])
else :
for ma in self.c :
if (len(ma)) > 500 :
for w in range(10, 17) :
self.n = int(np.round(ma[:,1].min() + w))
self.l = int(np.round(ma[:,1].max() - w))
if (self.i2[:,self.n:self.l].shape[0] < 2 or self.i2[:,self.n:self.l].shape[1] < 2) :
break
else :
self.c = measure.find_contours(self.i2[:,self.n:self.l])
if (len(self.c) == 2) :
break
while (len(self.c) > 2) :
self.c.remove(min(self.c, key=len))
#if (self.i2[:,self.n:self.l].shape[0] < 2 or self.i2[:,self.n:self.l].shape[1] < 2) :
# pass
#else :
if (len(self.c[0]) > 20) :
self.c_up.append(self.c)
self.n_up.append(self.n)
self.l_up.append(self.l)
else :
pass
if ((len(self.n_up)==2) and (self.n_up[0] == self.n_up[1]) and (self.l_up[0] == self.l_up[1])) :
self.c_up = [self.c_up[0]]
self.n_up = [self.n_up[0]]
self.l_up = [self.l_up[0]]
self.B = []
for self.pos in range(len(self.c_up)) :
self.base, self.p = pybaselines.whittaker.iasls(self.c_up[self.pos][0][:,0], lam=3*1e3,
p=0.9, lam_1=0.01, max_iter=100, tol=0.001)
self.B.append(self.base)
#self.split_mean = [np.mean(np.min(cdist(np.column_stack((self.B[self.pos],
# self.c_up[self.pos][0][:, 1])),self.c_up[self.pos][1]),axis=1))
# for self.pos in range(len(self.c_up))]
self.split_mean = [np.mean(np.min(cdist(self.c_up[self.pos][0],self.c_up[self.pos][1]),axis=1))
for self.pos in range(len(self.c_up))]
self.split_mean = list(filter(lambda x: x >8, self.split_mean))
self.split_mean = list(filter(lambda x: x <30, self.split_mean))
self.im_percentage = sum([one_seg[:,self.n_up[self.pos]:self.l_up[self.pos]].shape[1]/one_seg.shape[1]
for self.pos in range(len(self.l_up))])
#while (len(self.c)!=2) :
# self.c = measure.find_contours(self.i2[:,self.n:self.l])
# self.ma = max(self.c, key=len)
# self.n = int(np.round(self.ma[:,1].min() + 10))
#self.l = int(np.round(self.ma[:,1].max() - 10))
#else :
# return 0
return np.nanmean(np.array(self.split_mean)), self.im_percentage
def allfea2(self, a) :
return self.features_func2(a)
def allseg2(self, a) :
return future.predict_segmenter(a, self.foret2)
def kmeans_seg(self, features) :
self.F = np.concatenate((features[:,:,9].reshape(-1,1).T, features[:,:,7].reshape(-1,1).T), axis=0).T
self.kmeans = MiniBatchKMeans(n_clusters = 3, batch_size = 5000,max_iter=500,
n_init='auto', random_state = 0).fit(self.F)
#self.kmeans.fit(self.F.get())
self.idx = np.argsort(self.kmeans.cluster_centers_.sum(axis=1))
self.L = np.zeros_like(self.idx)
self.L[self.idx] = np.arange(3)
#self.L = self.kmeans.labels_ +1
#self.label_encoder = LabelEncoder()
#self.label_encoder.fit(self.L)
#self.lab = self.label_encoder.transform(self.L) + 1
self.lab_v = self.L[self.kmeans.labels_].reshape(self.shape[0], self.shape[2]) + 1
return self.lab_v
def kmeans_seg_pca(self, features) :
self.F = np.concatenate((features[:,:,11].reshape(-1,1).T, features[:,:,8].reshape(-1,1).T), axis=0).T
self.kmeans = MiniBatchKMeans(n_clusters = 3, batch_size = 5000,max_iter=500,
n_init='auto', random_state = 0).fit(self.F)
#self.kmeans.fit(self.F.get())
self.idx = np.argsort(self.kmeans.cluster_centers_.sum(axis=1))
self.L = np.zeros_like(self.idx)
self.L[self.idx] = np.arange(3)
#self.L = self.kmeans.labels_ +1
#self.label_encoder = LabelEncoder()
#self.label_encoder.fit(self.L)
#self.lab = self.label_encoder.transform(self.L) + 1
self.lab_v = self.L[self.kmeans.labels_].reshape(self.shape[1], self.shape[2]) + 1
return self.lab_v
def label_hair(self,ftr,lab) :
#ftr is label from triangle threshold
#lab is label from kmeans
self.labr1 = morphology.binary_dilation(ftr, footprint=np.ones((3, 3)))
self.lab1 = morphology.remove_small_objects(self.labr1, min_size=500, connectivity=10)
self.contours1 = measure.find_contours(self.lab1)
self.labr = morphology.remove_small_objects(lab==1, min_size=500, connectivity=10)
self.lab2 = morphology.binary_dilation(self.labr)
self.contours2 = measure.find_contours(self.lab2)
self.contours3 = []
for self.c1 in self.contours1 :
for self.c2 in self.contours2 :
if bool(set(self.c1[:,1]) & set(self.c2[:,1])) :
self.contours3.append(self.c2)
self.lab3 = np.zeros_like(self.lab2)
for self.c3 in self.contours3 :
self.rr, self.cc = ski.draw.polygon(self.c3[:, 1], self.c3[:, 0])
self.lab3[self.cc, self.rr] = 1
return self.lab3
def Mean(self, data, m) :
self.consec_im = data.reshape((self.shape[0],-1,m, self.shape[2]))
self.mean_data = np.mean(self.consec_im, axis=2)
return self.mean_data
def normalize(self, data) :
self.scaler = MinMaxScaler()
self.da_nor = self.scaler.fit_transform(data.reshape(-1, data.shape[2])).reshape(data.shape)
return self.da_nor
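# Minimal usage sketch (illustrative only): `_demo_long_axis` and `path`
# are hypothetical names. remove_num=50 and disk_num=3 are the class
# defaults written out explicitly; passing disk_num1/disk_num2 (with
# octagon_num1/octagon_num2) switches sc_th2 to its erosion + closing branch.
def _demo_long_axis(path) :
    data = dicom.dcmread(path).pixel_array
    seg = SegBScan(data, remove_num=50, disk_num=3)
    return seg.sc_shrink()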
# In[ ]:
def segment(F) :
"""
This function segments all the stacks of one patient.
input :
F : list of DICOM files containing the stacks
return :
average stratum corneum thickness of each stack, plus the coverage
percentage and standard deviation of the long- and short-axis passes
"""
all_sc_oneP = []
All_long_perc = []
All_short_perc = []
All_long_std = []
All_short_std = []
for i in range(len(F)) :
da = dicom.dcmread(F[i])
data = da.pixel_array
D11 = SegShrinkBScan(data)
D22 = SegBScan(data)
all_sc_oneP.append((D11.sc_shrink() + D22.sc_shrink())/2)
All_long_perc.append(D22.image_percentage())
All_short_perc.append(D11.image_percentage())
All_long_std.append(D22.image_std())
All_short_std.append(D11.image_std())
del D11
del D22
return np.asarray(all_sc_oneP), np.asarray(All_long_perc), np.asarray(All_short_perc), np.asarray(All_long_std), np.asarray(All_short_std)
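# Illustrative call (the file pattern is hypothetical):
#     F = sorted(glob("patient_01/*.dcm"))
#     sc, long_perc, short_perc, long_std, short_std = segment(F)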
def segment_long(F) :
"""
This function segments all the stacks of one patient along the long axis only.
input :
F : list of DICOM files containing the stacks
return :
average stratum corneum thickness of each stack
"""
all_sc_oneP = []
for i in range(len(F)) :
da = dicom.dcmread(F[i])
data = da.pixel_array
D22 = SegBScan(data)
all_sc_oneP.append(D22.sc_shrink())
del D22
return np.asarray(all_sc_oneP)