Module code >> skvideo.measure.ssim
Fork me on GitHub

Source code for skvideo.measure.ssim

from ..utils import *
import numpy as np
import scipy.ndimage

def _ssim_core(referenceVideoFrame, distortedVideoFrame, K_1, K_2, bitdepth, scaleFix, avg_window):

    referenceVideoFrame = referenceVideoFrame.astype(np.float32)
    distortedVideoFrame = distortedVideoFrame.astype(np.float32)

    M, N = referenceVideoFrame.shape

    extend_mode = 'constant'
    if avg_window is None:
      avg_window = gen_gauss_window(5, 1.5)
    
    L = np.int(2**bitdepth - 1)

    C1 = (K_1 * L)**2
    C2 = (K_2 * L)**2

    factor = np.int(np.max((1, np.round(np.min((M, N))/256.0))))
    factor_lpf = np.ones((factor,factor), dtype=np.float32)
    factor_lpf /= np.sum(factor_lpf)

    if scaleFix:
      M = np.int(np.round(np.float(M) / factor + 1e-9))
      N = np.int(np.round(np.float(N) / factor + 1e-9))

    mu1 = np.zeros((M, N), dtype=np.float32)
    mu2 = np.zeros((M, N), dtype=np.float32)
    var1 = np.zeros((M, N), dtype=np.float32)
    var2 = np.zeros((M, N), dtype=np.float32)
    var12 = np.zeros((M, N), dtype=np.float32)

    # scale if enabled
    if scaleFix and (factor > 1):
        referenceVideoFrame = scipy.signal.correlate2d(referenceVideoFrame, factor_lpf, mode='same', boundary='symm')
        distortedVideoFrame = scipy.signal.correlate2d(distortedVideoFrame, factor_lpf, mode='same', boundary='symm')
        referenceVideoFrame = referenceVideoFrame[::factor, ::factor]
        distortedVideoFrame = distortedVideoFrame[::factor, ::factor]

    scipy.ndimage.correlate1d(referenceVideoFrame, avg_window, 0, mu1, mode=extend_mode)
    scipy.ndimage.correlate1d(mu1, avg_window, 1, mu1, mode=extend_mode)
    scipy.ndimage.correlate1d(distortedVideoFrame, avg_window, 0, mu2, mode=extend_mode)
    scipy.ndimage.correlate1d(mu2, avg_window, 1, mu2, mode=extend_mode)

    mu1_sq = mu1**2
    mu2_sq = mu2**2
    mu1_mu2 = mu1 * mu2

    scipy.ndimage.correlate1d(referenceVideoFrame**2, avg_window, 0, var1, mode=extend_mode)
    scipy.ndimage.correlate1d(var1, avg_window, 1, var1, mode=extend_mode)
    scipy.ndimage.correlate1d(distortedVideoFrame**2, avg_window, 0, var2, mode=extend_mode)
    scipy.ndimage.correlate1d(var2, avg_window, 1, var2, mode=extend_mode)
    scipy.ndimage.correlate1d(referenceVideoFrame * distortedVideoFrame, avg_window, 0, var12, mode=extend_mode)
    scipy.ndimage.correlate1d(var12, avg_window, 1, var12, mode=extend_mode)

    sigma1_sq = var1 - mu1_sq
    sigma2_sq = var2 - mu2_sq
    sigma12 = var12 - mu1_mu2

    ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
    cs_map = (2*sigma12 + C2)/(sigma1_sq + sigma2_sq + C2)

    ssim_map = ssim_map[5:-5, 5:-5]
    cs_map = cs_map[5:-5, 5:-5]

    mssim = np.mean(ssim_map)
    mcs = np.mean(cs_map)

    return mssim, ssim_map, mcs, cs_map


def ssim(referenceVideoData, distortedVideoData, K_1 = 0.01, K_2 = 0.03, bitdepth=8, scaleFix=True, avg_window=None):
    """Computes Structural Similarity (SSIM) Index. [#f1]_

    Both video inputs are compared frame-by-frame to obtain T
    SSIM measurements on the luminance channel.

    Parameters
    ----------
    referenceVideoData : ndarray
        Reference video, ndarray of dimension (T, M, N, C), (T, M, N), (M, N, C), or (M, N),
        where T is the number of frames, M is the height, N is width,
        and C is number of channels. Here C is only allowed to be 1.

    distortedVideoData : ndarray
        Distorted video, ndarray of dimension (T, M, N, C), (T, M, N), (M, N, C), or (M, N),
        where T is the number of frames, M is the height, N is width,
        and C is number of channels. Here C is only allowed to be 1.

    K_1 : float
        Luminance saturation weight

    K_2 : float
        Contrast saturation weight

    bitdepth : int
        The number of bits each pixel effectively has

    scaleFix : bool
        Whether to scale the input frame size based on assumed distance, to improve subjective correlation.

    avg_window : ndarray
        1-d averaging window, applied along each image axis in turn.
        When None, the default window chosen by the per-frame routine is used.

    Returns
    -------
    ssim_array : ndarray
        The ssim results, ndarray of dimension (T,), where T
        is the number of frames

    Raises
    ------
    AssertionError
        If the two videos differ in shape, or contain more than one channel.

    References
    ----------

    .. [#f1] Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, "Image quality assessment: From error measurement to structural similarity" IEEE Transactions on Image Processing, vol. 13, no. 1, Jan. 2004.

    """
    # Bring both inputs to the common 4-d (T, M, N, C) layout.
    referenceVideoData = vshape(referenceVideoData)
    distortedVideoData = vshape(distortedVideoData)

    assert(referenceVideoData.shape == distortedVideoData.shape)

    T, M, N, C = referenceVideoData.shape

    assert C == 1, "ssim called with videos containing %d channels. Please supply only the luminance channel" % (C,)

    ssim_scores = np.zeros(T, dtype=np.float32)

    for t in range(T):
        # Only the mean SSIM is kept; the maps and contrast score are discarded.
        mssim, ssim_map, mcs, cs_map = _ssim_core(referenceVideoData[t, :, :, 0], distortedVideoData[t, :, :, 0], K_1 = K_1, K_2 = K_2, bitdepth=bitdepth, scaleFix=scaleFix, avg_window=avg_window)
        ssim_scores[t] = mssim

    return ssim_scores
def ssim_full(referenceVideoData, distortedVideoData, K_1 = 0.01, K_2 = 0.03, bitdepth=8, scaleFix=True, avg_window=None):
    """Returns all parameters from the Structural Similarity (SSIM) Index. [#f1]_

    Both video inputs are compared frame-by-frame to obtain T
    SSIM measurements on the luminance channel.

    Parameters
    ----------
    referenceVideoData : ndarray
        Reference video, ndarray of dimension (T, M, N, C), (T, M, N), (M, N, C), or (M, N),
        where T is the number of frames, M is the height, N is width,
        and C is number of channels. Here C is only allowed to be 1.

    distortedVideoData : ndarray
        Distorted video, ndarray of dimension (T, M, N, C), (T, M, N), (M, N, C), or (M, N),
        where T is the number of frames, M is the height, N is width,
        and C is number of channels. Here C is only allowed to be 1.

    K_1 : float
        Luminance saturation weight

    K_2 : float
        Contrast saturation weight

    bitdepth : int
        The number of bits each pixel effectively has

    scaleFix : bool
        Whether to scale the input frame size based on assumed distance, to improve subjective correlation.

    avg_window : ndarray
        1-d averaging window, applied along each image axis in turn.
        When None, the default window chosen by the per-frame routine is used.

    Returns
    -------
    ssim_array : ndarray
        The ssim results, ndarray of dimension (T,), where T
        is the number of frames

    ssim_map_array : ndarray
        The ssim maps, ndarray of dimension (T, H, W), where T
        is the number of frames and H x W is the shape of the per-frame
        map. Without rescaling this is (M-10, N-10); when scaleFix
        downsamples the frames the maps are correspondingly smaller.

    contrast_array : ndarray
        The ssim result based on only on contrast (no luminance masking), ndarray of dimension (T,), where T
        is the number of frames

    contrast_map_array : ndarray
        The ssim contrast-only maps, ndarray of dimension (T, H, W),
        with the same H x W as ``ssim_map_array``.

    Raises
    ------
    AssertionError
        If the two videos differ in shape, or contain more than one channel.

    References
    ----------

    .. [#f1] Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, "Image quality assessment: From error measurement to structural similarity" IEEE Transactions on Image Processing, vol. 13, no. 1, Jan. 2004.

    """
    # Bring both inputs to the common 4-d (T, M, N, C) layout.
    referenceVideoData = vshape(referenceVideoData)
    distortedVideoData = vshape(distortedVideoData)

    assert(referenceVideoData.shape == distortedVideoData.shape)

    T, M, N, C = referenceVideoData.shape

    assert C == 1, "ssim called with videos containing %d channels. Please supply only the luminance channel" % (C,)

    ssim_scores = np.zeros(T, dtype=np.float32)
    contrast_scores = np.zeros(T, dtype=np.float32)

    # The map size depends on whether the per-frame routine downsampled the
    # frame, so the stacks are allocated from the first map actually returned
    # rather than assumed to be (M-10, N-10).
    ssim_maps = None
    contrast_maps = None

    for t in range(T):
        mssim, ssim_map, mcs, cs_map = _ssim_core(referenceVideoData[t, :, :, 0], distortedVideoData[t, :, :, 0], K_1 = K_1, K_2 = K_2, bitdepth=bitdepth, scaleFix=scaleFix, avg_window=avg_window)

        if ssim_maps is None:
            ssim_maps = np.zeros((T,) + ssim_map.shape, dtype=np.float32)
            contrast_maps = np.zeros((T,) + cs_map.shape, dtype=np.float32)

        ssim_scores[t] = mssim
        contrast_scores[t] = mcs
        ssim_maps[t] = ssim_map
        contrast_maps[t] = cs_map

    if ssim_maps is None:
        # No frames were processed; fall back to the nominal map shape.
        ssim_maps = np.zeros((T, M-10, N-10), dtype=np.float32)
        contrast_maps = np.zeros((T, M-10, N-10), dtype=np.float32)

    return ssim_scores, ssim_maps, contrast_scores, contrast_maps