# Source code for skvideo.measure.msssim

from ..utils import *
from .ssim import *
import numpy as np
import scipy.ndimage

def _downsample_by_two(image, box_filter):
    """Filter a 2-D image along both axes, then keep every other sample.

    The 1-D ``box_filter`` is applied with ``scipy.ndimage.correlate1d``
    first along axis 0 and then along axis 1.  The first row and column of
    the filtered result are discarded before taking every second sample,
    which is the same as ``filtered[1:, 1:][::2, ::2]``.
    """
    smoothed = scipy.ndimage.correlate1d(image, box_filter, 0)
    smoothed = scipy.ndimage.correlate1d(smoothed, box_filter, 1)
    return smoothed[1::2, 1::2]


def compute_msssim(frame1, frame2, method='product'):
    """Compute a multi-scale SSIM score for a single pair of frames.

    ``ssim_full`` is evaluated at 5 successively downsampled scales.  The
    contrast-structure term of each of the first 4 scales is combined with
    the mean SSIM of the last scale, either multiplicatively or additively.

    Parameters
    ----------
    frame1 : ndarray
        First frame.  It is cast to float32 and indexed as a 2-D image.

    frame2 : ndarray
        Second frame, treated the same way as ``frame1``.

    method : str
        ``"product"`` (default) multiplies the per-scale terms, each raised
        to its weight.  Any other value selects the weighted sum, using the
        weights normalised to sum to one.

    Returns
    -------
    overall_mssim
        The combined score, built from element 0 of the values returned by
        ``ssim_full``.

    """
    avg_window = np.array(gen_gauss_window(5, 1.5))
    K_1 = 0.01
    K_2 = 0.03
    level = 5
    # weight1 holds the exponents for the "product" combination; weight2 is
    # the same set rescaled to sum to one for the additive combination.
    weight1 = np.array([0.0448, 0.2856, 0.3001, 0.2363, 0.1333])
    weight2 = weight1 / np.sum(weight1)

    downsample_filter = np.ones(2, dtype=np.float32)/2.0

    im1 = frame1.astype(np.float32)
    im2 = frame2.astype(np.float32)

    overall_mssim1 = []
    overall_mssim2 = []
    for i in range(level):
        mssim_array, _, mcs_array, _ = ssim_full(
            im1, im2, K_1=K_1, K_2=K_2, avg_window=avg_window)
        # Only element 0 of each result is used -- presumably ssim_full
        # returns one entry per frame and a single frame is passed here.
        mssim_array = mssim_array[0]
        mcs_array = mcs_array[0]

        if i == level - 1:
            # Last scale: its mean SSIM is used directly below, and no
            # further downsampled images are needed.
            break

        overall_mssim1.append(mcs_array**weight1[i])
        overall_mssim2.append(mcs_array*weight2[i])

        im1 = _downsample_by_two(im1, downsample_filter)
        im2 = _downsample_by_two(im2, downsample_filter)

    # NOTE(review): the last scale's mean SSIM is combined without
    # weight1[-1] / weight2[-1].  The published MS-SSIM formulation appears
    # to weight the final scale as well -- confirm before changing, since
    # doing so would alter every returned score.
    if method == "product":
        # np.prod rather than np.product: the latter alias was removed in
        # NumPy 2.0.
        overall_mssim = np.prod(overall_mssim1) * mssim_array
    else:
        overall_mssim = np.sum(overall_mssim2) + mssim_array

    return overall_mssim


def msssim(referenceVideoData, distortedVideoData, method='product'):
    """Computes Multiscale Structural Similarity (MS-SSIM) Index. [#f1]_

    Both video inputs are compared frame-by-frame to obtain T
    MS-SSIM measurements on the luminance channel.

    Parameters
    ----------
    referenceVideoData : ndarray
        Reference video, ndarray of dimension (T, M, N, C), (T, M, N), (M, N, C), or (M, N),
        where T is the number of frames, M is the height, N is width,
        and C is number of channels. Here C is only allowed to be 1.

    distortedVideoData : ndarray
        Distorted video, ndarray of dimension (T, M, N, C), (T, M, N), (M, N, C), or (M, N),
        where T is the number of frames, M is the height, N is width,
        and C is number of channels. Here C is only allowed to be 1.

    method : str
        Whether to use "product" (default) or to use "sum" for combining multiple scales into the single score.

    Returns
    -------
    msssim_array : ndarray
        The MS-SSIM results, ndarray of dimension (T,), where T
        is the number of frames

    Raises
    ------
    AssertionError
        If the two videos differ in shape after ``vshape``, if they have
        more than one channel, or if either spatial dimension is below 176.

    References
    ----------

    .. [#f1] Z. Wang, E. P. Simoncelli and A. C. Bovik, "Multi-scale structural similarity for image quality assessment," IEEE Asilomar Conference Signals, Systems and Computers, Nov. 2003.

    """

    referenceVideoData = vshape(referenceVideoData)
    distortedVideoData = vshape(distortedVideoData)

    assert(referenceVideoData.shape == distortedVideoData.shape)

    T, M, N, C = referenceVideoData.shape

    assert C == 1, "MS-SSIM called with videos containing %d channels. Please supply only the luminance channel" % (C,)
    assert M >= 176 and N >= 176, "You supplied a resolution of %dx%d. MS-SSIM can only be used with videos large enough having multiple scales. Please use only with resolutions >= 176x176." % (M, N)

    # One score per frame, computed on the single (luminance) channel.
    scores = np.zeros(T, dtype=np.float32)
    for t in range(T):
        referenceFrame = referenceVideoData[t, :, :, 0].astype(np.float32)
        distortedFrame = distortedVideoData[t, :, :, 0].astype(np.float32)

        scores[t] = compute_msssim(referenceFrame, distortedFrame, method=method)

    return scores