from ..utils import *

import numpy as np
import scipy.ndimage
import scipy.signal
def _ssim_core(referenceVideoFrame, distortedVideoFrame, K_1, K_2, bitdepth, scaleFix, avg_window):
    referenceVideoFrame = referenceVideoFrame.astype(np.float32)
    distortedVideoFrame = distortedVideoFrame.astype(np.float32)

    M, N = referenceVideoFrame.shape
    extend_mode = 'constant'

    if avg_window is None:
        avg_window = gen_gauss_window(5, 1.5)

    # dynamic range and SSIM stabilization constants
    L = int(2**bitdepth - 1)
    C1 = (K_1 * L)**2
    C2 = (K_2 * L)**2

    # downsampling factor for the optional scale fix
    factor = int(np.max((1, np.round(np.min((M, N)) / 256.0))))
    factor_lpf = np.ones((factor, factor), dtype=np.float32)
    factor_lpf /= np.sum(factor_lpf)

    if scaleFix:
        M = int(np.round(float(M) / factor + 1e-9))
        N = int(np.round(float(N) / factor + 1e-9))

    mu1 = np.zeros((M, N), dtype=np.float32)
    mu2 = np.zeros((M, N), dtype=np.float32)
    var1 = np.zeros((M, N), dtype=np.float32)
    var2 = np.zeros((M, N), dtype=np.float32)
    var12 = np.zeros((M, N), dtype=np.float32)

    # low-pass filter and downsample if scale fixing is enabled
    if scaleFix and (factor > 1):
        referenceVideoFrame = scipy.signal.correlate2d(referenceVideoFrame, factor_lpf, mode='same', boundary='symm')
        distortedVideoFrame = scipy.signal.correlate2d(distortedVideoFrame, factor_lpf, mode='same', boundary='symm')
        referenceVideoFrame = referenceVideoFrame[::factor, ::factor]
        distortedVideoFrame = distortedVideoFrame[::factor, ::factor]

    # local means via separable Gaussian windowing
    scipy.ndimage.correlate1d(referenceVideoFrame, avg_window, 0, mu1, mode=extend_mode)
    scipy.ndimage.correlate1d(mu1, avg_window, 1, mu1, mode=extend_mode)
    scipy.ndimage.correlate1d(distortedVideoFrame, avg_window, 0, mu2, mode=extend_mode)
    scipy.ndimage.correlate1d(mu2, avg_window, 1, mu2, mode=extend_mode)

    mu1_sq = mu1**2
    mu2_sq = mu2**2
    mu1_mu2 = mu1 * mu2

    # local second moments and cross-moment
    scipy.ndimage.correlate1d(referenceVideoFrame**2, avg_window, 0, var1, mode=extend_mode)
    scipy.ndimage.correlate1d(var1, avg_window, 1, var1, mode=extend_mode)
    scipy.ndimage.correlate1d(distortedVideoFrame**2, avg_window, 0, var2, mode=extend_mode)
    scipy.ndimage.correlate1d(var2, avg_window, 1, var2, mode=extend_mode)
    scipy.ndimage.correlate1d(referenceVideoFrame * distortedVideoFrame, avg_window, 0, var12, mode=extend_mode)
    scipy.ndimage.correlate1d(var12, avg_window, 1, var12, mode=extend_mode)

    # local variances and covariance
    sigma1_sq = var1 - mu1_sq
    sigma2_sq = var2 - mu2_sq
    sigma12 = var12 - mu1_mu2

    ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
    cs_map = (2 * sigma12 + C2) / (sigma1_sq + sigma2_sq + C2)

    # crop the filter boundary before pooling
    ssim_map = ssim_map[5:-5, 5:-5]
    cs_map = cs_map[5:-5, 5:-5]

    mssim = np.mean(ssim_map)
    mcs = np.mean(cs_map)

    return mssim, ssim_map, mcs, cs_map
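
# Illustrative sketch (not part of the original module): the pairs of correlate1d
# calls above implement separable 2-D Gaussian windowing. Assuming gen_gauss_window(5, 1.5)
# returns a normalized 1-D kernel of length 11, the two separable passes match a
# full 2-D correlation with the outer product of that kernel under zero padding.
def _check_separable_window():
    win = np.asarray(gen_gauss_window(5, 1.5), dtype=np.float32)
    frame = np.random.RandomState(0).rand(64, 64).astype(np.float32)
    # two 1-D passes, as done in _ssim_core
    out_sep = scipy.ndimage.correlate1d(frame, win, 0, mode='constant')
    out_sep = scipy.ndimage.correlate1d(out_sep, win, 1, mode='constant')
    # equivalent single 2-D correlation
    out_2d = scipy.ndimage.correlate(frame, np.outer(win, win), mode='constant')
    return np.allclose(out_sep, out_2d, atol=1e-5)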
def ssim(referenceVideoData, distortedVideoData, K_1=0.01, K_2=0.03, bitdepth=8, scaleFix=True, avg_window=None):
    """Computes Structural Similarity (SSIM) Index. [#f1]_

    Both video inputs are compared frame-by-frame to obtain T
    SSIM measurements on the luminance channel.

    Parameters
    ----------
    referenceVideoData : ndarray
        Reference video, ndarray of dimension (T, M, N, C), (T, M, N), (M, N, C), or (M, N),
        where T is the number of frames, M is the height, N is the width,
        and C is the number of channels. Here C is only allowed to be 1.
    distortedVideoData : ndarray
        Distorted video, ndarray of dimension (T, M, N, C), (T, M, N), (M, N, C), or (M, N),
        where T is the number of frames, M is the height, N is the width,
        and C is the number of channels. Here C is only allowed to be 1.
    K_1 : float
        Luminance stabilization constant; C1 = (K_1 * L)**2, where L is the dynamic range.
    K_2 : float
        Contrast stabilization constant; C2 = (K_2 * L)**2, where L is the dynamic range.
    bitdepth : int
        The number of bits each pixel effectively has.
    scaleFix : bool
        Whether to downscale the input frames based on an assumed viewing distance, to improve correlation with subjective scores.
    avg_window : ndarray
        1-D averaging window, normalized to unit sum, applied separably along both axes.

    Returns
    -------
    ssim_array : ndarray
        The ssim results, ndarray of dimension (T,), where T
        is the number of frames.

    References
    ----------
    .. [#f1] Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, "Image quality assessment: From error visibility to structural similarity," IEEE Transactions on Image Processing, vol. 13, no. 4, pp. 600-612, Apr. 2004.
    """
    referenceVideoData = vshape(referenceVideoData)
    distortedVideoData = vshape(distortedVideoData)

    assert referenceVideoData.shape == distortedVideoData.shape

    T, M, N, C = referenceVideoData.shape

    assert C == 1, "ssim called with videos containing %d channels. Please supply only the luminance channel" % (C,)

    ssim_scores = np.zeros(T, dtype=np.float32)
    for t in range(T):
        mssim, ssim_map, mcs, cs_map = _ssim_core(referenceVideoData[t, :, :, 0], distortedVideoData[t, :, :, 0], K_1=K_1, K_2=K_2, bitdepth=bitdepth, scaleFix=scaleFix, avg_window=avg_window)
        ssim_scores[t] = mssim

    return ssim_scores
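
# Minimal usage sketch (illustrative, not part of the original module): scoring a
# synthetic 8-bit luminance sequence against a noisy copy. Frame count and size
# here are arbitrary demo values; vshape reshapes the (T, M, N) input internally.
def _example_ssim_usage():
    rng = np.random.RandomState(0)
    reference = rng.randint(0, 256, size=(4, 144, 176)).astype(np.float32)
    noise = rng.normal(scale=5.0, size=reference.shape).astype(np.float32)
    distorted = np.clip(reference + noise, 0, 255)
    scores = ssim(reference, distorted)  # ndarray of shape (4,), one score per frame
    return scores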
def ssim_full(referenceVideoData, distortedVideoData, K_1=0.01, K_2=0.03, bitdepth=8, scaleFix=True, avg_window=None):
    """Returns all outputs of the Structural Similarity (SSIM) Index. [#f1]_

    Both video inputs are compared frame-by-frame to obtain T
    SSIM measurements on the luminance channel.

    Parameters
    ----------
    referenceVideoData : ndarray
        Reference video, ndarray of dimension (T, M, N, C), (T, M, N), (M, N, C), or (M, N),
        where T is the number of frames, M is the height, N is the width,
        and C is the number of channels. Here C is only allowed to be 1.
    distortedVideoData : ndarray
        Distorted video, ndarray of dimension (T, M, N, C), (T, M, N), (M, N, C), or (M, N),
        where T is the number of frames, M is the height, N is the width,
        and C is the number of channels. Here C is only allowed to be 1.
    K_1 : float
        Luminance stabilization constant; C1 = (K_1 * L)**2, where L is the dynamic range.
    K_2 : float
        Contrast stabilization constant; C2 = (K_2 * L)**2, where L is the dynamic range.
    bitdepth : int
        The number of bits each pixel effectively has.
    scaleFix : bool
        Whether to downscale the input frames based on an assumed viewing distance, to improve correlation with subjective scores.
    avg_window : ndarray
        1-D averaging window, normalized to unit sum, applied separably along both axes.

    Returns
    -------
    ssim_array : ndarray
        The ssim results, ndarray of dimension (T,), where T
        is the number of frames.
    ssim_map_array : ndarray
        The ssim maps, ndarray of dimension (T, M-10, N-10), where T
        is the number of frames and M x N is the frame height x width.
    contrast_array : ndarray
        The ssim results based only on contrast (no luminance masking),
        ndarray of dimension (T,), where T is the number of frames.
    contrast_map_array : ndarray
        The ssim contrast-only maps, ndarray of dimension (T, M-10, N-10), where T
        is the number of frames and M x N is the frame height x width.

    References
    ----------
    .. [#f1] Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, "Image quality assessment: From error visibility to structural similarity," IEEE Transactions on Image Processing, vol. 13, no. 4, pp. 600-612, Apr. 2004.
    """
    referenceVideoData = vshape(referenceVideoData)
    distortedVideoData = vshape(distortedVideoData)

    assert referenceVideoData.shape == distortedVideoData.shape

    T, M, N, C = referenceVideoData.shape

    assert C == 1, "ssim called with videos containing %d channels. Please supply only the luminance channel" % (C,)

    ssim_maps = np.zeros((T, M-10, N-10), dtype=np.float32)
    contrast_maps = np.zeros((T, M-10, N-10), dtype=np.float32)
    ssim_scores = np.zeros(T, dtype=np.float32)
    contrast_scores = np.zeros(T, dtype=np.float32)
    for t in range(T):
        mssim, ssim_map, mcs, cs_map = _ssim_core(referenceVideoData[t, :, :, 0], distortedVideoData[t, :, :, 0], K_1=K_1, K_2=K_2, bitdepth=bitdepth, scaleFix=scaleFix, avg_window=avg_window)
        ssim_scores[t] = mssim
        contrast_scores[t] = mcs
        ssim_maps[t] = ssim_map
        contrast_maps[t] = cs_map

    return ssim_scores, ssim_maps, contrast_scores, contrast_maps
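
# Minimal usage sketch (illustrative, not part of the original module): ssim_full
# also returns the per-frame quality maps, which are cropped by 5 pixels on every
# side relative to the input frames. Sizes here are arbitrary demo values.
def _example_ssim_full_usage():
    rng = np.random.RandomState(0)
    reference = rng.randint(0, 256, size=(2, 144, 176)).astype(np.float32)
    distorted = np.clip(reference + rng.normal(scale=5.0, size=reference.shape), 0, 255).astype(np.float32)
    scores, ssim_maps, cs_scores, cs_maps = ssim_full(reference, distorted)
    # scores: (2,), ssim_maps: (2, 134, 166), cs_scores: (2,), cs_maps: (2, 134, 166)
    return scores, ssim_maps, cs_scores, cs_maps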