VTM中添加SSIM输出

简介

本文主要介绍了在VTM中计算并输出每帧SSIM（具体实现为多尺度的MS-SSIM）的方法和步骤

分析

主要解决的几个问题：

数据存在哪里？SSIM作为一种失真度量，可以与PSNR、MSE一样存放在Analyze类中
存储之后会进行哪些操作？计算每帧的YUV SSIM，最后输出整体SSIM
在哪里调用？编码完一帧后，计算PSNR之后调用计算每帧SSIM；最后输出summary时输出整个序列的SSIM
步骤

Typedef.h 中加入宏

#define PrintSSIM 1

Analyze.h 中成员和函数

成员

ssim数组

Analyze.h 中，在class Analyze的private成员中加入double类型的ssim数组

#if PrintSSIM
  double    m_MSSSIM[MAX_NUM_COMPONENT];
#endif

函数声明

Analyze::addResult

Analyze.h 中，在class Analyze的public成员函数中加入新的addResult函数，用于传递SSIM参数（EncGOP需要从类外调用该函数，因此它必须是public）。该函数累加每帧SSIM，用来输出最后sequence level的SSIM

#if PrintSSIM 
  void  addResult(double psnr[MAX_NUM_COMPONENT], double bits, const double MSEyuvframe[MAX_NUM_COMPONENT], const double MSSSIM[MAX_NUM_COMPONENT], bool isEncodeLtRef)
#else
  void  addResult( double psnr[MAX_NUM_COMPONENT], double bits, const double MSEyuvframe[MAX_NUM_COMPONENT]
    , bool isEncodeLtRef
  )
#endif

新增访问ssim函数：getMsssim

#if PrintSSIM
  double   getMsssim(ComponentID compID) const { return  m_MSSSIM[compID]; }
#endif
  double  getPsnr(ComponentID compID) const { return  m_dPSNRSum[compID];  }
  double  getBits()                   const { return  m_dAddBits;   }
  void    setBits(double numBits)     { m_dAddBits = numBits; }
  uint32_t    getNumPic()                 const { return  m_uiNumPic;   }

函数定义：

Analyze::addResult

Analyze.h 中，addResult，SSIM数据的累加更新：

    for(uint32_t i=0; i<MAX_NUM_COMPONENT; i++)
    {
      m_dPSNRSum[i] += psnr[i];
      m_MSEyuvframe[i] += MSEyuvframe[i];
#if PrintSSIM
      m_MSSSIM[i] += MSSSIM[i];
#endif
    }

Analyze::clear

Analyze.h 中，clear，SSIM数据归零：

      m_dPSNRSum[i] = 0;
      m_MSEyuvframe[i] = 0;
#if PrintSSIM
      m_MSSSIM[i] = 0;
#endif

Analyze::printout

Analyze.h 中，printout函数，负责输出最后的summary
一共8个地方需要添加（2项内容 × 2种输出模式 × 2类色度格式）：

每处包含两项内容：Y-MS-SSIM表头标志，以及具体的SSIM数值；
依据是否为printMSEBasedSNR分为两种输出模式；
依据色度格式分为400和其他yuv格式两类。

8个地方的位置可以通过if (printSequenceMSE)来查找，直接放在if (printSequenceMSE)之前：

400：
#if PrintSSIM          
          msg(e_msg_level, "    Y-MS-SSIM");
#endif

#if PrintSSIM
          msg(e_msg_level, "    %8.6lf", getMsssim(COMPONENT_Y) / (double)getNumPic());
#endif

其他
#if PrintSSIM
              //printf("   Y-MS-SSIM    " "U-MS-SSIM    " "V-MS-SSIM ");
            msg(e_msg_level, "    Y-MS-SSIM     "  "U-MS-SSIM     "  "V-MS-SSIM     ");
#endif

#if PrintSSIM
            /*
            printf("    %8.6lf     " "%8.6lf     " "%8.6lf ",
              getMsssim(COMPONENT_Y) / (double)getNumPic(),
              getMsssim(COMPONENT_Cb) / (double)getNumPic(),
              getMsssim(COMPONENT_Cr) / (double)getNumPic());
              */
            msg(e_msg_level, "    %8.6lf     " "%8.6lf     " "%8.6lf ",
              getMsssim(COMPONENT_Y) / (double)getNumPic(),
              getMsssim(COMPONENT_Cb) / (double)getNumPic(),
              getMsssim(COMPONENT_Cr) / (double)getNumPic());
#endif

EncGOP中的成员和函数

成员

没有需要添加的成员

函数声明

新增EncGOP::xCalculateMSSSIM

EncGOP.h中新增xCalculateMSSSIM，用来计算图片的SSIM

#if PrintSSIM
  double xCalculateMSSSIM(const CPelBuf pcPic, const uint32_t orgStride, const CPelBuf cPicD, const uint32_t recStride, const uint32_t width, const uint32_t height, const uint32_t bitDepth);
#endif
  uint64_t xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift

函数定义

EncGOP::xCalculateAddPSNR

一共有2个地方

调用函数计算ssim
调用analyze模块中带SSIM参数的新addResult函数

calculate PSNR之后和#if EXTENSION_360_VIDEO之后

#if PrintSSIM
  // Per-component MS-SSIM of the current picture. Entries for components that the
  // loop below does not visit keep their initial value of 0.
  double  MSSSIM[MAX_NUM_COMPONENT] = { 0,0,0 };
  for (int chan = 0; chan<::getNumberValidComponents(formatD); chan++)
  {
    const ComponentID compID = ComponentID(chan);

    // p is handed to xCalculateMSSSIM as the reconstructed plane, o as the original plane.
    const CPelBuf&    p = picC.get(compID);
    const CPelBuf&    o = org.get(compID);
    const int   orgStride = o.stride;

    const int   recStride = p.stride;
    // Evaluate only the area without the padding reported by getPad(); the padding is
    // scaled down by the component's horizontal / vertical sub-sampling shift, and for
    // field pictures (bPicIsField) the vertical shift is one larger.
    const uint32_t   width = p.width - (m_pcEncLib->getPad(0) >> ::getComponentScaleX(compID, format));
    const uint32_t   height = p.height - (m_pcEncLib->getPad(1) >> (!!bPicIsField + ::getComponentScaleY(compID, format)));
    const uint32_t    bitDepth = sps.getBitDepth(toChannelType(compID));

    MSSSIM[compID] = xCalculateMSSSIM(o, orgStride, p, recStride, width, height, bitDepth);
  }
#endif

调用addResult时，选择新函数传递SSIM数组，
all、I、P、B以及WPSNR一共5处

#if PrintSSIM
  m_gcAnalyzeAll.addResult(dPSNR, (double)uibits, MSEyuvframe, MSSSIM, isEncodeLtRef);
#else
  m_gcAnalyzeAll.addResult(dPSNR, (double)uibits, MSEyuvframe
    , isEncodeLtRef
  );
#endif


#if PrintSSIM
    m_gcAnalyzeI.addResult(dPSNR, (double)uibits, MSEyuvframe, MSSSIM, isEncodeLtRef);
#else
    m_gcAnalyzeI.addResult(dPSNR, (double)uibits, MSEyuvframe
      , isEncodeLtRef
    );
#endif

#if PrintSSIM
    m_gcAnalyzeP.addResult(dPSNR, (double)uibits, MSEyuvframe, MSSSIM, isEncodeLtRef);
#else
    m_gcAnalyzeP.addResult(dPSNR, (double)uibits, MSEyuvframe
      , isEncodeLtRef
    );
#endif

#if PrintSSIM
    m_gcAnalyzeB.addResult(dPSNR, (double)uibits, MSEyuvframe, MSSSIM, isEncodeLtRef);
#else
    m_gcAnalyzeB.addResult(dPSNR, (double)uibits, MSEyuvframe
      , isEncodeLtRef
    );
#endif

#if PrintSSIM
    // The weighted-PSNR statistics belong to the dedicated WPSNR accumulator, exactly as in
    // the #else branch; adding them to m_gcAnalyzeB would count B-frame results twice.
    m_gcAnalyzeWPSNR.addResult(dPSNRWeighted, (double)uibits, MSEyuvframeWeighted, MSSSIM, isEncodeLtRef);
#else
    m_gcAnalyzeWPSNR.addResult(dPSNRWeighted, (double)uibits, MSEyuvframeWeighted, isEncodeLtRef);
#endif

输出每帧信息，if( printFrameMSE )之前

#if PrintSSIM
    // Use the encoder's msg() logger at NOTICE level, like the per-frame MSE output that
    // follows, so the MS-SSIM column goes through the same logging call as the rest of the line.
    msg( NOTICE, " [MS-SSIM Y %1.6lf    U %1.6lf    V %1.6lf]", MSSSIM[COMPONENT_Y], MSSSIM[COMPONENT_Cb], MSSSIM[COMPONENT_Cr] );
#endif

    if( printFrameMSE )
    {
      msg( NOTICE, " [Y MSE %6.4lf  U MSE %6.4lf  V MSE %6.4lf]", MSEyuvframe[COMPONENT_Y], MSEyuvframe[COMPONENT_Cb], MSEyuvframe[COMPONENT_Cr] );
    }

EncGOP::xCalculateMSSSIM

在EncGOP::xCalculateAddPSNR之后，定义xCalculateMSSSIM

#if PrintSSIM
/**
 * \brief Computes the multi-scale structural similarity (MS-SSIM) of one sample plane.
 *
 * Both planes are copied to double precision and repeatedly down-sampled by 2x2 averaging.
 * At every scale an 11x11 Gaussian-weighted SSIM is evaluated at each window position and
 * averaged; the per-scale means are combined as a weighted geometric mean. The luminance
 * term is applied at the coarsest scale only, the contrast/structure term at every scale.
 *
 * \param pOrg       original plane (samples are read through pOrg.buf)
 * \param orgStride  row pitch of pOrg.buf, in samples
 * \param pRec       reconstructed plane (samples are read through pRec.buf)
 * \param recStride  row pitch of pRec.buf, in samples
 * \param width      number of columns to evaluate
 * \param height     number of rows to evaluate
 * \param bitDepth   sample bit depth; defines the peak value used for the constants c1 and c2
 * \return the MS-SSIM value, or 0.0 when the plane is smaller than the 11x11 window
 */
double EncGOP::xCalculateMSSSIM(const CPelBuf pOrg, const uint32_t orgStride, const CPelBuf pRec, const uint32_t recStride, const uint32_t width, const uint32_t height, const uint32_t bitDepth)
{
  const int MAX_MSSSIM_SCALE = 5;
  const int WEIGHTING_MID_TAP = 5;
  const int WEIGHTING_SIZE = WEIGHTING_MID_TAP * 2 + 1;
  const double GAUSSIAN_SIGMA = 1.5;

  // Not a single window fits into the plane: the block count below would be zero or
  // negative and the mean would become 0/0 (NaN) or meaningless. Report 0.0 instead.
  if (width < static_cast<uint32_t>(WEIGHTING_SIZE) || height < static_cast<uint32_t>(WEIGHTING_SIZE))
  {
    return 0.0;
  }

  uint32_t maxScale;

  // For low resolution videos determine number of scales, so that the coarsest
  // scale is still at least as large as the weighting window.
  if (width < 22 || height < 22)
  {
    maxScale = 1;
  }
  else if (width < 44 || height < 44)
  {
    maxScale = 2;
  }
  else if (width < 88 || height < 88)
  {
    maxScale = 3;
  }
  else if (width < 176 || height < 176)
  {
    maxScale = 4;
  }
  else
  {
    maxScale = 5;
  }

  assert(maxScale>0 && maxScale <= MAX_MSSSIM_SCALE);

  // Normalized Gaussian mask design, 11*11, s.d. 1.5
  double weights[WEIGHTING_SIZE][WEIGHTING_SIZE];
  {
    // 2 * sigma^2 = 4.5, the same divisor the mask has always used.
    const double twoSigmaSqr = 2.0 * GAUSSIAN_SIGMA * GAUSSIAN_SIGMA;
    double coeffSum = 0.0;
    for (int y = 0; y<WEIGHTING_SIZE; y++)
    {
      const int dy = y - WEIGHTING_MID_TAP;
      for (int x = 0; x<WEIGHTING_SIZE; x++)
      {
        const int dx = x - WEIGHTING_MID_TAP;
        weights[y][x] = exp(-(dy*dy + dx*dx) / twoSigmaSqr);
        coeffSum += weights[y][x];
      }
    }

    // Scale the mask so that its coefficients sum to 1.
    for (int y = 0; y<WEIGHTING_SIZE; y++)
    {
      for (int x = 0; x<WEIGHTING_SIZE; x++)
      {
        weights[y][x] /= coeffSum;
      }
    }
  }

  // Resolution based weights: row (maxScale - 1) holds the exponent of every scale.
  const double exponentWeights[MAX_MSSSIM_SCALE][MAX_MSSSIM_SCALE] = { { 1.0,    0,      0,      0,      0 },
  { 0.1356, 0.8644, 0,      0,      0 },
  { 0.0711, 0.4530, 0.4760, 0,      0 },
  { 0.0517, 0.3295, 0.3462, 0.2726, 0 },
  { 0.0448, 0.2856, 0.3001, 0.2363, 0.1333 } };

  // Downsampling of data:
  std::vector<double> original[MAX_MSSSIM_SCALE];
  std::vector<double> recon[MAX_MSSSIM_SCALE];

  for (uint32_t scale = 0; scale<maxScale; scale++)
  {
    const int scaledHeight = height >> scale;
    const int scaledWidth = width >> scale;
    original[scale].resize(static_cast<size_t>(scaledHeight)*scaledWidth, double(0));
    recon[scale].resize(static_cast<size_t>(scaledHeight)*scaledWidth, double(0));
  }

  // Level 0 is a copy of the source data, stored densely (pitch == width) as double.
  for (uint32_t y = 0; y<height; y++)
  {
    for (uint32_t x = 0; x<width; x++)
    {
      original[0][y*width + x] = pOrg.buf[y*orgStride + x];
      recon[0][y*width + x] = pRec.buf[y*recStride + x];
    }
  }

  // Every further level is the average of each 2x2 sample group of the previous level.
  for (uint32_t scale = 1; scale<maxScale; scale++)
  {
    const int scaledHeight = height >> scale;
    const int scaledWidth = width >> scale;
    // Row pitch of the finer level. It can be odd, in which case it is 2 * scaledWidth + 1,
    // so it must be taken from the real level width rather than approximated by 2 * scaledWidth.
    const int prevWidth = width >> (scale - 1);
    const std::vector<double>& prevOrg = original[scale - 1];
    const std::vector<double>& prevRec = recon[scale - 1];

    for (int y = 0; y<scaledHeight; y++)
    {
      for (int x = 0; x<scaledWidth; x++)
      {
        const int upper = (2 * y)*prevWidth + 2 * x;
        const int lower = upper + prevWidth;
        original[scale][y*scaledWidth + x] = (prevOrg[upper] + prevOrg[upper + 1] + prevOrg[lower] + prevOrg[lower + 1]) / 4.0;
        recon[scale][y*scaledWidth + x] = (prevRec[upper] + prevRec[upper + 1] + prevRec[lower] + prevRec[lower + 1]) / 4.0;
      }
    }
  }

  // Calculate MS-SSIM:
  const uint32_t   maxValue = (1 << bitDepth) - 1;
  const double c1 = (0.01*maxValue)*(0.01*maxValue);
  const double c2 = (0.03*maxValue)*(0.03*maxValue);

  double finalMSSSIM = 1.0;

  for (uint32_t scale = 0; scale<maxScale; scale++)
  {
    const int scaledHeight = height >> scale;
    const int scaledWidth = width >> scale;
    // Number of window positions that fit completely inside the scaled plane.
    const int blocksPerRow = scaledWidth - WEIGHTING_SIZE + 1;
    const int blocksPerColumn = scaledHeight - WEIGHTING_SIZE + 1;
    const int totalBlocks = blocksPerRow * blocksPerColumn;

    double meanSSIM = 0.0;

    for (int blockIndexY = 0; blockIndexY<blocksPerColumn; blockIndexY++)
    {
      for (int blockIndexX = 0; blockIndexX<blocksPerRow; blockIndexX++)
      {
        // Gaussian-weighted first and second order moments of the current window.
        double muOrg = 0.0;
        double muRec = 0.0;
        double muOrigSqr = 0.0;
        double muRecSqr = 0.0;
        double muOrigMultRec = 0.0;

        for (int y = 0; y<WEIGHTING_SIZE; y++)
        {
          for (int x = 0; x<WEIGHTING_SIZE; x++)
          {
            const double gaussianWeight = weights[y][x];
            const int    sampleOffset = (blockIndexY + y)*scaledWidth + (blockIndexX + x);
            const double orgPel = original[scale][sampleOffset];
            const double recPel = recon[scale][sampleOffset];

            muOrg += orgPel * gaussianWeight;
            muRec += recPel * gaussianWeight;
            muOrigSqr += orgPel * orgPel*gaussianWeight;
            muRecSqr += recPel * recPel*gaussianWeight;
            muOrigMultRec += orgPel * recPel*gaussianWeight;
          }
        }

        const double sigmaSqrOrig = muOrigSqr - (muOrg*muOrg);
        const double sigmaSqrRec = muRecSqr - (muRec*muRec);
        const double sigmaOrigRec = muOrigMultRec - (muOrg*muRec);

        // Contrast/structure term at every scale ...
        double blockSSIMVal = ((2.0*sigmaOrigRec + c2) / (sigmaSqrOrig + sigmaSqrRec + c2));
        // ... luminance term at the coarsest scale only.
        if (scale == maxScale - 1)
        {
          blockSSIMVal *= (2.0*muOrg*muRec + c1) / (muOrg*muOrg + muRec * muRec + c1);
        }

        meanSSIM += blockSSIMVal;
      }
    }

    meanSSIM /= totalBlocks;

    finalMSSSIM *= pow(meanSSIM, exponentWeights[maxScale - 1][scale]);
  }

  return finalMSSSIM;
}
#endif

EncGOP::xCalculateInterlacedAddPSNR

xCalculateInterlacedAddPSNR中暂时不修改，因为现在不需要field coding。但需要增加两项内容避免报错：

计算ssim,在计算psnr之后和uint32_t uibits = 0;之前：

#if PrintSSIM
  //===== calculate MS-SSIM =====
  // MS-SSIM is not computed for field coding: the loop below is commented out, so MSSSIM
  // keeps its all-zero initial value. The array exists only so that a later addResult()
  // call taking an MSSSIM argument can be compiled.
  double MSSSIM[MAX_NUM_COMPONENT] = { 0,0,0 };
  /*
  
    for (int chan = 0; chan<numValidComponents; chan++)
    {
      const ComponentID ch = ComponentID(chan);
      assert(acPicRecFields[0]->getWidth(ch) == acPicRecFields[1]->getWidth(ch));
      assert(acPicRecFields[0]->getHeight(ch) == acPicRecFields[1]->getHeight(ch));

      double sumOverFieldsMSSSIM = 0.0;
      const uint32_t width = acPicRecFields[0].get(ch).width - (m_pcEncLib->getPad(0) >> ::getComponentScaleX(ch, format));
      const uint32_t height = acPicRecFields[0].get(ch).height - ((m_pcEncLib->getPad(1) >> 1) >> ::getComponentScaleY(ch, format));

      for (uint32_t fieldNum = 0; fieldNum<2; fieldNum++)
      {
        Picture    pcPic = *apcPicOrgFields[fieldNum];
        PelUnitBuf pcPicD = acPicRecFields[fieldNum];
        const CPelBuf&    p = pcPic.get(ch);
        const CPelBuf&    o = pcPicD.get(ch);
        const Pel*  pOrg = (conversion != IPCOLOURSPACE_UNCHANGED) ? pcPic.getPicYuvTrueOrg()->getAddr(ch) : pcPic->getPicYuvOrg()->getAddr(ch);
        const uint32_t   orgStride = (conversion != IPCOLOURSPACE_UNCHANGED) ? pcPic->getPicYuvTrueOrg()->getStride(ch) : pcPic->getPicYuvOrg()->getStride(ch);
        Pel*        pRec = pcPicD->getAddr(ch);
        const uint32_t   recStride = pcPicD->getStride(ch);
        const uint32_t  bitDepth = sps.getBitDepth(toChannelType(ch));

        sumOverFieldsMSSSIM += xCalculateMSSSIM(pOrg, orgStride, pRec, recStride, width, height, bitDepth);
      }

      MSSSIM[ch] = sumOverFieldsMSSSIM / 2;
    }
    */
#endif

addResult，和先前非field coding编码一样

#if PrintSSIM
  m_gcAnalyzeAll_in.addResult(dPSNR, (double)uibits, MSEyuvframe, MSSSIM, isEncodeLtRef);
#else
  m_gcAnalyzeAll_in.addResult(dPSNR, (double)uibits, MSEyuvframe
    , isEncodeLtRef
  );
#endif