Import Upstream version 0.16.0

author: qinxialei <xialeiqin@gmail.com> 2020-10-29 11:26:59 +0800
committer: qinxialei <xialeiqin@gmail.com> 2020-10-29 11:26:59 +0800
commit: e8d277081293b6fb2a5d469616baaa7a06f52496 (patch)
tree: 1179bb07d3927d1837d4a90bd81b2034c4c696a9 /src/dsp/film_grain.cc
download: libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.tar.gz
libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.tar.bz2
libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.zip
1 files changed, 870 insertions, 0 deletions
diff --git a/src/dsp/film_grain.cc b/src/dsp/film_grain.cc
new file mode 100644
index 0000000..41d1dd0
--- /dev/null
+++ b/src/dsp/film_grain.cc
@@ -0,0 +1,870 @@
+// Copyright 2019 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/dsp/film_grain.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <new>
+
+#include "src/dsp/common.h"
+#include "src/dsp/constants.h"
+#include "src/dsp/dsp.h"
+#include "src/dsp/film_grain_common.h"
+#include "src/utils/array_2d.h"
+#include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
+#include "src/utils/logging.h"
+
+namespace libgav1 {
+namespace dsp {
+namespace film_grain {
+namespace {
+
+// Making this a template function prevents it from adding to code size when it
+// is not placed in the DSP table. Most functions in the dsp directory change
+// behavior by bitdepth, but because this one doesn't, it receives a dummy
+// parameter with one enforced value, ensuring only one copy is made.
+template <int singleton>
+void InitializeScalingLookupTable_C(
+    int num_points, const uint8_t point_value[], const uint8_t point_scaling[],
+    uint8_t scaling_lut[kScalingLookupTableSize]) {
+  static_assert(singleton == 0,
+                "Improper instantiation of InitializeScalingLookupTable_C. "
+                "There should be only one copy of this function.");
+  if (num_points == 0) {
+    memset(scaling_lut, 0, sizeof(scaling_lut[0]) * kScalingLookupTableSize);
+    return;
+  }
+  static_assert(sizeof(scaling_lut[0]) == 1, "");
+  memset(scaling_lut, point_scaling[0], point_value[0]);
+  for (int i = 0; i < num_points - 1; ++i) {
+    const int delta_y = point_scaling[i + 1] - point_scaling[i];
+    const int delta_x = point_value[i + 1] - point_value[i];
+    const int delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);
+    for (int x = 0; x < delta_x; ++x) {
+      const int v = point_scaling[i] + ((x * delta + 32768) >> 16);
+      assert(v >= 0 && v <= UINT8_MAX);
+      scaling_lut[point_value[i] + x] = v;
+    }
+  }
+  const uint8_t last_point_value = point_value[num_points - 1];
+  memset(&scaling_lut[last_point_value], point_scaling[num_points - 1],
+         kScalingLookupTableSize - last_point_value);
+}
+
+// Section 7.18.3.5.
+// Performs a piecewise linear interpolation into the scaling table.
+template <int bitdepth>
+int ScaleLut(const uint8_t scaling_lut[kScalingLookupTableSize], int index) {
+  const int shift = bitdepth - 8;
+  const int quotient = index >> shift;
+  const int remainder = index - (quotient << shift);
+  if (bitdepth == 8) {
+    assert(quotient < kScalingLookupTableSize);
+    return scaling_lut[quotient];
+  }
+  assert(quotient + 1 < kScalingLookupTableSize);
+  const int start = scaling_lut[quotient];
+  const int end = scaling_lut[quotient + 1];
+  return start + RightShiftWithRounding((end - start) * remainder, shift);
+}
+
+// Applies an auto-regressive filter to the white noise in luma_grain.
+template <int bitdepth, typename GrainType>
+void ApplyAutoRegressiveFilterToLumaGrain_C(const FilmGrainParams& params,
+                                            void* luma_grain_buffer) {
+  auto* luma_grain = static_cast<GrainType*>(luma_grain_buffer);
+  const int grain_min = GetGrainMin<bitdepth>();
+  const int grain_max = GetGrainMax<bitdepth>();
+  const int auto_regression_coeff_lag = params.auto_regression_coeff_lag;
+  assert(auto_regression_coeff_lag > 0 && auto_regression_coeff_lag <= 3);
+  // A pictorial representation of the auto-regressive filter for various values
+  // of auto_regression_coeff_lag. The letter 'O' represents the current sample.
+  // (The filter always operates on the current sample with filter
+  // coefficient 1.) The letters 'X' represent the neighboring samples that the
+  // filter operates on.
+  //
+  // auto_regression_coeff_lag == 3:
+  //   X X X X X X X
+  //   X X X X X X X
+  //   X X X X X X X
+  //   X X X O
+  // auto_regression_coeff_lag == 2:
+  //     X X X X X
+  //     X X X X X
+  //     X X O
+  // auto_regression_coeff_lag == 1:
+  //       X X X
+  //       X O
+  // auto_regression_coeff_lag == 0:
+  //         O
+  //
+  // Note that if auto_regression_coeff_lag is 0, the filter is the identity
+  // filter and therefore can be skipped. This implementation assumes it is not
+  // called in that case.
+  const int shift = params.auto_regression_shift;
+  for (int y = kAutoRegressionBorder; y < kLumaHeight; ++y) {
+    for (int x = kAutoRegressionBorder; x < kLumaWidth - kAutoRegressionBorder;
+         ++x) {
+      int sum = 0;
+      int pos = 0;
+      int delta_row = -auto_regression_coeff_lag;
+      // The last iteration (delta_row == 0) is shorter and is handled
+      // separately.
+      do {
+        int delta_column = -auto_regression_coeff_lag;
+        do {
+          const int coeff = params.auto_regression_coeff_y[pos];
+          sum += luma_grain[(y + delta_row) * kLumaWidth + (x + delta_column)] *
+                 coeff;
+          ++pos;
+        } while (++delta_column <= auto_regression_coeff_lag);
+      } while (++delta_row < 0);
+      // Last iteration: delta_row == 0.
+      {
+        int delta_column = -auto_regression_coeff_lag;
+        do {
+          const int coeff = params.auto_regression_coeff_y[pos];
+          sum += luma_grain[y * kLumaWidth + (x + delta_column)] * coeff;
+          ++pos;
+        } while (++delta_column < 0);
+      }
+      luma_grain[y * kLumaWidth + x] = Clip3(
+          luma_grain[y * kLumaWidth + x] + RightShiftWithRounding(sum, shift),
+          grain_min, grain_max);
+    }
+  }
+}
+
+template <int bitdepth, typename GrainType, int auto_regression_coeff_lag,
+          bool use_luma>
+void ApplyAutoRegressiveFilterToChromaGrains_C(const FilmGrainParams& params,
+                                               const void* luma_grain_buffer,
+                                               int subsampling_x,
+                                               int subsampling_y,
+                                               void* u_grain_buffer,
+                                               void* v_grain_buffer) {
+  static_assert(
+      auto_regression_coeff_lag >= 0 && auto_regression_coeff_lag <= 3,
+      "Unsupported autoregression lag for chroma.");
+  const auto* luma_grain = static_cast<const GrainType*>(luma_grain_buffer);
+  const int grain_min = GetGrainMin<bitdepth>();
+  const int grain_max = GetGrainMax<bitdepth>();
+  auto* u_grain = static_cast<GrainType*>(u_grain_buffer);
+  auto* v_grain = static_cast<GrainType*>(v_grain_buffer);
+  const int shift = params.auto_regression_shift;
+  const int chroma_height =
+      (subsampling_y == 0) ? kMaxChromaHeight : kMinChromaHeight;
+  const int chroma_width =
+      (subsampling_x == 0) ? kMaxChromaWidth : kMinChromaWidth;
+  for (int y = kAutoRegressionBorder; y < chroma_height; ++y) {
+    const int luma_y =
+        ((y - kAutoRegressionBorder) << subsampling_y) + kAutoRegressionBorder;
+    for (int x = kAutoRegressionBorder;
+         x < chroma_width - kAutoRegressionBorder; ++x) {
+      int sum_u = 0;
+      int sum_v = 0;
+      int pos = 0;
+      int delta_row = -auto_regression_coeff_lag;
+      do {
+        int delta_column = -auto_regression_coeff_lag;
+        do {
+          if (delta_row == 0 && delta_column == 0) {
+            break;
+          }
+          const int coeff_u = params.auto_regression_coeff_u[pos];
+          const int coeff_v = params.auto_regression_coeff_v[pos];
+          sum_u +=
+              u_grain[(y + delta_row) * chroma_width + (x + delta_column)] *
+              coeff_u;
+          sum_v +=
+              v_grain[(y + delta_row) * chroma_width + (x + delta_column)] *
+              coeff_v;
+          ++pos;
+        } while (++delta_column <= auto_regression_coeff_lag);
+      } while (++delta_row <= 0);
+      if (use_luma) {
+        int luma = 0;
+        const int luma_x = ((x - kAutoRegressionBorder) << subsampling_x) +
+                           kAutoRegressionBorder;
+        int i = 0;
+        do {
+          int j = 0;
+          do {
+            luma += luma_grain[(luma_y + i) * kLumaWidth + (luma_x + j)];
+          } while (++j <= subsampling_x);
+        } while (++i <= subsampling_y);
+        luma = SubsampledValue(luma, subsampling_x + subsampling_y);
+        const int coeff_u = params.auto_regression_coeff_u[pos];
+        const int coeff_v = params.auto_regression_coeff_v[pos];
+        sum_u += luma * coeff_u;
+        sum_v += luma * coeff_v;
+      }
+      u_grain[y * chroma_width + x] = Clip3(
+          u_grain[y * chroma_width + x] + RightShiftWithRounding(sum_u, shift),
+          grain_min, grain_max);
+      v_grain[y * chroma_width + x] = Clip3(
+          v_grain[y * chroma_width + x] + RightShiftWithRounding(sum_v, shift),
+          grain_min, grain_max);
+    }
+  }
+}
+
+// This implementation is for the condition overlap_flag == false.
+template <int bitdepth, typename GrainType>
+void ConstructNoiseStripes_C(const void* grain_buffer, int grain_seed,
+                             int width, int height, int subsampling_x,
+                             int subsampling_y, void* noise_stripes_buffer) {
+  auto* noise_stripes =
+      static_cast<Array2DView<GrainType>*>(noise_stripes_buffer);
+  const auto* grain = static_cast<const GrainType*>(grain_buffer);
+  const int half_width = DivideBy2(width + 1);
+  const int half_height = DivideBy2(height + 1);
+  assert(half_width > 0);
+  assert(half_height > 0);
+  static_assert(kLumaWidth == kMaxChromaWidth,
+                "kLumaWidth width should be equal to kMaxChromaWidth");
+  const int grain_width =
+      (subsampling_x == 0) ? kMaxChromaWidth : kMinChromaWidth;
+  const int plane_width = (width + subsampling_x) >> subsampling_x;
+  constexpr int kNoiseStripeHeight = 34;
+  int luma_num = 0;
+  int y = 0;
+  do {
+    GrainType* const noise_stripe = (*noise_stripes)[luma_num];
+    uint16_t seed = grain_seed;
+    seed ^= ((luma_num * 37 + 178) & 255) << 8;
+    seed ^= ((luma_num * 173 + 105) & 255);
+    int x = 0;
+    do {
+      const int rand = GetFilmGrainRandomNumber(8, &seed);
+      const int offset_x = rand >> 4;
+      const int offset_y = rand & 15;
+      const int plane_offset_x =
+          (subsampling_x != 0) ? 6 + offset_x : 9 + offset_x * 2;
+      const int plane_offset_y =
+          (subsampling_y != 0) ? 6 + offset_y : 9 + offset_y * 2;
+      int i = 0;
+      do {
+        // Section 7.18.3.5 says:
+        //   noiseStripe[ lumaNum ][ 0 ] is 34 samples high and w samples
+        //   wide (a few additional samples across are actually written to
+        //   the array, but these are never read) ...
+        //
+        // Note: The warning in the parentheses also applies to
+        // noiseStripe[ lumaNum ][ 1 ] and noiseStripe[ lumaNum ][ 2 ].
+        //
+        // Writes beyond the width of each row could happen below. To
+        // prevent those writes, we clip the number of pixels to copy against
+        // the remaining width.
+        // TODO(petersonab): Allocate aligned stripes with extra width to cover
+        // the size of the final stripe block, then remove this call to min.
+        const int copy_size =
+            std::min(kNoiseStripeHeight >> subsampling_x,
+                     plane_width - (x << (1 - subsampling_x)));
+        memcpy(&noise_stripe[i * plane_width + (x << (1 - subsampling_x))],
+               &grain[(plane_offset_y + i) * grain_width + plane_offset_x],
+               copy_size * sizeof(noise_stripe[0]));
+      } while (++i < (kNoiseStripeHeight >> subsampling_y));
+      x += 16;
+    } while (x < half_width);
+
+    ++luma_num;
+    y += 16;
+  } while (y < half_height);
+}
+
+// This implementation is for the condition overlap_flag == true.
+template <int bitdepth, typename GrainType>
+void ConstructNoiseStripesWithOverlap_C(const void* grain_buffer,
+                                        int grain_seed, int width, int height,
+                                        int subsampling_x, int subsampling_y,
+                                        void* noise_stripes_buffer) {
+  auto* noise_stripes =
+      static_cast<Array2DView<GrainType>*>(noise_stripes_buffer);
+  const auto* grain = static_cast<const GrainType*>(grain_buffer);
+  const int half_width = DivideBy2(width + 1);
+  const int half_height = DivideBy2(height + 1);
+  assert(half_width > 0);
+  assert(half_height > 0);
+  static_assert(kLumaWidth == kMaxChromaWidth,
+                "kLumaWidth width should be equal to kMaxChromaWidth");
+  const int grain_width =
+      (subsampling_x == 0) ? kMaxChromaWidth : kMinChromaWidth;
+  const int plane_width = (width + subsampling_x) >> subsampling_x;
+  constexpr int kNoiseStripeHeight = 34;
+  int luma_num = 0;
+  int y = 0;
+  do {
+    GrainType* const noise_stripe = (*noise_stripes)[luma_num];
+    uint16_t seed = grain_seed;
+    seed ^= ((luma_num * 37 + 178) & 255) << 8;
+    seed ^= ((luma_num * 173 + 105) & 255);
+    // Begin special iteration for x == 0.
+    const int rand = GetFilmGrainRandomNumber(8, &seed);
+    const int offset_x = rand >> 4;
+    const int offset_y = rand & 15;
+    const int plane_offset_x =
+        (subsampling_x != 0) ? 6 + offset_x : 9 + offset_x * 2;
+    const int plane_offset_y =
+        (subsampling_y != 0) ? 6 + offset_y : 9 + offset_y * 2;
+    // The overlap computation only occurs when x > 0, so it is omitted here.
+    int i = 0;
+    do {
+      // TODO(petersonab): Allocate aligned stripes with extra width to cover
+      // the size of the final stripe block, then remove this call to min.
+      const int copy_size =
+          std::min(kNoiseStripeHeight >> subsampling_x, plane_width);
+      memcpy(&noise_stripe[i * plane_width],
+             &grain[(plane_offset_y + i) * grain_width + plane_offset_x],
+             copy_size * sizeof(noise_stripe[0]));
+    } while (++i < (kNoiseStripeHeight >> subsampling_y));
+    // End special iteration for x == 0.
+    for (int x = 16; x < half_width; x += 16) {
+      const int rand = GetFilmGrainRandomNumber(8, &seed);
+      const int offset_x = rand >> 4;
+      const int offset_y = rand & 15;
+      const int plane_offset_x =
+          (subsampling_x != 0) ? 6 + offset_x : 9 + offset_x * 2;
+      const int plane_offset_y =
+          (subsampling_y != 0) ? 6 + offset_y : 9 + offset_y * 2;
+      int i = 0;
+      do {
+        int j = 0;
+        int grain_sample =
+            grain[(plane_offset_y + i) * grain_width + plane_offset_x];
+        // The first pixel(s) of each segment of the noise_stripe are subject to
+        // the "overlap" computation.
+        if (subsampling_x == 0) {
+          // Corresponds to the line in the spec:
+          // if (j < 2 && x > 0)
+          // j = 0
+          int old = noise_stripe[i * plane_width + x * 2];
+          grain_sample = old * 27 + grain_sample * 17;
+          grain_sample =
+              Clip3(RightShiftWithRounding(grain_sample, 5),
+                    GetGrainMin<bitdepth>(), GetGrainMax<bitdepth>());
+          noise_stripe[i * plane_width + x * 2] = grain_sample;
+
+          // This check prevents overwriting for the iteration j = 1. The
+          // continue applies to the i-loop.
+          if (x * 2 + 1 >= plane_width) continue;
+          // j = 1
+          grain_sample =
+              grain[(plane_offset_y + i) * grain_width + plane_offset_x + 1];
+          old = noise_stripe[i * plane_width + x * 2 + 1];
+          grain_sample = old * 17 + grain_sample * 27;
+          grain_sample =
+              Clip3(RightShiftWithRounding(grain_sample, 5),
+                    GetGrainMin<bitdepth>(), GetGrainMax<bitdepth>());
+          noise_stripe[i * plane_width + x * 2 + 1] = grain_sample;
+          j = 2;
+        } else {
+          // Corresponds to the line in the spec:
+          // if (j == 0 && x > 0)
+          const int old = noise_stripe[i * plane_width + x];
+          grain_sample = old * 23 + grain_sample * 22;
+          grain_sample =
+              Clip3(RightShiftWithRounding(grain_sample, 5),
+                    GetGrainMin<bitdepth>(), GetGrainMax<bitdepth>());
+          noise_stripe[i * plane_width + x] = grain_sample;
+          j = 1;
+        }
+        // The following covers the rest of the loop over j as described in the
+        // spec.
+        //
+        // Section 7.18.3.5 says:
+        //   noiseStripe[ lumaNum ][ 0 ] is 34 samples high and w samples
+        //   wide (a few additional samples across are actually written to
+        //   the array, but these are never read) ...
+        //
+        // Note: The warning in the parentheses also applies to
+        // noiseStripe[ lumaNum ][ 1 ] and noiseStripe[ lumaNum ][ 2 ].
+        //
+        // Writes beyond the width of each row could happen below. To
+        // prevent those writes, we clip the number of pixels to copy against
+        // the remaining width.
+        // TODO(petersonab): Allocate aligned stripes with extra width to cover
+        // the size of the final stripe block, then remove this call to min.
+        const int copy_size =
+            std::min(kNoiseStripeHeight >> subsampling_x,
+                     plane_width - (x << (1 - subsampling_x))) -
+            j;
+        memcpy(&noise_stripe[i * plane_width + (x << (1 - subsampling_x)) + j],
+               &grain[(plane_offset_y + i) * grain_width + plane_offset_x + j],
+               copy_size * sizeof(noise_stripe[0]));
+      } while (++i < (kNoiseStripeHeight >> subsampling_y));
+    }
+
+    ++luma_num;
+    y += 16;
+  } while (y < half_height);
+}
+
+template <int bitdepth, typename GrainType>
+inline void WriteOverlapLine_C(const GrainType* noise_stripe_row,
+                               const GrainType* noise_stripe_row_prev,
+                               int plane_width, int grain_coeff, int old_coeff,
+                               GrainType* noise_image_row) {
+  int x = 0;
+  do {
+    int grain = noise_stripe_row[x];
+    const int old = noise_stripe_row_prev[x];
+    grain = old * old_coeff + grain * grain_coeff;
+    grain = Clip3(RightShiftWithRounding(grain, 5), GetGrainMin<bitdepth>(),
+                  GetGrainMax<bitdepth>());
+    noise_image_row[x] = grain;
+  } while (++x < plane_width);
+}
+
+template <int bitdepth, typename GrainType>
+void ConstructNoiseImageOverlap_C(const void* noise_stripes_buffer, int width,
+                                  int height, int subsampling_x,
+                                  int subsampling_y, void* noise_image_buffer) {
+  const auto* noise_stripes =
+      static_cast<const Array2DView<GrainType>*>(noise_stripes_buffer);
+  auto* noise_image = static_cast<Array2D<GrainType>*>(noise_image_buffer);
+  const int plane_width = (width + subsampling_x) >> subsampling_x;
+  const int plane_height = (height + subsampling_y) >> subsampling_y;
+  const int stripe_height = 32 >> subsampling_y;
+  const int stripe_mask = stripe_height - 1;
+  int y = stripe_height;
+  int luma_num = 1;
+  if (subsampling_y == 0) {
+    // Begin complete stripes section. This is when we are guaranteed to have
+    // two overlap rows in each stripe.
+    for (; y < (plane_height & ~stripe_mask); ++luma_num, y += stripe_height) {
+      const GrainType* noise_stripe = (*noise_stripes)[luma_num];
+      const GrainType* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
+      // First overlap row.
+      WriteOverlapLine_C<bitdepth>(noise_stripe,
+                                   &noise_stripe_prev[32 * plane_width],
+                                   plane_width, 17, 27, (*noise_image)[y]);
+      // Second overlap row.
+      WriteOverlapLine_C<bitdepth>(&noise_stripe[plane_width],
+                                   &noise_stripe_prev[(32 + 1) * plane_width],
+                                   plane_width, 27, 17, (*noise_image)[y + 1]);
+    }
+    // End complete stripes section.
+
+    const int remaining_height = plane_height - y;
+    // Either one partial stripe remains (remaining_height  > 0),
+    // OR image is less than one stripe high (remaining_height < 0),
+    // OR all stripes are completed (remaining_height == 0).
+    if (remaining_height <= 0) {
+      return;
+    }
+    const GrainType* noise_stripe = (*noise_stripes)[luma_num];
+    const GrainType* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
+    WriteOverlapLine_C<bitdepth>(noise_stripe,
+                                 &noise_stripe_prev[32 * plane_width],
+                                 plane_width, 17, 27, (*noise_image)[y]);
+
+    // Check if second overlap row is in the image.
+    if (remaining_height > 1) {
+      WriteOverlapLine_C<bitdepth>(&noise_stripe[plane_width],
+                                   &noise_stripe_prev[(32 + 1) * plane_width],
+                                   plane_width, 27, 17, (*noise_image)[y + 1]);
+    }
+  } else {  // |subsampling_y| == 1
+    // No special checks needed for partial stripes, because if one exists, the
+    // first and only overlap row is guaranteed to exist.
+    for (; y < plane_height; ++luma_num, y += stripe_height) {
+      const GrainType* noise_stripe = (*noise_stripes)[luma_num];
+      const GrainType* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
+      WriteOverlapLine_C<bitdepth>(noise_stripe,
+                                   &noise_stripe_prev[16 * plane_width],
+                                   plane_width, 22, 23, (*noise_image)[y]);
+    }
+  }
+}
+
+template <int bitdepth, typename GrainType, typename Pixel>
+void BlendNoiseWithImageLuma_C(
+    const void* noise_image_ptr, int min_value, int max_luma, int scaling_shift,
+    int width, int height, int start_height,
+    const uint8_t scaling_lut_y[kScalingLookupTableSize],
+    const void* source_plane_y, ptrdiff_t source_stride_y, void* dest_plane_y,
+    ptrdiff_t dest_stride_y) {
+  const auto* noise_image =
+      static_cast<const Array2D<GrainType>*>(noise_image_ptr);
+  const auto* in_y = static_cast<const Pixel*>(source_plane_y);
+  source_stride_y /= sizeof(Pixel);
+  auto* out_y = static_cast<Pixel*>(dest_plane_y);
+  dest_stride_y /= sizeof(Pixel);
+
+  int y = 0;
+  do {
+    int x = 0;
+    do {
+      const int orig = in_y[y * source_stride_y + x];
+      int noise = noise_image[kPlaneY][y + start_height][x];
+      noise = RightShiftWithRounding(
+          ScaleLut<bitdepth>(scaling_lut_y, orig) * noise, scaling_shift);
+      out_y[y * dest_stride_y + x] = Clip3(orig + noise, min_value, max_luma);
+    } while (++x < width);
+  } while (++y < height);
+}
+
+// This function is for the case params_.chroma_scaling_from_luma == false.
+template <int bitdepth, typename GrainType, typename Pixel>
+void BlendNoiseWithImageChroma_C(
+    Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
+    int min_value, int max_chroma, int width, int height, int start_height,
+    int subsampling_x, int subsampling_y,
+    const uint8_t scaling_lut_uv[kScalingLookupTableSize],
+    const void* source_plane_y, ptrdiff_t source_stride_y,
+    const void* source_plane_uv, ptrdiff_t source_stride_uv,
+    void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
+  const auto* noise_image =
+      static_cast<const Array2D<GrainType>*>(noise_image_ptr);
+
+  const int chroma_width = (width + subsampling_x) >> subsampling_x;
+  const int chroma_height = (height + subsampling_y) >> subsampling_y;
+
+  const auto* in_y = static_cast<const Pixel*>(source_plane_y);
+  source_stride_y /= sizeof(Pixel);
+  const auto* in_uv = static_cast<const Pixel*>(source_plane_uv);
+  source_stride_uv /= sizeof(Pixel);
+  auto* out_uv = static_cast<Pixel*>(dest_plane_uv);
+  dest_stride_uv /= sizeof(Pixel);
+
+  const int offset = (plane == kPlaneU) ? params.u_offset : params.v_offset;
+  const int luma_multiplier =
+      (plane == kPlaneU) ? params.u_luma_multiplier : params.v_luma_multiplier;
+  const int multiplier =
+      (plane == kPlaneU) ? params.u_multiplier : params.v_multiplier;
+
+  const int scaling_shift = params.chroma_scaling;
+  start_height >>= subsampling_y;
+  int y = 0;
+  do {
+    int x = 0;
+    do {
+      const int luma_x = x << subsampling_x;
+      const int luma_y = y << subsampling_y;
+      const int luma_next_x = std::min(luma_x + 1, width - 1);
+      int average_luma;
+      if (subsampling_x != 0) {
+        average_luma = RightShiftWithRounding(
+            in_y[luma_y * source_stride_y + luma_x] +
+                in_y[luma_y * source_stride_y + luma_next_x],
+            1);
+      } else {
+        average_luma = in_y[luma_y * source_stride_y + luma_x];
+      }
+      const int orig = in_uv[y * source_stride_uv + x];
+      const int combined = average_luma * luma_multiplier + orig * multiplier;
+      const int merged =
+          Clip3((combined >> 6) + LeftShift(offset, bitdepth - 8), 0,
+                (1 << bitdepth) - 1);
+      int noise = noise_image[plane][y + start_height][x];
+      noise = RightShiftWithRounding(
+          ScaleLut<bitdepth>(scaling_lut_uv, merged) * noise, scaling_shift);
+      out_uv[y * dest_stride_uv + x] =
+          Clip3(orig + noise, min_value, max_chroma);
+    } while (++x < chroma_width);
+  } while (++y < chroma_height);
+}
+
+// This function is for the case params_.chroma_scaling_from_luma == true.
+// This further implies that scaling_lut_u == scaling_lut_v == scaling_lut_y.
+template <int bitdepth, typename GrainType, typename Pixel>
+void BlendNoiseWithImageChromaWithCfl_C(
+    Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
+    int min_value, int max_chroma, int width, int height, int start_height,
+    int subsampling_x, int subsampling_y,
+    const uint8_t scaling_lut[kScalingLookupTableSize],
+    const void* source_plane_y, ptrdiff_t source_stride_y,
+    const void* source_plane_uv, ptrdiff_t source_stride_uv,
+    void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
+  const auto* noise_image =
+      static_cast<const Array2D<GrainType>*>(noise_image_ptr);
+  const auto* in_y = static_cast<const Pixel*>(source_plane_y);
+  source_stride_y /= sizeof(Pixel);
+  const auto* in_uv = static_cast<const Pixel*>(source_plane_uv);
+  source_stride_uv /= sizeof(Pixel);
+  auto* out_uv = static_cast<Pixel*>(dest_plane_uv);
+  dest_stride_uv /= sizeof(Pixel);
+
+  const int chroma_width = (width + subsampling_x) >> subsampling_x;
+  const int chroma_height = (height + subsampling_y) >> subsampling_y;
+  const int scaling_shift = params.chroma_scaling;
+  start_height >>= subsampling_y;
+  int y = 0;
+  do {
+    int x = 0;
+    do {
+      const int luma_x = x << subsampling_x;
+      const int luma_y = y << subsampling_y;
+      const int luma_next_x = std::min(luma_x + 1, width - 1);
+      int average_luma;
+      if (subsampling_x != 0) {
+        average_luma = RightShiftWithRounding(
+            in_y[luma_y * source_stride_y + luma_x] +
+                in_y[luma_y * source_stride_y + luma_next_x],
+            1);
+      } else {
+        average_luma = in_y[luma_y * source_stride_y + luma_x];
+      }
+      const int orig_uv = in_uv[y * source_stride_uv + x];
+      int noise_uv = noise_image[plane][y + start_height][x];
+      noise_uv = RightShiftWithRounding(
+          ScaleLut<bitdepth>(scaling_lut, average_luma) * noise_uv,
+          scaling_shift);
+      out_uv[y * dest_stride_uv + x] =
+          Clip3(orig_uv + noise_uv, min_value, max_chroma);
+    } while (++x < chroma_width);
+  } while (++y < chroma_height);
+}
+
+void Init8bpp() {
+  Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
+  assert(dsp != nullptr);
+#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+  // LumaAutoRegressionFunc
+  dsp->film_grain.luma_auto_regression[0] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+  dsp->film_grain.luma_auto_regression[1] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+  dsp->film_grain.luma_auto_regression[2] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+
+  // ChromaAutoRegressionFunc
+  // Chroma autoregression should never be called when lag is 0 and use_luma is
+  // false.
+  dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
+  dsp->film_grain.chroma_auto_regression[0][1] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, false>;
+  dsp->film_grain.chroma_auto_regression[0][2] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, false>;
+  dsp->film_grain.chroma_auto_regression[0][3] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, false>;
+  dsp->film_grain.chroma_auto_regression[1][0] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 0, true>;
+  dsp->film_grain.chroma_auto_regression[1][1] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, true>;
+  dsp->film_grain.chroma_auto_regression[1][2] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, true>;
+  dsp->film_grain.chroma_auto_regression[1][3] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, true>;
+
+  // ConstructNoiseStripesFunc
+  dsp->film_grain.construct_noise_stripes[0] =
+      ConstructNoiseStripes_C<8, int8_t>;
+  dsp->film_grain.construct_noise_stripes[1] =
+      ConstructNoiseStripesWithOverlap_C<8, int8_t>;
+
+  // ConstructNoiseImageOverlapFunc
+  dsp->film_grain.construct_noise_image_overlap =
+      ConstructNoiseImageOverlap_C<8, int8_t>;
+
+  // InitializeScalingLutFunc
+  dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
+
+  // BlendNoiseWithImageLumaFunc
+  dsp->film_grain.blend_noise_luma =
+      BlendNoiseWithImageLuma_C<8, int8_t, uint8_t>;
+
+  // BlendNoiseWithImageChromaFunc
+  dsp->film_grain.blend_noise_chroma[0] =
+      BlendNoiseWithImageChroma_C<8, int8_t, uint8_t>;
+  dsp->film_grain.blend_noise_chroma[1] =
+      BlendNoiseWithImageChromaWithCfl_C<8, int8_t, uint8_t>;
+#else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+  static_cast<void>(dsp);
+#ifndef LIBGAV1_Dsp8bpp_FilmGrainAutoregressionLuma
+  dsp->film_grain.luma_auto_regression[0] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+  dsp->film_grain.luma_auto_regression[1] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+  dsp->film_grain.luma_auto_regression[2] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
+#endif
+#ifndef LIBGAV1_Dsp8bpp_FilmGrainAutoregressionChroma
+  // Chroma autoregression should never be called when lag is 0 and use_luma is
+  // false.
+  dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
+  dsp->film_grain.chroma_auto_regression[0][1] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, false>;
+  dsp->film_grain.chroma_auto_regression[0][2] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, false>;
+  dsp->film_grain.chroma_auto_regression[0][3] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, false>;
+  dsp->film_grain.chroma_auto_regression[1][0] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 0, true>;
+  dsp->film_grain.chroma_auto_regression[1][1] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, true>;
+  dsp->film_grain.chroma_auto_regression[1][2] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, true>;
+  dsp->film_grain.chroma_auto_regression[1][3] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, true>;
+#endif
+#ifndef LIBGAV1_Dsp8bpp_FilmGrainConstructNoiseStripes
+  dsp->film_grain.construct_noise_stripes[0] =
+      ConstructNoiseStripes_C<8, int8_t>;
+  dsp->film_grain.construct_noise_stripes[1] =
+      ConstructNoiseStripesWithOverlap_C<8, int8_t>;
+#endif
+#ifndef LIBGAV1_Dsp8bpp_FilmGrainConstructNoiseImageOverlap
+  dsp->film_grain.construct_noise_image_overlap =
+      ConstructNoiseImageOverlap_C<8, int8_t>;
+#endif
+#ifndef LIBGAV1_Dsp8bpp_FilmGrainInitializeScalingLutFunc
+  dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
+#endif
+#ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseLuma
+  dsp->film_grain.blend_noise_luma =
+      BlendNoiseWithImageLuma_C<8, int8_t, uint8_t>;
+#endif
+#ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseChroma
+  dsp->film_grain.blend_noise_chroma[0] =
+      BlendNoiseWithImageChroma_C<8, int8_t, uint8_t>;
+#endif
+#ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseChromaWithCfl
+  dsp->film_grain.blend_noise_chroma[1] =
+      BlendNoiseWithImageChromaWithCfl_C<8, int8_t, uint8_t>;
+#endif
+#endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+}
+
+#if LIBGAV1_MAX_BITDEPTH >= 10
+void Init10bpp() {
+  Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
+  assert(dsp != nullptr);
+#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+
+  // LumaAutoRegressionFunc
+  dsp->film_grain.luma_auto_regression[0] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+  dsp->film_grain.luma_auto_regression[1] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+  dsp->film_grain.luma_auto_regression[2] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+
+  // ChromaAutoRegressionFunc
+  // Chroma autoregression should never be called when lag is 0 and use_luma is
+  // false.
+  dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
+  dsp->film_grain.chroma_auto_regression[0][1] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, false>;
+  dsp->film_grain.chroma_auto_regression[0][2] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, false>;
+  dsp->film_grain.chroma_auto_regression[0][3] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, false>;
+  dsp->film_grain.chroma_auto_regression[1][0] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 0, true>;
+  dsp->film_grain.chroma_auto_regression[1][1] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, true>;
+  dsp->film_grain.chroma_auto_regression[1][2] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, true>;
+  dsp->film_grain.chroma_auto_regression[1][3] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, true>;
+
+  // ConstructNoiseStripesFunc
+  dsp->film_grain.construct_noise_stripes[0] =
+      ConstructNoiseStripes_C<10, int16_t>;
+  dsp->film_grain.construct_noise_stripes[1] =
+      ConstructNoiseStripesWithOverlap_C<10, int16_t>;
+
+  // ConstructNoiseImageOverlapFunc
+  dsp->film_grain.construct_noise_image_overlap =
+      ConstructNoiseImageOverlap_C<10, int16_t>;
+
+  // InitializeScalingLutFunc
+  dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
+
+  // BlendNoiseWithImageLumaFunc
+  dsp->film_grain.blend_noise_luma =
+      BlendNoiseWithImageLuma_C<10, int16_t, uint16_t>;
+
+  // BlendNoiseWithImageChromaFunc
+  dsp->film_grain.blend_noise_chroma[0] =
+      BlendNoiseWithImageChroma_C<10, int16_t, uint16_t>;
+  dsp->film_grain.blend_noise_chroma[1] =
+      BlendNoiseWithImageChromaWithCfl_C<10, int16_t, uint16_t>;
+#else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+  static_cast<void>(dsp);
+#ifndef LIBGAV1_Dsp10bpp_FilmGrainAutoregressionLuma
+  dsp->film_grain.luma_auto_regression[0] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+  dsp->film_grain.luma_auto_regression[1] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+  dsp->film_grain.luma_auto_regression[2] =
+      ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
+#endif
+#ifndef LIBGAV1_Dsp10bpp_FilmGrainAutoregressionChroma
+  // Chroma autoregression should never be called when lag is 0 and use_luma is
+  // false.
+  dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
+  dsp->film_grain.chroma_auto_regression[0][1] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, false>;
+  dsp->film_grain.chroma_auto_regression[0][2] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, false>;
+  dsp->film_grain.chroma_auto_regression[0][3] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, false>;
+  dsp->film_grain.chroma_auto_regression[1][0] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 0, true>;
+  dsp->film_grain.chroma_auto_regression[1][1] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, true>;
+  dsp->film_grain.chroma_auto_regression[1][2] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, true>;
+  dsp->film_grain.chroma_auto_regression[1][3] =
+      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, true>;
+#endif
+#ifndef LIBGAV1_Dsp10bpp_FilmGrainConstructNoiseStripes
+  dsp->film_grain.construct_noise_stripes[0] =
+      ConstructNoiseStripes_C<10, int16_t>;
+  dsp->film_grain.construct_noise_stripes[1] =
+      ConstructNoiseStripesWithOverlap_C<10, int16_t>;
+#endif
+#ifndef LIBGAV1_Dsp10bpp_FilmGrainConstructNoiseImageOverlap
+  dsp->film_grain.construct_noise_image_overlap =
+      ConstructNoiseImageOverlap_C<10, int16_t>;
+#endif
+#ifndef LIBGAV1_Dsp10bpp_FilmGrainInitializeScalingLutFunc
+  dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
+#endif
+#ifndef LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseLuma
+  dsp->film_grain.blend_noise_luma =
+      BlendNoiseWithImageLuma_C<10, int16_t, uint16_t>;
+#endif
+#ifndef LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseChroma
+  dsp->film_grain.blend_noise_chroma[0] =
+      BlendNoiseWithImageChroma_C<10, int16_t, uint16_t>;
+#endif
+#ifndef LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseChromaWithCfl
+  dsp->film_grain.blend_noise_chroma[1] =
+      BlendNoiseWithImageChromaWithCfl_C<10, int16_t, uint16_t>;
+#endif
+#endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
+}
+#endif  // LIBGAV1_MAX_BITDEPTH >= 10
+
+}  // namespace
+}  // namespace film_grain
+
+void FilmGrainInit_C() {
+  film_grain::Init8bpp();
+#if LIBGAV1_MAX_BITDEPTH >= 10
+  film_grain::Init10bpp();
+#endif
+}
+
+}  // namespace dsp
+}  // namespace libgav1
author	qinxialei <xialeiqin@gmail.com>	2020-10-29 11:26:59 +0800
committer	qinxialei <xialeiqin@gmail.com>	2020-10-29 11:26:59 +0800
commit	e8d277081293b6fb2a5d469616baaa7a06f52496 (patch)
tree	1179bb07d3927d1837d4a90bd81b2034c4c696a9 /src/dsp/film_grain.cc
download	libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.tar.gz libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.tar.bz2 libgav1-e8d277081293b6fb2a5d469616baaa7a06f52496.zip