// Copyright 2020 The libgav1 Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include "src/post_filter.h" #include "src/utils/blocking_counter.h" #include "src/utils/compiler_attributes.h" #include "src/utils/constants.h" namespace libgav1 { namespace { constexpr int kStep64x64 = 16; // =64/4. constexpr int kCdefSkip = 8; constexpr uint8_t kCdefUvDirection[2][2][8] = { {{0, 1, 2, 3, 4, 5, 6, 7}, {1, 2, 2, 2, 3, 4, 6, 0}}, {{7, 0, 2, 4, 5, 6, 6, 6}, {0, 1, 2, 3, 4, 5, 6, 7}}}; constexpr int kCdefBorderRows[2][4] = {{0, 1, 62, 63}, {0, 1, 30, 31}}; template void CopyRowForCdef(const Pixel* src, int block_width, int unit_width, bool is_frame_left, bool is_frame_right, uint16_t* const dst, const Pixel* left_border = nullptr) { if (sizeof(src[0]) == sizeof(dst[0])) { if (is_frame_left) { Memset(dst - kCdefBorder, kCdefLargeValue, kCdefBorder); } else if (left_border == nullptr) { memcpy(dst - kCdefBorder, src - kCdefBorder, kCdefBorder * sizeof(dst[0])); } else { memcpy(dst - kCdefBorder, left_border, kCdefBorder * sizeof(dst[0])); } memcpy(dst, src, block_width * sizeof(dst[0])); if (is_frame_right) { Memset(dst + block_width, kCdefLargeValue, unit_width + kCdefBorder - block_width); } else { memcpy(dst + block_width, src + block_width, (unit_width + kCdefBorder - block_width) * sizeof(dst[0])); } return; } if (is_frame_left) { for (int x = -kCdefBorder; x < 0; ++x) { dst[x] = static_cast(kCdefLargeValue); } } else if (left_border == nullptr) { for (int x = -kCdefBorder; x < 0; ++x) { dst[x] = src[x]; } } else { for (int x = -kCdefBorder; x < 0; ++x) { dst[x] = left_border[x + kCdefBorder]; } } for (int x = 0; x < block_width; ++x) { dst[x] = src[x]; } for (int x = block_width; x < unit_width + kCdefBorder; ++x) { dst[x] = is_frame_right ? static_cast(kCdefLargeValue) : src[x]; } } // GCC 13.x will report a false positive from the call to // ApplyCdefForOneSuperBlockRowHelper() with a nullptr in // ApplyCdefForOneSuperBlockRow(). The call to CopyPixels() in // ApplyCdefForOneUnit() is only made when thread_pool_ != nullptr and // border_columns[][] is a valid pointer. #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Warray-bounds" #pragma GCC diagnostic ignored "-Wstringop-overflow" #endif // For |height| rows, copy |width| pixels of size |pixel_size| from |src| to // |dst|. void CopyPixels(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height, size_t pixel_size) { assert(src != nullptr); assert(dst != nullptr); assert(height > 0); int y = height; do { memcpy(dst, src, width * pixel_size); src += src_stride; dst += dst_stride; } while (--y != 0); } #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop #endif } // namespace void PostFilter::SetupCdefBorder(int row4x4) { assert(row4x4 >= 0); assert(DoCdef()); int plane = kPlaneY; do { const ptrdiff_t src_stride = frame_buffer_.stride(plane); const ptrdiff_t dst_stride = cdef_border_.stride(plane); const int row_offset = DivideBy4(row4x4); const int num_pixels = SubsampledValue( MultiplyBy4(frame_header_.columns4x4), subsampling_x_[plane]); const int row_width = num_pixels << pixel_size_log2_; const int plane_height = SubsampledValue(MultiplyBy4(frame_header_.rows4x4), subsampling_y_[plane]); for (int i = 0; i < 4; ++i) { const int row = kCdefBorderRows[subsampling_y_[plane]][i]; const int absolute_row = (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row; if (absolute_row >= plane_height) break; const uint8_t* src = GetSourceBuffer(static_cast(plane), row4x4, 0) + row * src_stride; uint8_t* dst = cdef_border_.data(plane) + dst_stride * (row_offset + i); memcpy(dst, src, row_width); } } while (++plane < planes_); } template void PostFilter::PrepareCdefBlock(int block_width4x4, int block_height4x4, int row4x4, int column4x4, uint16_t* cdef_source, ptrdiff_t cdef_stride, const bool y_plane, const uint8_t border_columns[kMaxPlanes][256], bool use_border_columns) { assert(y_plane || planes_ == kMaxPlanes); const int max_planes = y_plane ? 1 : kMaxPlanes; const int8_t subsampling_x = y_plane ? 0 : subsampling_x_[kPlaneU]; const int8_t subsampling_y = y_plane ? 0 : subsampling_y_[kPlaneU]; const int start_x = MultiplyBy4(column4x4) >> subsampling_x; const int start_y = MultiplyBy4(row4x4) >> subsampling_y; const int plane_width = SubsampledValue(frame_header_.width, subsampling_x); const int plane_height = SubsampledValue(frame_header_.height, subsampling_y); const int block_width = MultiplyBy4(block_width4x4) >> subsampling_x; const int block_height = MultiplyBy4(block_height4x4) >> subsampling_y; // unit_width, unit_height are the same as block_width, block_height unless // it reaches the frame boundary, where block_width < 64 or // block_height < 64. unit_width, unit_height guarantee we build blocks on // a multiple of 8. const int unit_width = Align(block_width, 8 >> subsampling_x); const int unit_height = Align(block_height, 8 >> subsampling_y); const bool is_frame_left = column4x4 == 0; const bool is_frame_right = start_x + block_width >= plane_width; const bool is_frame_top = row4x4 == 0; const bool is_frame_bottom = start_y + block_height >= plane_height; const int y_offset = is_frame_top ? 0 : kCdefBorder; const int cdef_border_row_offset = DivideBy4(row4x4) - (is_frame_top ? 0 : 2); for (int plane = y_plane ? kPlaneY : kPlaneU; plane < max_planes; ++plane) { uint16_t* cdef_src = cdef_source + static_cast(plane == kPlaneV) * kCdefUnitSizeWithBorders * kCdefUnitSizeWithBorders; const int src_stride = frame_buffer_.stride(plane) / sizeof(Pixel); const Pixel* src_buffer = reinterpret_cast(source_buffer_[plane]) + (start_y - y_offset) * src_stride + start_x; const int cdef_border_stride = cdef_border_.stride(plane) / sizeof(Pixel); const Pixel* cdef_border = (thread_pool_ == nullptr) ? nullptr : reinterpret_cast(cdef_border_.data(plane)) + cdef_border_row_offset * cdef_border_stride + start_x; // All the copying code will use negative indices for populating the left // border. So the starting point is set to kCdefBorder. cdef_src += kCdefBorder; // Copy the top 2 rows as follows; // If is_frame_top is true, both the rows are set to kCdefLargeValue. // Otherwise: // If multi-threaded filtering is off, the rows are copied from // |src_buffer|. // Otherwise, the rows are copied from |cdef_border|. if (is_frame_top) { for (int y = 0; y < kCdefBorder; ++y) { Memset(cdef_src - kCdefBorder, kCdefLargeValue, unit_width + 2 * kCdefBorder); cdef_src += cdef_stride; } } else { const Pixel* top_border = (thread_pool_ == nullptr) ? src_buffer : cdef_border; const int top_border_stride = (thread_pool_ == nullptr) ? src_stride : cdef_border_stride; for (int y = 0; y < kCdefBorder; ++y) { CopyRowForCdef(top_border, block_width, unit_width, is_frame_left, is_frame_right, cdef_src); top_border += top_border_stride; cdef_src += cdef_stride; // We need to increment |src_buffer| and |cdef_border| in this loop to // set them up for the subsequent loops below. src_buffer += src_stride; cdef_border += cdef_border_stride; } } // Copy the body as follows; // If multi-threaded filtering is off or if is_frame_bottom is true, all the // rows are copied from |src_buffer|. // Otherwise, the first |block_height|-kCdefBorder rows are copied from // |src_buffer| and the last kCdefBorder rows are coped from |cdef_border|. int y = block_height; const int y_threshold = (thread_pool_ == nullptr || is_frame_bottom) ? 0 : kCdefBorder; const Pixel* left_border = (thread_pool_ == nullptr || !use_border_columns) ? nullptr : reinterpret_cast(border_columns[plane]); do { CopyRowForCdef(src_buffer, block_width, unit_width, is_frame_left, is_frame_right, cdef_src, left_border); cdef_src += cdef_stride; src_buffer += src_stride; if (left_border != nullptr) left_border += kCdefBorder; } while (--y != y_threshold); if (y > 0) { assert(y == kCdefBorder); // |cdef_border| now points to the top 2 rows of the current block. For // the next loop, we need it to point to the bottom 2 rows of the // current block. So increment it by 2 rows. cdef_border += MultiplyBy2(cdef_border_stride); for (int i = 0; i < kCdefBorder; ++i) { CopyRowForCdef(cdef_border, block_width, unit_width, is_frame_left, is_frame_right, cdef_src); cdef_src += cdef_stride; cdef_border += cdef_border_stride; } } // Copy the bottom 2 rows as follows; // If is_frame_bottom is true, both the rows are set to kCdefLargeValue. // Otherwise: // If multi-threaded filtering is off, the rows are copied from // |src_buffer|. // Otherwise, the rows are copied from |cdef_border|. y = 0; if (is_frame_bottom) { do { Memset(cdef_src - kCdefBorder, kCdefLargeValue, unit_width + 2 * kCdefBorder); cdef_src += cdef_stride; } while (++y < kCdefBorder + unit_height - block_height); } else { const Pixel* bottom_border = (thread_pool_ == nullptr) ? src_buffer : cdef_border; const int bottom_border_stride = (thread_pool_ == nullptr) ? src_stride : cdef_border_stride; do { CopyRowForCdef(bottom_border, block_width, unit_width, is_frame_left, is_frame_right, cdef_src); bottom_border += bottom_border_stride; cdef_src += cdef_stride; } while (++y < kCdefBorder + unit_height - block_height); } } } template void PostFilter::ApplyCdefForOneUnit(uint16_t* cdef_block, const int index, const int block_width4x4, const int block_height4x4, const int row4x4_start, const int column4x4_start, uint8_t border_columns[2][kMaxPlanes][256], bool use_border_columns[2][2]) { // Cdef operates in 8x8 blocks (4x4 for chroma with subsampling). static constexpr int kStep = 8; static constexpr int kStep4x4 = 2; int cdef_buffer_row_base_stride[kMaxPlanes]; uint8_t* cdef_buffer_row_base[kMaxPlanes]; int src_buffer_row_base_stride[kMaxPlanes]; const uint8_t* src_buffer_row_base[kMaxPlanes]; const uint16_t* cdef_src_row_base[kMaxPlanes]; int cdef_src_row_base_stride[kMaxPlanes]; int column_step[kMaxPlanes]; assert(planes_ == kMaxPlanesMonochrome || planes_ == kMaxPlanes); int plane = kPlaneY; do { cdef_buffer_row_base[plane] = GetCdefBuffer(static_cast(plane), row4x4_start, column4x4_start); cdef_buffer_row_base_stride[plane] = frame_buffer_.stride(plane) * (kStep >> subsampling_y_[plane]); src_buffer_row_base[plane] = GetSourceBuffer(static_cast(plane), row4x4_start, column4x4_start); src_buffer_row_base_stride[plane] = frame_buffer_.stride(plane) * (kStep >> subsampling_y_[plane]); cdef_src_row_base[plane] = cdef_block + static_cast(plane == kPlaneV) * kCdefUnitSizeWithBorders * kCdefUnitSizeWithBorders + kCdefBorder * kCdefUnitSizeWithBorders + kCdefBorder; cdef_src_row_base_stride[plane] = kCdefUnitSizeWithBorders * (kStep >> subsampling_y_[plane]); column_step[plane] = (kStep >> subsampling_x_[plane]) * sizeof(Pixel); } while (++plane < planes_); // |border_columns| contains two buffers. In each call to this function, we // will use one of them as the "destination" for the current call. And the // other one as the "source" for the current call (which would have been the // "destination" of the previous call). We will use the src_index to populate // the borders which were backed up in the previous call. We will use the // dst_index to populate the borders to be used in the next call. const int border_columns_src_index = DivideBy16(column4x4_start) & 1; const int border_columns_dst_index = border_columns_src_index ^ 1; if (index == -1) { if (thread_pool_ == nullptr) { int plane = kPlaneY; do { CopyPixels(src_buffer_row_base[plane], frame_buffer_.stride(plane), cdef_buffer_row_base[plane], frame_buffer_.stride(plane), MultiplyBy4(block_width4x4) >> subsampling_x_[plane], MultiplyBy4(block_height4x4) >> subsampling_y_[plane], sizeof(Pixel)); } while (++plane < planes_); } use_border_columns[border_columns_dst_index][0] = false; use_border_columns[border_columns_dst_index][1] = false; return; } const bool is_frame_right = MultiplyBy4(column4x4_start + block_width4x4) >= frame_header_.width; if (!is_frame_right && thread_pool_ != nullptr) { // Backup the last 2 columns for use in the next iteration. use_border_columns[border_columns_dst_index][0] = true; const uint8_t* src_line = GetSourceBuffer(kPlaneY, row4x4_start, column4x4_start + block_width4x4) - kCdefBorder * sizeof(Pixel); assert(border_columns != nullptr); CopyPixels(src_line, frame_buffer_.stride(kPlaneY), border_columns[border_columns_dst_index][kPlaneY], kCdefBorder * sizeof(Pixel), kCdefBorder, MultiplyBy4(block_height4x4), sizeof(Pixel)); } PrepareCdefBlock( block_width4x4, block_height4x4, row4x4_start, column4x4_start, cdef_block, kCdefUnitSizeWithBorders, true, (border_columns != nullptr) ? border_columns[border_columns_src_index] : nullptr, use_border_columns[border_columns_src_index][0]); // Stored direction used during the u/v pass. If bit 3 is set, then block is // a skip. uint8_t direction_y[8 * 8]; int y_index = 0; const uint8_t y_primary_strength = frame_header_.cdef.y_primary_strength[index]; const uint8_t y_secondary_strength = frame_header_.cdef.y_secondary_strength[index]; // y_strength_index is 0 for both primary and secondary strengths being // non-zero, 1 for primary only, 2 for secondary only. This will be updated // with y_primary_strength after variance is applied. int y_strength_index = static_cast(y_secondary_strength == 0); const bool compute_direction_and_variance = (y_primary_strength | frame_header_.cdef.uv_primary_strength[index]) != 0; const uint8_t* skip_row = &cdef_skip_[row4x4_start >> 1][column4x4_start >> 4]; const int skip_stride = cdef_skip_.columns(); int row4x4 = row4x4_start; do { uint8_t* cdef_buffer_base = cdef_buffer_row_base[kPlaneY]; const uint8_t* src_buffer_base = src_buffer_row_base[kPlaneY]; const uint16_t* cdef_src_base = cdef_src_row_base[kPlaneY]; int column4x4 = column4x4_start; if (*skip_row == 0) { for (int i = 0; i < DivideBy2(block_width4x4); ++i, ++y_index) { direction_y[y_index] = kCdefSkip; } if (thread_pool_ == nullptr) { CopyPixels(src_buffer_base, frame_buffer_.stride(kPlaneY), cdef_buffer_base, frame_buffer_.stride(kPlaneY), 64, kStep, sizeof(Pixel)); } } else { do { const int block_width = kStep; const int block_height = kStep; const int cdef_stride = frame_buffer_.stride(kPlaneY); uint8_t* const cdef_buffer = cdef_buffer_base; const uint16_t* const cdef_src = cdef_src_base; const int src_stride = frame_buffer_.stride(kPlaneY); const uint8_t* const src_buffer = src_buffer_base; const uint8_t skip_shift = (column4x4 >> 1) & 0x7; const bool skip = ((*skip_row >> skip_shift) & 1) == 0; if (skip) { // No cdef filtering. direction_y[y_index] = kCdefSkip; if (thread_pool_ == nullptr) { CopyPixels(src_buffer, src_stride, cdef_buffer, cdef_stride, block_width, block_height, sizeof(Pixel)); } } else { // Zero out residual skip flag. direction_y[y_index] = 0; int variance = 0; if (compute_direction_and_variance) { if (thread_pool_ == nullptr || row4x4 + kStep4x4 < row4x4_start + block_height4x4) { dsp_.cdef_direction(src_buffer, src_stride, &direction_y[y_index], &variance); } else if (sizeof(Pixel) == 2) { dsp_.cdef_direction(cdef_src, kCdefUnitSizeWithBorders * 2, &direction_y[y_index], &variance); } else { // If we are in the last row4x4 for this unit, then the last two // input rows have to come from |cdef_border_|. Since we already // have |cdef_src| populated correctly, use that as the input // for the direction process. uint8_t direction_src[8][8]; const uint16_t* cdef_src_line = cdef_src; for (auto& direction_src_line : direction_src) { for (int i = 0; i < 8; ++i) { direction_src_line[i] = cdef_src_line[i]; } cdef_src_line += kCdefUnitSizeWithBorders; } dsp_.cdef_direction(direction_src, 8, &direction_y[y_index], &variance); } } const int direction = (y_primary_strength == 0) ? 0 : direction_y[y_index]; const int variance_strength = ((variance >> 6) != 0) ? std::min(FloorLog2(variance >> 6), 12) : 0; const uint8_t primary_strength = (variance != 0) ? (y_primary_strength * (4 + variance_strength) + 8) >> 4 : 0; if ((primary_strength | y_secondary_strength) == 0) { if (thread_pool_ == nullptr) { CopyPixels(src_buffer, src_stride, cdef_buffer, cdef_stride, block_width, block_height, sizeof(Pixel)); } } else { const int strength_index = y_strength_index | (static_cast(primary_strength == 0) << 1); dsp_.cdef_filters[1][strength_index]( cdef_src, kCdefUnitSizeWithBorders, block_height, primary_strength, y_secondary_strength, frame_header_.cdef.damping, direction, cdef_buffer, cdef_stride); } } cdef_buffer_base += column_step[kPlaneY]; src_buffer_base += column_step[kPlaneY]; cdef_src_base += column_step[kPlaneY] / sizeof(Pixel); column4x4 += kStep4x4; y_index++; } while (column4x4 < column4x4_start + block_width4x4); } cdef_buffer_row_base[kPlaneY] += cdef_buffer_row_base_stride[kPlaneY]; src_buffer_row_base[kPlaneY] += src_buffer_row_base_stride[kPlaneY]; cdef_src_row_base[kPlaneY] += cdef_src_row_base_stride[kPlaneY]; skip_row += skip_stride; row4x4 += kStep4x4; } while (row4x4 < row4x4_start + block_height4x4); if (planes_ == kMaxPlanesMonochrome) { return; } const uint8_t uv_primary_strength = frame_header_.cdef.uv_primary_strength[index]; const uint8_t uv_secondary_strength = frame_header_.cdef.uv_secondary_strength[index]; if ((uv_primary_strength | uv_secondary_strength) == 0) { if (thread_pool_ == nullptr) { for (int plane = kPlaneU; plane <= kPlaneV; ++plane) { CopyPixels(src_buffer_row_base[plane], frame_buffer_.stride(plane), cdef_buffer_row_base[plane], frame_buffer_.stride(plane), MultiplyBy4(block_width4x4) >> subsampling_x_[plane], MultiplyBy4(block_height4x4) >> subsampling_y_[plane], sizeof(Pixel)); } } use_border_columns[border_columns_dst_index][1] = false; return; } if (!is_frame_right && thread_pool_ != nullptr) { use_border_columns[border_columns_dst_index][1] = true; for (int plane = kPlaneU; plane <= kPlaneV; ++plane) { // Backup the last 2 columns for use in the next iteration. const uint8_t* src_line = GetSourceBuffer(static_cast(plane), row4x4_start, column4x4_start + block_width4x4) - kCdefBorder * sizeof(Pixel); CopyPixels(src_line, frame_buffer_.stride(plane), border_columns[border_columns_dst_index][plane], kCdefBorder * sizeof(Pixel), kCdefBorder, MultiplyBy4(block_height4x4) >> subsampling_y_[plane], sizeof(Pixel)); } } PrepareCdefBlock( block_width4x4, block_height4x4, row4x4_start, column4x4_start, cdef_block, kCdefUnitSizeWithBorders, false, (border_columns != nullptr) ? border_columns[border_columns_src_index] : nullptr, use_border_columns[border_columns_src_index][1]); // uv_strength_index is 0 for both primary and secondary strengths being // non-zero, 1 for primary only, 2 for secondary only. const int uv_strength_index = (static_cast(uv_primary_strength == 0) << 1) | static_cast(uv_secondary_strength == 0); for (int plane = kPlaneU; plane <= kPlaneV; ++plane) { const int8_t subsampling_x = subsampling_x_[plane]; const int8_t subsampling_y = subsampling_y_[plane]; const int block_width = kStep >> subsampling_x; const int block_height = kStep >> subsampling_y; int row4x4 = row4x4_start; y_index = 0; do { uint8_t* cdef_buffer_base = cdef_buffer_row_base[plane]; const uint8_t* src_buffer_base = src_buffer_row_base[plane]; const uint16_t* cdef_src_base = cdef_src_row_base[plane]; int column4x4 = column4x4_start; do { const int cdef_stride = frame_buffer_.stride(plane); uint8_t* const cdef_buffer = cdef_buffer_base; const int src_stride = frame_buffer_.stride(plane); const uint8_t* const src_buffer = src_buffer_base; const uint16_t* const cdef_src = cdef_src_base; const bool skip = (direction_y[y_index] & kCdefSkip) != 0; int dual_cdef = 0; if (skip) { // No cdef filtering. if (thread_pool_ == nullptr) { CopyPixels(src_buffer, src_stride, cdef_buffer, cdef_stride, block_width, block_height, sizeof(Pixel)); } } else { // Make sure block pair is not out of bounds. if (column4x4 + (kStep4x4 * 2) <= column4x4_start + block_width4x4) { // Enable dual processing if subsampling_x is 1. dual_cdef = subsampling_x; } int direction = (uv_primary_strength == 0) ? 0 : kCdefUvDirection[subsampling_x][subsampling_y] [direction_y[y_index]]; if (dual_cdef != 0) { if (uv_primary_strength && direction_y[y_index] != direction_y[y_index + 1]) { // Disable dual processing if the second block of the pair does // not have the same direction. dual_cdef = 0; } // Disable dual processing if the second block of the pair is a // skip. if (direction_y[y_index + 1] == kCdefSkip) { dual_cdef = 0; } } // Block width is 8 if either dual_cdef is true or subsampling_x == 0. const int width_index = dual_cdef | (subsampling_x ^ 1); dsp_.cdef_filters[width_index][uv_strength_index]( cdef_src, kCdefUnitSizeWithBorders, block_height, uv_primary_strength, uv_secondary_strength, frame_header_.cdef.damping - 1, direction, cdef_buffer, cdef_stride); } // When dual_cdef is set, the above cdef_filter() will process 2 blocks, // so adjust the pointers and indexes for 2 blocks. cdef_buffer_base += column_step[plane] << dual_cdef; src_buffer_base += column_step[plane] << dual_cdef; cdef_src_base += (column_step[plane] / sizeof(Pixel)) << dual_cdef; column4x4 += kStep4x4 << dual_cdef; y_index += 1 << dual_cdef; } while (column4x4 < column4x4_start + block_width4x4); cdef_buffer_row_base[plane] += cdef_buffer_row_base_stride[plane]; src_buffer_row_base[plane] += src_buffer_row_base_stride[plane]; cdef_src_row_base[plane] += cdef_src_row_base_stride[plane]; row4x4 += kStep4x4; } while (row4x4 < row4x4_start + block_height4x4); } } void PostFilter::ApplyCdefForOneSuperBlockRowHelper( uint16_t* cdef_block, uint8_t border_columns[2][kMaxPlanes][256], int row4x4, int block_height4x4) { bool use_border_columns[2][2] = {}; const bool non_zero_index = frame_header_.cdef.bits > 0; const int8_t* cdef_index = non_zero_index ? cdef_index_[DivideBy16(row4x4)] : nullptr; int column4x4 = 0; do { const int index = non_zero_index ? *cdef_index++ : 0; const int block_width4x4 = std::min(kStep64x64, frame_header_.columns4x4 - column4x4); #if LIBGAV1_MAX_BITDEPTH >= 10 if (bitdepth_ >= 10) { ApplyCdefForOneUnit(cdef_block, index, block_width4x4, block_height4x4, row4x4, column4x4, border_columns, use_border_columns); } else // NOLINT #endif // LIBGAV1_MAX_BITDEPTH >= 10 { ApplyCdefForOneUnit(cdef_block, index, block_width4x4, block_height4x4, row4x4, column4x4, border_columns, use_border_columns); } column4x4 += kStep64x64; } while (column4x4 < frame_header_.columns4x4); } void PostFilter::ApplyCdefForOneSuperBlockRow(int row4x4_start, int sb4x4, bool is_last_row) { assert(row4x4_start >= 0); assert(DoCdef()); int row4x4 = row4x4_start; const int row4x4_limit = row4x4_start + sb4x4; do { if (row4x4 >= frame_header_.rows4x4) return; // Apply cdef for the last 8 rows of the previous superblock row. // One exception: If the superblock size is 128x128 and is_last_row is true, // then we simply apply cdef for the entire superblock row without any lag. // In that case, apply cdef for the previous superblock row only during the // first iteration (row4x4 == row4x4_start). if (row4x4 > 0 && (!is_last_row || row4x4 == row4x4_start)) { assert(row4x4 >= 16); ApplyCdefForOneSuperBlockRowHelper(cdef_block_, nullptr, row4x4 - 2, 2); } // Apply cdef for the current superblock row. If this is the last superblock // row we apply cdef for all the rows, otherwise we leave out the last 8 // rows. const int block_height4x4 = std::min(kStep64x64, frame_header_.rows4x4 - row4x4); const int height4x4 = block_height4x4 - (is_last_row ? 0 : 2); if (height4x4 > 0) { ApplyCdefForOneSuperBlockRowHelper(cdef_block_, nullptr, row4x4, height4x4); } row4x4 += kStep64x64; } while (row4x4 < row4x4_limit); } void PostFilter::ApplyCdefWorker(std::atomic* row4x4_atomic) { int row4x4; uint16_t cdef_block[kCdefUnitSizeWithBorders * kCdefUnitSizeWithBorders * 2]; // Each border_column buffer has to store 64 rows and 2 columns for each // plane. For 10bit, that is 64*2*2 = 256 bytes. alignas(kMaxAlignment) uint8_t border_columns[2][kMaxPlanes][256]; while ((row4x4 = row4x4_atomic->fetch_add( kStep64x64, std::memory_order_relaxed)) < frame_header_.rows4x4) { const int block_height4x4 = std::min(kStep64x64, frame_header_.rows4x4 - row4x4); ApplyCdefForOneSuperBlockRowHelper(cdef_block, border_columns, row4x4, block_height4x4); } } } // namespace libgav1