aboutsummaryrefslogtreecommitdiff
path: root/src/tile_scratch_buffer.h
blob: 828f55094dcc1abe19107253f722cca87cf6cb64 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/*
 * Copyright 2019 The libgav1 Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
#define LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <mutex>  // NOLINT (unapproved c++11 header)
#include <new>
#include <utility>

#include "src/dsp/constants.h"
#include "src/utils/common.h"
#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
#include "src/utils/memory.h"
#include "src/utils/stack.h"

namespace libgav1 {

// Scratch storage used while decoding a superblock.
//
// All members except |convolve_block_buffer| live inline in the struct.
// |convolve_block_buffer| is allocated by Init() because its size depends on
// the bitdepth. Init() must succeed before |convolve_block_buffer| is used.
struct TileScratchBuffer : public MaxAlignedAllocable {
  // Row/column count of |block_decoded| for one plane (see the comment on
  // |block_decoded| below).
  static constexpr int kBlockDecodedStride = 34;

  // Allocates |convolve_block_buffer| and sets |convolve_block_buffer_stride|
  // for |bitdepth|. A sample occupies one byte when |bitdepth| is 8 and two
  // bytes otherwise; builds with LIBGAV1_MAX_BITDEPTH < 10 only accept 8.
  //
  // Returns true on success and false if the allocation failed.
  LIBGAV1_MUST_USE_RESULT bool Init(int bitdepth) {
#if LIBGAV1_MAX_BITDEPTH >= 10
    const int pixel_size = (bitdepth == 8) ? 1 : 2;
#else
    assert(bitdepth == 8);
    static_cast<void>(bitdepth);
    const int pixel_size = 1;
#endif

    // The stride below is sized with the scaled right border, so it must be
    // at least as large as the unscaled one.
    static_assert(kConvolveScaleBorderRight >= kConvolveBorderRight, "");
    constexpr int unaligned_convolve_buffer_stride =
        kMaxScaledSuperBlockSizeInPixels + kConvolveBorderLeftTop +
        kConvolveScaleBorderRight;
    // The stride is expressed in bytes and rounded up to kMaxAlignment.
    convolve_block_buffer_stride = Align<ptrdiff_t>(
        unaligned_convolve_buffer_stride * pixel_size, kMaxAlignment);
    constexpr int convolve_buffer_height = kMaxScaledSuperBlockSizeInPixels +
                                           kConvolveBorderLeftTop +
                                           kConvolveBorderBottom;
    const size_t convolve_buffer_size = static_cast<size_t>(
        convolve_buffer_height * convolve_block_buffer_stride);

    convolve_block_buffer =
        MakeAlignedUniquePtr<uint8_t>(kMaxAlignment, convolve_buffer_size);
    // Bail out before touching the buffer: the MSAN fill below must never run
    // on a failed allocation.
    if (convolve_block_buffer == nullptr) return false;
#if LIBGAV1_MSAN
    // Quiet msan warnings in ConvolveScale2D_NEON(). Set with random non-zero
    // value to aid in future debugging.
    memset(convolve_block_buffer.get(), 0x66, convolve_buffer_size);
#endif

    return true;
  }

  // kCompoundPredictionTypeDiffWeighted prediction mode needs a mask of the
  // prediction block size. This buffer is used to store that mask. The masks
  // will be created for the Y plane and will be re-used for the U & V planes.
  alignas(kMaxAlignment) uint8_t weight_mask[kMaxSuperBlockSizeSquareInPixels];

  // For each instance of the TileScratchBuffer, only one of the following
  // buffers will be used at any given time, so it is ok to share them in a
  // union.
  union {
    // Buffers used for prediction process.
    // Compound prediction calculations always output 16-bit values. Depending
    // on the bitdepth the values may be treated as int16_t or uint16_t. See
    // src/dsp/convolve.cc and src/dsp/warp.cc for explanations.
    // Inter/intra calculations output Pixel values.
    // These buffers always use width as the stride. This enables packing the
    // values in and simplifies loads/stores for small values.

    // 10/12 bit compound prediction and 10/12 bit inter/intra prediction.
    alignas(kMaxAlignment) uint16_t
        prediction_buffer[2][kMaxSuperBlockSizeSquareInPixels];
    // 8 bit compound prediction buffer.
    alignas(kMaxAlignment) int16_t
        compound_prediction_buffer_8bpp[2][kMaxSuperBlockSizeSquareInPixels];

    // Union usage note: This is used only by functions in the "intra"
    // prediction path.
    //
    // Buffer used for storing subsampled luma samples needed for CFL
    // prediction. This buffer is used to avoid repetition of the subsampling
    // for the V plane when it is already done for the U plane.
    int16_t cfl_luma_buffer[kCflLumaBufferStride][kCflLumaBufferStride];
  };

  // Buffer used for convolve. The maximum size required for this buffer is:
  //  maximum block height (with scaling and border) = 2 * 128 + 3 + 4 = 263.
  //  maximum block stride (with scaling and border aligned to 16) =
  //     (2 * 128 + 3 + 8 + 5) * pixel_size = 272 * pixel_size.
  //  Where pixel_size is (bitdepth == 8) ? 1 : 2.
  // Has an alignment of kMaxAlignment when allocated.
  // Null until Init() succeeds.
  AlignedUniquePtr<uint8_t> convolve_block_buffer;
  // Stride of |convolve_block_buffer| in bytes. Zero until Init() is called.
  ptrdiff_t convolve_block_buffer_stride = 0;

  // Flag indicating whether the data in |cfl_luma_buffer| is valid. Starts
  // out false so that a freshly created buffer never reports stale data.
  bool cfl_luma_buffer_valid = false;

  // Equivalent to BlockDecoded array in the spec. This stores the decoded
  // state of every 4x4 block in a superblock. It has 1 row/column border on
  // all 4 sides (hence the 34x34 dimension instead of 32x32). Note that the
  // spec uses "-1" as an index to access the left and top borders. In the
  // code, we treat the index (1, 1) as equivalent to the spec's (0, 0). So
  // all accesses into this array will be offset by +1 when compared with the
  // spec.
  bool block_decoded[kMaxPlanes][kBlockDecodedStride][kBlockDecodedStride];
};

// Pool of reusable TileScratchBuffer objects.
//
// Get() hands out a pooled buffer when one is available and creates a new one
// otherwise; Release() returns a buffer to the pool. Access to the pool's
// stack is serialized with |mutex_|.
//
// NOTE(review): Reset() reads and writes |bitdepth_| without holding |mutex_|
// while Get() reads it under the lock. Presumably Reset() is only called
// while no other thread is using the pool -- confirm against the callers.
class TileScratchBufferPool {
 public:
  // Sets the bitdepth used to initialize buffers created by later Get()
  // calls. Does nothing if |bitdepth| equals the current value.
  void Reset(int bitdepth) {
    if (bitdepth_ == bitdepth) return;
#if LIBGAV1_MAX_BITDEPTH >= 10
    if (bitdepth_ == 8 && bitdepth != 8) {
      // We are going from a pixel size of 1 to a pixel size of 2. So invalidate
      // the stack. Buffers pooled for 8-bit were initialized with a one byte
      // pixel size (see TileScratchBuffer::Init()). All other transitions
      // keep the pooled buffers.
      std::lock_guard<std::mutex> lock(mutex_);
      while (!buffers_.Empty()) {
        buffers_.Pop();
      }
    }
#endif
    bitdepth_ = bitdepth;
  }

  // Returns a scratch buffer initialized for the current bitdepth, reusing a
  // pooled one when available. Returns nullptr if a new buffer could not be
  // allocated or initialized.
  std::unique_ptr<TileScratchBuffer> Get() {
    std::lock_guard<std::mutex> lock(mutex_);
    if (buffers_.Empty()) {
      std::unique_ptr<TileScratchBuffer> scratch_buffer(new (std::nothrow)
                                                            TileScratchBuffer);
      if (scratch_buffer == nullptr || !scratch_buffer->Init(bitdepth_)) {
        return nullptr;
      }
      return scratch_buffer;
    }
    return buffers_.Pop();
  }

  // Returns |scratch_buffer| to the pool for reuse. A null |scratch_buffer| is
  // ignored: pooling it would make a later Get() return nullptr, which callers
  // cannot tell apart from an allocation failure.
  void Release(std::unique_ptr<TileScratchBuffer> scratch_buffer) {
    if (scratch_buffer == nullptr) return;
    std::lock_guard<std::mutex> lock(mutex_);
    buffers_.Push(std::move(scratch_buffer));
  }

 private:
  std::mutex mutex_;
  // We will never need more than kMaxThreads scratch buffers since that is the
  // maximum amount of work that will be done at any given time.
  Stack<std::unique_ptr<TileScratchBuffer>, kMaxThreads> buffers_
      LIBGAV1_GUARDED_BY(mutex_);
  // Bitdepth passed to TileScratchBuffer::Init() for newly created buffers.
  // Zero until Reset() has been called.
  int bitdepth_ = 0;
};

}  // namespace libgav1

#endif  // LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_