AOMedia AV1 Codec
nonrd_opt.h
1 /*
2  * Copyright (c) 2022, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AV1_ENCODER_NONRD_OPT_H_
13 #define AOM_AV1_ENCODER_NONRD_OPT_H_
14 
15 #include "av1/encoder/context_tree.h"
16 #include "av1/encoder/rdopt_utils.h"
17 #include "av1/encoder/rdopt.h"
18 
// Number of entries in inter_mode_list.
#define RTC_INTER_MODES (4)
// Number of entries in intra_mode_list.
#define RTC_INTRA_MODES (4)
// Column count for tables that hold either of the two mode lists (mode_idx).
#define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES))
// Scales an RD cost by 7/8 with integer arithmetic. The product is grouped
// explicitly so the evaluation order does not rest on operator precedence.
#define CALC_BIASED_RDCOST(rdcost) ((7 * (rdcost)) >> 3)
// Number of entries in comp_ref_mode_set.
#define NUM_COMP_INTER_MODES_RT (6)
// Number of entries in ref_mode_set.
#define NUM_INTER_MODES 12
// Non-zero when the transform mode search is not restricted to 4x4 and the
// block size is greater than BLOCK_32X32.
#define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize) \
  ((tx_mode_search_type) != ONLY_4X4 && (bsize) > BLOCK_32X32)
// NOTE(review): presumably the transform size used when
// CAP_TX_SIZE_FOR_BSIZE_GT32 holds -- confirm at the call sites.
#define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16)
#define FILTER_SEARCH_SIZE 2
#if !CONFIG_REALTIME_ONLY
#define MOTION_MODE_SEARCH_SIZE 2
#endif

// Defined in another translation unit; its use is not visible in this header.
extern int g_pick_inter_mode_cnt;
// One slot of a prediction-buffer pool. get_pred_buffer() claims the first
// slot whose in_use is zero; free_pred_buffer() releases a slot again.
35 typedef struct {
  // Storage backing this slot.
36  uint8_t *data;
  // NOTE(review): presumably the row pitch of data -- confirm with the caller
  // that allocates the pool.
37  int stride;
  // Non-zero while the slot is claimed; reset to 0 by free_pred_buffer().
38  int in_use;
39 } PRED_BUFFER;
40 
// Record of the best mode decision found so far. It is embedded as the
// best_pickmode member of the non-RD inter mode search state.
// NOTE(review): the per-field notes below are read off the names and types
// only; confirm them against the code that fills this struct in.
41 typedef struct {
  // Prediction buffer slot associated with the best mode (may be a pool slot
  // obtained from get_pred_buffer()).
42  PRED_BUFFER *best_pred;
  // Best prediction mode, transform size and transform type.
43  PREDICTION_MODE best_mode;
44  TX_SIZE best_tx_size;
45  TX_TYPE tx_type;
  // Reference frame pair of the best mode.
46  MV_REFERENCE_FRAME best_ref_frame;
47  MV_REFERENCE_FRAME best_second_ref_frame;
  // Skip-transform flags recorded for the best mode.
48  uint8_t best_mode_skip_txfm;
49  uint8_t best_mode_initial_skip_flag;
  // Interpolation filter pair of the best mode.
50  int_interpfilters best_pred_filter;
  // Motion mode together with its warp parameters and sample count.
51  MOTION_MODE best_motion_mode;
52  WarpedMotionParams wm_params;
53  int num_proj_ref;
  // Palette information of the best mode.
54  PALETTE_MODE_INFO pmi;
  // SSE recorded for the best mode.
55  int64_t best_sse;
56 } BEST_PICKMODE;
57 
// A (reference frame, prediction mode) pair; element type of ref_mode_set.
// The table initializers are positional, so the field order must not change.
58 typedef struct {
59  MV_REFERENCE_FRAME ref_frame;
60  PREDICTION_MODE pred_mode;
61 } REF_MODE;
62 
// A (pair of reference frames, prediction mode) triple; element type of
// comp_ref_mode_set. The table initializers are positional, so the field
// order must not change.
63 typedef struct {
64  MV_REFERENCE_FRAME ref_frame[2];
65  PREDICTION_MODE pred_mode;
66 } COMP_REF_MODE;
67 
// Argument bundle for intra block estimation. init_estimate_block_intra_args()
// fills every field with the defaults noted below.
// NOTE(review): presumably this is what av1_estimate_block_intra() receives
// through its `void *arg` parameter -- confirm at the definition.
68 struct estimate_block_intra_args {
  // Encoder and macroblock context supplied by the caller.
69  AV1_COMP *cpi;
70  MACROBLOCK *x;
  // Intra mode under evaluation; defaults to DC_PRED.
71  PREDICTION_MODE mode;
  // Defaults to 1.
72  int skippable;
  // Defaults to a null pointer; must be set before it is dereferenced.
73  RD_STATS *rdc;
  // Defaults to UINT_MAX.
74  unsigned int best_sad;
  // Both pruning switches default to false.
75  bool prune_mode_based_on_sad;
76  bool prune_palette_sad;
77 };
83 typedef struct {
85  BEST_PICKMODE best_pickmode;
87  RD_STATS this_rdc;
89  RD_STATS best_rdc;
91  int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
93  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
95  unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
97  unsigned int ref_costs_single[REF_FRAMES];
99  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
101  int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES];
103  int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
105  int use_ref_frame_mask[REF_FRAMES];
107  uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
109  bool use_scaled_ref_frame[REF_FRAMES];
111 
// Per-block-size lookup tables with BLOCK_SIZES entries each.
// NOTE(review): the values look like log2 of the block width / height in
// units of 4 samples, assuming the tables are indexed by BLOCK_SIZE in enum
// order -- confirm against the BLOCK_SIZE definition.
112 static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
113  2, 2, 3, 3, 3, 4,
114  4, 4, 5, 5 };
115 static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
116  2, 3, 2, 3, 4, 3,
117  4, 5, 4, 5 };
118 
// The four intra modes considered here; the length equals RTC_INTRA_MODES and
// the order matches the first row of mode_idx.
119 static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
120  SMOOTH_PRED };
121 
// The four single-reference inter modes considered here; the length equals
// RTC_INTER_MODES and the order matches the remaining rows of mode_idx.
122 static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV,
123  NEWMV };
124 
// Maps [reference frame][mode slot] to a THR_MODES enumerator. The first row
// holds the intra entries in intra_mode_list order; every other row holds the
// NEAREST / NEAR / GLOBAL / NEW entries of one reference in inter_mode_list
// order.
// NOTE(review): rows presumably follow the MV_REFERENCE_FRAME enum order
// (intra first, then LAST, LAST2, LAST3, GOLDEN, BWDREF, ALTREF2, ALTREF) --
// confirm against the enum.
125 static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = {
126  { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
127  { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
128  { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
129  { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
130  { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG },
131  { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB },
132  { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 },
133  { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA },
134 };
135 
// Single-reference candidates: the four modes of inter_mode_list for each of
// LAST_FRAME, GOLDEN_FRAME and ALTREF_FRAME, grouped by reference frame
// (3 * 4 = NUM_INTER_MODES entries).
136 // GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT
137 // mode
138 static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
139  { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV },
140  { LAST_FRAME, GLOBALMV }, { LAST_FRAME, NEWMV },
141  { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
142  { GOLDEN_FRAME, GLOBALMV }, { GOLDEN_FRAME, NEWMV },
143  { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
144  { ALTREF_FRAME, GLOBALMV }, { ALTREF_FRAME, NEWMV },
145 };
146 
// Compound (two-reference) candidates: LAST_FRAME paired with GOLDEN_FRAME,
// LAST2_FRAME and ALTREF_FRAME in turn, each pair listed with GLOBAL_GLOBALMV
// followed by NEAREST_NEARESTMV (3 * 2 = NUM_COMP_INTER_MODES_RT entries).
147 static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
148  { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
149  { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
150  { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
151  { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
152  { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
153  { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
154 };
155 
// All nine ordered pairs drawn from EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH and
// MULTITAP_SHARP, stored through the as_filters member. Entries 0-3 cover the
// REGULAR/SMOOTH combinations; entries 4-8 are the ones involving
// MULTITAP_SHARP.
// NOTE(review): which member of each pair is the horizontal and which the
// vertical filter depends on the as_filters field order -- confirm there.
156 static const int_interpfilters filters_ref_set[9] = {
157  [0].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
158  [1].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
159  [2].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH },
160  [3].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
161  [4].as_filters = { MULTITAP_SHARP, MULTITAP_SHARP },
162  [5].as_filters = { EIGHTTAP_REGULAR, MULTITAP_SHARP },
163  [6].as_filters = { MULTITAP_SHARP, EIGHTTAP_REGULAR },
164  [7].as_filters = { EIGHTTAP_SMOOTH, MULTITAP_SHARP },
165  [8].as_filters = { MULTITAP_SHARP, EIGHTTAP_SMOOTH }
166 };
167 
// Bit masks over PREDICTION_MODE values: each constant ORs together
// (1 << mode) for the inter modes named in its identifier. The INTER_ALL
// entry is disabled (commented out).
168 enum {
169  // INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
170  INTER_NEAREST = (1 << NEARESTMV),
171  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
172  INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
173  INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
174 };
175 
176 // The original scan order (default_scan_8x8) is modified according to the extra
177 // transpose in hadamard c implementation, i.e., aom_hadamard_lp_8x8_c and
178 // aom_hadamard_8x8_c.
179 DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
180  0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
181  33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
182  28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
183  23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
184 };
185 
186 // The original scan order (av1_default_iscan_8x8) is modified to match
187 // hadamard AVX2 implementation, i.e., aom_hadamard_lp_8x8_avx2 and
188 // aom_hadamard_8x8_avx2. Since hadamard AVX2 implementation will modify the
189 // order of coefficients, such that the normal scan order is no longer
190 // guaranteed to scan low coefficients first, therefore we modify the scan order
191 // accordingly.
192 // Note that this one has to be used together with default_scan_8x8_transpose.
193 DECLARE_ALIGNED(16, static const int16_t,
194  av1_default_iscan_8x8_transpose[64]) = {
195  0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36,
196  5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49,
197  14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
198  27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
199 };
200 
201 // The original scan order (default_scan_16x16) is modified according to the
202 // extra transpose in hadamard c implementation in lp case, i.e.,
203 // aom_hadamard_lp_16x16_c.
204 DECLARE_ALIGNED(16, static const int16_t,
205  default_scan_lp_16x16_transpose[256]) = {
206  0, 8, 2, 4, 10, 16, 24, 18, 12, 6, 64, 14, 20, 26, 32,
207  40, 34, 28, 22, 72, 66, 68, 74, 80, 30, 36, 42, 48, 56, 50,
208  44, 38, 88, 82, 76, 70, 128, 78, 84, 90, 96, 46, 52, 58, 1,
209  9, 3, 60, 54, 104, 98, 92, 86, 136, 130, 132, 138, 144, 94, 100,
210  106, 112, 62, 5, 11, 17, 25, 19, 13, 7, 120, 114, 108, 102, 152,
211  146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65, 15, 21, 27,
212  33, 41, 35, 29, 23, 73, 67, 124, 118, 168, 162, 156, 150, 200, 194,
213  196, 202, 208, 158, 164, 170, 176, 126, 69, 75, 81, 31, 37, 43, 49,
214  57, 51, 45, 39, 89, 83, 77, 71, 184, 178, 172, 166, 216, 210, 204,
215  198, 206, 212, 218, 224, 174, 180, 186, 129, 79, 85, 91, 97, 47, 53,
216  59, 61, 55, 105, 99, 93, 87, 137, 131, 188, 182, 232, 226, 220, 214,
217  222, 228, 234, 240, 190, 133, 139, 145, 95, 101, 107, 113, 63, 121, 115,
218  109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
219  149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
220  246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
221  211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
222  215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
223  255
224 };
225 
226 #if CONFIG_AV1_HIGHBITDEPTH
227 // The original scan order (default_scan_16x16) is modified according to the
228 // extra shift in hadamard c implementation in fp case, i.e.,
229 // aom_hadamard_16x16_c. Note that 16x16 lp and fp hadamard generate different
230 // outputs, so we handle them separately.
231 DECLARE_ALIGNED(16, static const int16_t,
232  default_scan_fp_16x16_transpose[256]) = {
233  0, 4, 2, 8, 6, 16, 20, 18, 12, 10, 64, 14, 24, 22, 32,
234  36, 34, 28, 26, 68, 66, 72, 70, 80, 30, 40, 38, 48, 52, 50,
235  44, 42, 84, 82, 76, 74, 128, 78, 88, 86, 96, 46, 56, 54, 1,
236  5, 3, 60, 58, 100, 98, 92, 90, 132, 130, 136, 134, 144, 94, 104,
237  102, 112, 62, 9, 7, 17, 21, 19, 13, 11, 116, 114, 108, 106, 148,
238  146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65, 15, 25, 23,
239  33, 37, 35, 29, 27, 69, 67, 124, 122, 164, 162, 156, 154, 196, 194,
240  200, 198, 208, 158, 168, 166, 176, 126, 73, 71, 81, 31, 41, 39, 49,
241  53, 51, 45, 43, 85, 83, 77, 75, 180, 178, 172, 170, 212, 210, 204,
242  202, 206, 216, 214, 224, 174, 184, 182, 129, 79, 89, 87, 97, 47, 57,
243  55, 61, 59, 101, 99, 93, 91, 133, 131, 188, 186, 228, 226, 220, 218,
244  222, 232, 230, 240, 190, 137, 135, 145, 95, 105, 103, 113, 63, 117, 115,
245  109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
246  153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
247  250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
248  211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
249  219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
250  255
251 };
252 #endif
253 
254 // The original scan order (av1_default_iscan_16x16) is modified to match
255 // hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
256 // Since hadamard AVX2 implementation will modify the order of coefficients,
257 // such that the normal scan order is no longer guaranteed to scan low
258 // coefficients first, therefore we modify the scan order accordingly. Note that
259 // this one has to be used together with default_scan_lp_16x16_transpose.
260 DECLARE_ALIGNED(16, static const int16_t,
261  av1_default_iscan_lp_16x16_transpose[256]) = {
262  0, 44, 2, 46, 3, 63, 9, 69, 1, 45, 4, 64, 8, 68, 11,
263  87, 5, 65, 7, 67, 12, 88, 18, 94, 6, 66, 13, 89, 17, 93,
264  24, 116, 14, 90, 16, 92, 25, 117, 31, 123, 15, 91, 26, 118, 30,
265  122, 41, 148, 27, 119, 29, 121, 42, 149, 48, 152, 28, 120, 43, 150,
266  47, 151, 62, 177, 10, 86, 20, 96, 21, 113, 35, 127, 19, 95, 22,
267  114, 34, 126, 37, 144, 23, 115, 33, 125, 38, 145, 52, 156, 32, 124,
268  39, 146, 51, 155, 58, 173, 40, 147, 50, 154, 59, 174, 73, 181, 49,
269  153, 60, 175, 72, 180, 83, 198, 61, 176, 71, 179, 84, 199, 98, 202,
270  70, 178, 85, 200, 97, 201, 112, 219, 36, 143, 54, 158, 55, 170, 77,
271  185, 53, 157, 56, 171, 76, 184, 79, 194, 57, 172, 75, 183, 80, 195,
272  102, 206, 74, 182, 81, 196, 101, 205, 108, 215, 82, 197, 100, 204, 109,
273  216, 131, 223, 99, 203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
274  141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78, 193, 104,
275  208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
276  133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
277  231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
278  168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
279  255
280 };
281 
282 #if CONFIG_AV1_HIGHBITDEPTH
283 // The original scan order (av1_default_iscan_16x16) is modified to match
284 // hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2.
285 // Since hadamard AVX2 implementation will modify the order of coefficients,
286 // such that the normal scan order is no longer guaranteed to scan low
287 // coefficients first, therefore we modify the scan order accordingly. Note that
288 // this one has to be used together with default_scan_fp_16x16_transpose.
289 DECLARE_ALIGNED(16, static const int16_t,
290  av1_default_iscan_fp_16x16_transpose[256]) = {
291  0, 44, 2, 46, 1, 45, 4, 64, 3, 63, 9, 69, 8, 68, 11,
292  87, 5, 65, 7, 67, 6, 66, 13, 89, 12, 88, 18, 94, 17, 93,
293  24, 116, 14, 90, 16, 92, 15, 91, 26, 118, 25, 117, 31, 123, 30,
294  122, 41, 148, 27, 119, 29, 121, 28, 120, 43, 150, 42, 149, 48, 152,
295  47, 151, 62, 177, 10, 86, 20, 96, 19, 95, 22, 114, 21, 113, 35,
296  127, 34, 126, 37, 144, 23, 115, 33, 125, 32, 124, 39, 146, 38, 145,
297  52, 156, 51, 155, 58, 173, 40, 147, 50, 154, 49, 153, 60, 175, 59,
298  174, 73, 181, 72, 180, 83, 198, 61, 176, 71, 179, 70, 178, 85, 200,
299  84, 199, 98, 202, 97, 201, 112, 219, 36, 143, 54, 158, 53, 157, 56,
300  171, 55, 170, 77, 185, 76, 184, 79, 194, 57, 172, 75, 183, 74, 182,
301  81, 196, 80, 195, 102, 206, 101, 205, 108, 215, 82, 197, 100, 204, 99,
302  203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
303  128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78, 193, 104,
304  208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
305  133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
306  231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
307  168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
308  255
309 };
310 #endif
311 
312 // For entropy coding, IDTX shares the scan orders of the other 2D-transforms,
313 // but the fastest way to calculate the IDTX transform (i.e. no transposes)
314 // results in coefficients that are a transposition of the entropy coding
315 // versions. These tables are used as substitute for the scan order for the
316 // faster version of IDTX.
317 
// Scan table for the fast 4x4 IDTX path: entry i is the coefficient position
// visited at scan step i.
318 // Must be used together with av1_fast_idtx_iscan_4x4
319 DECLARE_ALIGNED(16, static const int16_t,
320  av1_fast_idtx_scan_4x4[16]) = { 0, 1, 4, 8, 5, 2, 3, 6,
321  9, 12, 13, 10, 7, 11, 14, 15 };
322 
// Inverse of the table above: av1_fast_idtx_iscan_4x4[av1_fast_idtx_scan_4x4[i]]
// equals i for all 16 positions, so the two tables must be edited together.
323 // Must be used together with av1_fast_idtx_scan_4x4
324 DECLARE_ALIGNED(16, static const int16_t,
325  av1_fast_idtx_iscan_4x4[16]) = { 0, 1, 5, 6, 2, 4, 7, 12,
326  3, 8, 11, 13, 9, 10, 14, 15 };
327 
// Bundles the 4x4 scan and inverse-scan tables, in that order, as a
// SCAN_ORDER.
328 static const SCAN_ORDER av1_fast_idtx_scan_order_4x4 = {
329  av1_fast_idtx_scan_4x4, av1_fast_idtx_iscan_4x4
330 };
331 
332 // Must be used together with av1_fast_idtx_iscan_8x8
333 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_8x8[64]) = {
334  0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
335  12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
336  35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
337  58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
338 };
339 
340 // Must be used together with av1_fast_idtx_scan_8x8
341 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_8x8[64]) = {
342  0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42,
343  3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53,
344  10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
345  21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
346 };
347 
348 static const SCAN_ORDER av1_fast_idtx_scan_order_8x8 = {
349  av1_fast_idtx_scan_8x8, av1_fast_idtx_iscan_8x8
350 };
351 
352 // Must be used together with av1_fast_idtx_iscan_16x16
353 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_16x16[256]) = {
354  0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4,
355  5, 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22,
356  37, 52, 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8,
357  9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100,
358  85, 70, 55, 40, 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131,
359  146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87, 72, 57, 42, 27,
360  12, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208,
361  224, 209, 194, 179, 164, 149, 134, 119, 104, 89, 74, 59, 44, 29, 14,
362  15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225,
363  240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91, 76, 61, 46,
364  31, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
365  243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, 78, 63, 79, 94,
366  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
367  170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, 216, 231,
368  246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
369  218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
370  250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
371  255
372 };
373 
374 // Must be used together with av1_fast_idtx_scan_16x16
375 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_16x16[256]) = {
376  0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90, 91, 119,
377  120, 2, 4, 7, 13, 16, 26, 29, 43, 46, 64, 67, 89, 92, 118,
378  121, 150, 3, 8, 12, 17, 25, 30, 42, 47, 63, 68, 88, 93, 117,
379  122, 149, 151, 9, 11, 18, 24, 31, 41, 48, 62, 69, 87, 94, 116,
380  123, 148, 152, 177, 10, 19, 23, 32, 40, 49, 61, 70, 86, 95, 115,
381  124, 147, 153, 176, 178, 20, 22, 33, 39, 50, 60, 71, 85, 96, 114,
382  125, 146, 154, 175, 179, 200, 21, 34, 38, 51, 59, 72, 84, 97, 113,
383  126, 145, 155, 174, 180, 199, 201, 35, 37, 52, 58, 73, 83, 98, 112,
384  127, 144, 156, 173, 181, 198, 202, 219, 36, 53, 57, 74, 82, 99, 111,
385  128, 143, 157, 172, 182, 197, 203, 218, 220, 54, 56, 75, 81, 100, 110,
386  129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55, 76, 80, 101, 109,
387  130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77, 79, 102, 108,
388  131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78, 103, 107,
389  132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
390  133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
391  134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
392  135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
393  255
394 };
395 
396 // Indicates the blocks for which RD model should be based on special logic
397 static inline int get_model_rd_flag(const AV1_COMP *cpi, const MACROBLOCKD *xd,
398  BLOCK_SIZE bsize) {
399  const AV1_COMMON *const cm = &cpi->common;
400  const int large_block = bsize >= BLOCK_32X32;
401  // Only enable for low bitdepth to mitigate issue: b/303023614.
402  return cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
403  !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
404  cm->quant_params.base_qindex && !cpi->oxcf.use_highbitdepth;
405 }
431 static inline void find_predictors(
432  AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
433  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
434  struct buf_2d yv12_mb[8][MAX_MB_PLANE], BLOCK_SIZE bsize,
435  int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame) {
436  AV1_COMMON *const cm = &cpi->common;
437  MACROBLOCKD *const xd = &x->e_mbd;
438  MB_MODE_INFO *const mbmi = xd->mi[0];
439  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
440  const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, ref_frame);
441  const bool ref_is_scaled =
442  ref->y_crop_height != cm->height || ref->y_crop_width != cm->width;
443  const YV12_BUFFER_CONFIG *scaled_ref =
444  av1_get_scaled_ref_frame(cpi, ref_frame);
445  const YV12_BUFFER_CONFIG *yv12 =
446  ref_is_scaled && scaled_ref ? scaled_ref : ref;
447  const int num_planes = av1_num_planes(cm);
448  x->pred_mv_sad[ref_frame] = INT_MAX;
449  x->pred_mv0_sad[ref_frame] = INT_MAX;
450  x->pred_mv1_sad[ref_frame] = INT_MAX;
451  frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
452  // TODO(kyslov) this needs various further optimizations. to be continued..
453  assert(yv12 != NULL);
454  if (yv12 != NULL) {
455  struct scale_factors *const sf =
456  scaled_ref ? NULL : get_ref_scale_factors(cm, ref_frame);
457  av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
458  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
459  xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
460  mbmi_ext->mode_context);
461  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
462  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
463  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
464  av1_find_best_ref_mvs_from_stack(
465  cm->features.allow_high_precision_mv, mbmi_ext, ref_frame,
466  &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0);
467  frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame];
468  // Early exit for non-LAST frame if force_skip_low_temp_var is set.
469  if (!ref_is_scaled && bsize >= BLOCK_8X8 && !skip_pred_mv &&
470  !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) {
471  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
472  bsize);
473  }
474  }
476  av1_count_overlappable_neighbors(cm, xd);
477  }
478  mbmi->num_proj_ref = 1;
479  *use_scaled_ref_frame = ref_is_scaled && scaled_ref;
480 }
481 
482 static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
483  PREDICTION_MODE pred_mode,
484  MV_REFERENCE_FRAME ref_frame0,
485  MV_REFERENCE_FRAME ref_frame1,
486  const AV1_COMMON *cm) {
487  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
488  mbmi->ref_mv_idx = 0;
489  mbmi->mode = pred_mode;
490  mbmi->uv_mode = UV_DC_PRED;
491  mbmi->ref_frame[0] = ref_frame0;
492  mbmi->ref_frame[1] = ref_frame1;
493  pmi->palette_size[PLANE_TYPE_Y] = 0;
494  pmi->palette_size[PLANE_TYPE_UV] = 0;
495  mbmi->filter_intra_mode_info.use_filter_intra = 0;
496  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
497  mbmi->motion_mode = SIMPLE_TRANSLATION;
498  mbmi->num_proj_ref = 1;
499  mbmi->interintra_mode = 0;
500  set_default_interp_filters(mbmi, cm->features.interp_filter);
501 }
502 
503 static inline void init_estimate_block_intra_args(
504  struct estimate_block_intra_args *args, AV1_COMP *cpi, MACROBLOCK *x) {
505  args->cpi = cpi;
506  args->x = x;
507  args->mode = DC_PRED;
508  args->skippable = 1;
509  args->rdc = 0;
510  args->best_sad = UINT_MAX;
511  args->prune_mode_based_on_sad = false;
512  args->prune_palette_sad = false;
513 }
514 
515 static inline int get_pred_buffer(PRED_BUFFER *p, int len) {
516  for (int buf_idx = 0; buf_idx < len; buf_idx++) {
517  if (!p[buf_idx].in_use) {
518  p[buf_idx].in_use = 1;
519  return buf_idx;
520  }
521  }
522  return -1;
523 }
524 
525 static inline bool prune_palette_testing_inter(AV1_COMP *cpi,
526  unsigned int source_variance) {
527  return (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
528  cpi->oxcf.speed >= 11 && cpi->rc.high_source_sad &&
529  cpi->sf.rt_sf.rc_compute_spatial_var_sc &&
530  cpi->rc.frame_spatial_variance < 1200 &&
531  cpi->rc.perc_spatial_flat_blocks < 5 &&
532  cpi->rc.percent_blocks_with_motion > 98 && source_variance < 4000);
533 }
534 
535 static inline void free_pred_buffer(PRED_BUFFER *p) {
536  if (p != NULL) p->in_use = 0;
537 }
538 
539 #if CONFIG_INTERNAL_STATS
540 static inline void store_coding_context_nonrd(MACROBLOCK *x,
541  PICK_MODE_CONTEXT *ctx,
542  int mode_index) {
543 #else
544 static inline void store_coding_context_nonrd(MACROBLOCK *x,
545  PICK_MODE_CONTEXT *ctx) {
546 #endif // CONFIG_INTERNAL_STATS
547  MACROBLOCKD *const xd = &x->e_mbd;
548  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
549 
550  // Take a snapshot of the coding context so it can be
551  // restored if we decide to encode this way
552  ctx->rd_stats.skip_txfm = txfm_info->skip_txfm;
553 
554  ctx->skippable = txfm_info->skip_txfm;
555 #if CONFIG_INTERNAL_STATS
556  ctx->best_mode_index = mode_index;
557 #endif // CONFIG_INTERNAL_STATS
558  ctx->mic = *xd->mi[0];
559  ctx->skippable = txfm_info->skip_txfm;
560  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
561  av1_ref_frame_type(xd->mi[0]->ref_frame));
562 }
563 
// Entry points implemented outside this header; only their signatures are
// visible here, so the notes below are hedged. Confirm details at the
// definitions.

// NOTE(review): this_rdc and skippable are non-const pointers and look like
// outputs for a block of size bsize evaluated with transform size tx_size.
564 void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
565  BLOCK_SIZE bsize, TX_SIZE tx_size);
566 
// NOTE(review): same shape as av1_block_yrd() plus a read-only prediction
// buffer (pred_buf / pred_stride); the name suggests the IDTX variant.
567 void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf,
568  int pred_stride, RD_STATS *this_rdc, int *skippable,
569  BLOCK_SIZE bsize, TX_SIZE tx_size);
570 
// NOTE(review): takes a plane range [start_plane, stop_plane] and returns an
// int64_t; whether stop_plane is inclusive is not visible here.
571 int64_t av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
572  MACROBLOCK *x, MACROBLOCKD *xd,
573  RD_STATS *this_rdc, int start_plane,
574  int stop_plane);
575 
// NOTE(review): callback-style signature with an opaque `arg`; presumably
// `arg` points to a struct estimate_block_intra_args.
576 void av1_estimate_block_intra(int plane, int block, int row, int col,
577  BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
578  void *arg);
579 
// NOTE(review): best_rdc, best_pickmode, this_mode_pred and best_sad_norm are
// non-const pointers and look like in/out parameters; tmp_buffers has the
// PRED_BUFFER pool type used by get_pred_buffer() / free_pred_buffer().
580 void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
581  int best_early_term, unsigned int ref_cost_intra,
582  int reuse_prediction, struct buf_2d *orig_dst,
583  PRED_BUFFER *tmp_buffers,
584  PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
585  BEST_PICKMODE *best_pickmode,
586  PICK_MODE_CONTEXT *ctx,
587  unsigned int *best_sad_norm);
588 
589 #endif // AOM_AV1_ENCODER_NONRD_OPT_H_
@ AOM_CBR
Definition: aom_encoder.h:187
static void find_predictors(AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES], struct buf_2d yv12_mb[8][3], BLOCK_SIZE bsize, int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame)
Finds predicted motion vectors for a block.
Definition: nonrd_opt.h:431
Top level common structure used by both encoder and decoder.
Definition: av1_common_int.h:757
int width
Definition: av1_common_int.h:782
FeatureFlags features
Definition: av1_common_int.h:912
CommonQuantParams quant_params
Definition: av1_common_int.h:929
int height
Definition: av1_common_int.h:783
RateControlCfg rc_cfg
Definition: encoder.h:955
Top level encoder structure.
Definition: encoder.h:2878
RATE_CONTROL rc
Definition: encoder.h:3085
SPEED_FEATURES sf
Definition: encoder.h:3105
AV1EncoderConfig oxcf
Definition: encoder.h:2926
AV1_COMMON common
Definition: encoder.h:2921
int base_qindex
Definition: av1_common_int.h:620
InterpFilter interp_filter
Definition: av1_common_int.h:414
bool switchable_motion_mode
Definition: av1_common_int.h:412
bool allow_high_precision_mv
Definition: av1_common_int.h:374
Structure to store parameters and statistics used in non-rd inter mode evaluation.
Definition: nonrd_opt.h:83
RD_STATS this_rdc
Structure holding the RD cost of the current mode.
Definition: nonrd_opt.h:87
RD_STATS best_rdc
RD cost of the best mode found so far (stored by value, not as a pointer).
Definition: nonrd_opt.h:89
BEST_PICKMODE best_pickmode
Structure to hold best inter mode data.
Definition: nonrd_opt.h:85
Extended mode info derived from mbmi.
Definition: block.h:222
int_mv global_mvs[REF_FRAMES]
Global mvs.
Definition: block.h:231
int16_t mode_context[MODE_CTX_REF_FRAMES]
Context used to encode the current mode.
Definition: block.h:233
uint8_t ref_mv_count[MODE_CTX_REF_FRAMES]
Number of ref mvs in the drl.
Definition: block.h:229
Stores the prediction/txfm mode of the current coding block.
Definition: blockd.h:222
int_mv mv[2]
The motion vectors used by the current inter mode.
Definition: blockd.h:244
PREDICTION_MODE mode
The prediction mode used.
Definition: blockd.h:232
UV_PREDICTION_MODE uv_mode
The UV mode when intra is used.
Definition: blockd.h:234
PALETTE_MODE_INFO palette_mode_info
Stores the size and colors of palette mode.
Definition: blockd.h:280
uint8_t segment_id
The segment id.
Definition: blockd.h:310
uint8_t ref_mv_idx
Which ref_mv to use.
Definition: blockd.h:314
MV_REFERENCE_FRAME ref_frame[2]
The reference frames for the MV.
Definition: blockd.h:246
FILTER_INTRA_MODE_INFO filter_intra_mode_info
The type of filter intra mode used (if applicable).
Definition: blockd.h:274
MOTION_MODE motion_mode
The motion mode used by the inter prediction.
Definition: blockd.h:250
uint8_t num_proj_ref
Number of samples used by warp causal.
Definition: blockd.h:252
INTERINTRA_MODE interintra_mode
The type of intra mode used by inter-intra.
Definition: blockd.h:259
enum aom_rc_mode mode
Definition: encoder.h:609
REAL_TIME_SPEED_FEATURES rt_sf
Definition: speed_features.h:2008
Stores various encoding/search decisions related to txfm search.
Definition: block.h:526
uint8_t skip_txfm
Whether to skip transform and quantization on a partition block level.
Definition: block.h:528
Encoder's parameters related to the current coding block.
Definition: block.h:878
MACROBLOCKD e_mbd
Decoder's view of current coding block.
Definition: block.h:896
int pred_mv1_sad[REF_FRAMES]
The sad of the 2nd mv ref (near).
Definition: block.h:1115
int pred_mv0_sad[REF_FRAMES]
The sad of the 1st mv ref (nearest).
Definition: block.h:1113
TxfmSearchInfo txfm_search_info
Results of the txfm searches that have been done.
Definition: block.h:1311
int pred_mv_sad[REF_FRAMES]
Sum absolute distortion of the predicted mv for each ref frame.
Definition: block.h:1105
MB_MODE_INFO_EXT mbmi_ext
Derived coding information.
Definition: block.h:903
Variables related to current coding block.
Definition: blockd.h:570
uint16_t weight[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]
Definition: blockd.h:781
CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]
Definition: blockd.h:776
MB_MODE_INFO ** mi
Definition: blockd.h:617
YV12 frame buffer data structure.
Definition: yv12config.h:46