stereosgbm_mod.cpp

Stefan Hahn, 2012-07-26 08:53 am

Download (47.1 kB)

 
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
//  By downloading, copying, installing or using the software you agree to this license.
6
//  If you do not agree to this license, do not download, install,
7
//  copy or use the software.
8
//
9
//
10
//                           License Agreement
11
//                For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
//   * Redistribution's of source code must retain the above copyright notice,
21
//     this list of conditions and the following disclaimer.
22
//
23
//   * Redistribution's in binary form must reproduce the above copyright notice,
24
//     this list of conditions and the following disclaimer in the documentation
25
//     and/or other materials provided with the distribution.
26
//
27
//   * The name of the copyright holders may not be used to endorse or promote products
28
//     derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42
43
/*
44
 This is a variation of
45
 "Stereo Processing by Semiglobal Matching and Mutual Information"
46
 by Heiko Hirschmuller.
47
48
 We match blocks rather than individual pixels, thus the algorithm is called
49
 SGBM (Semi-global block matching)
50
 */
51
52
#include "precomp.hpp"
53
#include <limits.h>
54
55
namespace cv
56
{
57
58
typedef uchar PixType;
59
typedef short CostType;
60
typedef short DispType;
61
62
enum { NR = 16, NR2 = NR/2 };
63
64
StereoSGBM::StereoSGBM()
65
{
66
    minDisparity = numberOfDisparities = 0;
67
    SADWindowSize = 0;
68
    P1 = P2 = 0;
69
    disp12MaxDiff = 0;
70
    preFilterCap = 0;
71
    uniquenessRatio = 0;
72
    speckleWindowSize = 0;
73
    speckleRange = 0;
74
    fullDP = false;
75
}
76
77
78
StereoSGBM::StereoSGBM( int _minDisparity, int _numDisparities, int _SADWindowSize,
79
                   int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
80
                   int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
81
                   bool _fullDP )
82
{
83
    minDisparity = _minDisparity;
84
    numberOfDisparities = _numDisparities;
85
    SADWindowSize = _SADWindowSize;
86
    P1 = _P1;
87
    P2 = _P2;
88
    disp12MaxDiff = _disp12MaxDiff;
89
    preFilterCap = _preFilterCap;
90
    uniquenessRatio = _uniquenessRatio;
91
    speckleWindowSize = _speckleWindowSize;
92
    speckleRange = _speckleRange;
93
    fullDP = _fullDP;
94
}
95
96
97
StereoSGBM::~StereoSGBM()
98
{
99
}
100
101
/*
102
 For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
103
 and for each disparity minD<=d<maxD the function
104
 computes the cost (cost[(x-minX)*(maxD - minD) + (d - minD)]), depending on the difference between
105
 row1[x] and row2[x-d]. The subpixel algorithm from
106
 "Depth Discontinuities by Pixel-to-Pixel Stereo" by Stan Birchfield and C. Tomasi
107
 is used, hence the suffix BT.
108
109
 the temporary buffer should contain width2*2 elements
110
 */
111
static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
112
                            int minD, int maxD, CostType* cost,
113
                            PixType* buf, const PixType* tab,
114
                            int tabOfs, int )
115
{
116
    int x, c, width = img1.cols, cn = img1.channels();
117
    int minX1 = max(maxD, 0), maxX1 = width + min(minD, 0);
118
    int minX2 = max(minX1 - maxD, 0), maxX2 = min(maxX1 - minD, width);
119
    int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
120
    const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
121
    PixType *buffer = buf;
122
    PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;
123
124
    tab += tabOfs;
125
126
    for( c = 0; c < cn*2; c++ )
127
    {
128
        prow1[width*c] = prow1[width*c + width-1] =
129
        prow2[width*c] = prow2[width*c + width-1] = tab[0];
130
    }
131
132
    int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
133
    int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;
134
135
    if( cn == 1 )
136
    {
137
        for( x = 1; x < width-1; x++ )
138
        {
139
            prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
140
            prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];
141
142
            prow1[x+width] = row1[x];
143
            prow2[width-1-x+width] = row2[x];
144
        }
145
    }
146
    else
147
    {
148
        for( x = 1; x < width-1; x++ )
149
        {
150
            prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
151
            prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
152
            prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];
153
154
            prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
155
            prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
156
            prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];
157
158
            prow1[x+width*3] = row1[x*3];
159
            prow1[x+width*4] = row1[x*3+1];
160
            prow1[x+width*5] = row1[x*3+2];
161
162
            prow2[width-1-x+width*3] = row2[x*3];
163
            prow2[width-1-x+width*4] = row2[x*3+1];
164
            prow2[width-1-x+width*5] = row2[x*3+2];
165
        }
166
    }
167
168
    memset( cost, 0, width1*D*sizeof(cost[0]) );
169
170
    buffer -= minX2;
171
    cost -= minX1*D + minD; // simplify the cost indices inside the loop
172
173
#if CV_SSE2
174
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
175
#endif
176
177
#if 1
178
    for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
179
    {
180
        int diff_scale = c < cn ? 0 : 2;
181
182
        // precompute
183
        //   v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and
184
        //   v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and
185
        for( x = minX2; x < maxX2; x++ )
186
        {
187
            int v = prow2[x];
188
            int vl = x > 0 ? (v + prow2[x-1])/2 : v;
189
            int vr = x < width-1 ? (v + prow2[x+1])/2 : v;
190
            int v0 = min(vl, vr); v0 = min(v0, v);
191
            int v1 = max(vl, vr); v1 = max(v1, v);
192
            buffer[x] = (PixType)v0;
193
            buffer[x + width2] = (PixType)v1;
194
        }
195
196
        for( x = minX1; x < maxX1; x++ )
197
        {
198
            int u = prow1[x];
199
            int ul = x > 0 ? (u + prow1[x-1])/2 : u;
200
            int ur = x < width-1 ? (u + prow1[x+1])/2 : u;
201
            int u0 = min(ul, ur); u0 = min(u0, u);
202
            int u1 = max(ul, ur); u1 = max(u1, u);
203
204
        #if CV_SSE2
205
            if( useSIMD )
206
            {
207
                __m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0);
208
                __m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128();
209
                __m128i ds = _mm_cvtsi32_si128(diff_scale);
210
211
                for( int d = minD; d < maxD; d += 16 )
212
                {
213
                    __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d));
214
                    __m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d));
215
                    __m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2));
216
                    __m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u));
217
                    __m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v));
218
                    __m128i diff = _mm_min_epu8(c0, c1);
219
220
                    c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
221
                    c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
222
223
                    _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds)));
224
                    _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds)));
225
                }
226
            }
227
            else
228
        #endif
229
            {
230
                for( int d = minD; d < maxD; d++ )
231
                {
232
                    int v = prow2[width-x-1 + d];
233
                    int v0 = buffer[width-x-1 + d];
234
                    int v1 = buffer[width-x-1 + d + width2];
235
                    int c0 = max(0, u - v1); c0 = max(c0, v0 - u);
236
                    int c1 = max(0, v - u1); c1 = max(c1, u0 - v);
237
238
                    cost[x*D + d] = (CostType)(cost[x*D+d] + (min(c0, c1) >> diff_scale));
239
                }
240
            }
241
        }
242
    }
243
#else
244
    for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
245
    {
246
        for( x = minX1; x < maxX1; x++ )
247
        {
248
            int u = prow1[x];
249
        #if CV_SSE2
250
            if( useSIMD )
251
            {
252
                __m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128();
253
254
                for( int d = minD; d < maxD; d += 16 )
255
                {
256
                    __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d));
257
                    __m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u));
258
                    __m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
259
                    __m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
260
261
                    _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z)));
262
                    _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z)));
263
                }
264
            }
265
            else
266
        #endif
267
            {
268
                for( int d = minD; d < maxD; d++ )
269
                {
270
                    int v = prow2[width-1-x + d];
271
                    cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v));
272
                }
273
            }
274
        }
275
    }
276
#endif
277
}
278
279
280
/*
281
 computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
282
 that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
283
 minD <= d < maxD.
284
 disp2full is the reverse disparity map, that is:
285
 disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)
286
287
 note that disp1buf will have the same size as the roi and
288
 disp2full will have the same size as img1 (or img2).
289
 On exit disp2buf is not the final disparity, it is an intermediate result that becomes
290
 final after all the tiles are processed.
291
292
 the disparity in disp1buf is written with sub-pixel accuracy
293
 (4 fractional bits, see CvStereoSGBM::DISP_SCALE),
294
 using quadratic interpolation, while the disparity in disp2buf
295
 is written as is, without interpolation.
296
297
 disp2cost also has the same size as img1 (or img2).
298
 It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost.
299
 */
300
static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
301
                                 Mat& disp1, const StereoSGBM& params,
302
                                 Mat& buffer )
303
{
304
#if CV_SSE2
305
    static const uchar LSBTab[] =
306
    {
307
        0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
308
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
309
        6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
310
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
311
        7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
312
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
313
        6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
314
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
315
    };
316
317
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
318
#endif
319
320
    const int ALIGN = 16;
321
    const int DISP_SHIFT = StereoSGBM::DISP_SHIFT;
322
    const int DISP_SCALE = StereoSGBM::DISP_SCALE;
323
    const CostType MAX_COST = SHRT_MAX;
324
325
    int minD = params.minDisparity, maxD = minD + params.numberOfDisparities;
326
    Size SADWindowSize;
327
    SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
328
    int ftzero = max(params.preFilterCap, 15) | 1;
329
    int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10;
330
    int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
331
    int P1 = params.P1 > 0 ? params.P1 : 2, P2 = max(params.P2 > 0 ? params.P2 : 5, P1+1);
332
    int k, width = disp1.cols, height = disp1.rows;
333
    int minX1 = max(maxD, 0), maxX1 = width + min(minD, 0);
334
    int D = maxD - minD, width1 = maxX1 - minX1;
335
    int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
336
    int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2;
337
    int npasses = params.fullDP ? 2 : 1;
338
    const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
339
    PixType clipTab[TAB_SIZE];
340
    int minLr0Pos = 0;
341
342
    for( k = 0; k < TAB_SIZE; k++ )
343
        clipTab[k] = (PixType)(min(max(k - TAB_OFS, -ftzero), ftzero) + ftzero);
344
345
    if( minX1 >= maxX1 )
346
    {
347
        disp1 = Scalar::all(INVALID_DISP_SCALED);
348
        return;
349
    }
350
351
    CV_Assert( D % 16 == 0 );
352
353
    // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
354
    // if you change NR, please, modify the loop as well.
355
    int D2 = D+16, NRD2 = NR2*D2;
356
357
    // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
358
    // for 8-way dynamic programming we need the current row and
359
    // the previous row, i.e. 2 rows in total
360
    const int NLR = 2;
361
    const int LrBorder = NLR - 1;
362
363
    // for each possible stereo match (img1(x,y) <=> img2(x-d,y))
364
    // we keep pixel difference cost (C) and the summary cost over NR directions (S).
365
    // we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
366
    size_t costBufSize = width1*D;
367
    size_t CSBufSize = costBufSize*(params.fullDP ? height : 1);
368
    size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2;
369
    int hsumBufNRows = SH2*2 + 2;
370
    size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
371
    costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff
372
    CSBufSize*2*sizeof(CostType) + // C, S
373
    width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
374
    width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2
375
376
    /*
377
    if( !buffer.data || !buffer.isContinuous() ||
378
        buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
379
        buffer.create(1, (int)totalBufSize, CV_8U);
380
    // summary cost over different (nDirs) directions
381
    CostType* Cbuf = (CostType*)alignPtr(buffer.data, ALIGN);
382
    CostType* Sbuf = Cbuf + CSBufSize;
383
    CostType* hsumBuf = Sbuf + CSBufSize;
384
    CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;
385
386
    CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
387
    DispType* disp2ptr = (DispType*)(disp2cost + width);
388
    PixType* tempBuf = (PixType*)(disp2ptr + width);
389
*/
390
    CostType* Cbuf     = (CostType*)fastMalloc(CSBufSize*sizeof(CostType));
391
    CostType* Sbuf     = (CostType*)fastMalloc(CSBufSize*sizeof(CostType));
392
    CostType* hsumBuf  = (CostType*)fastMalloc(costBufSize*(hsumBufNRows+1)*sizeof(CostType));
393
    CostType* pixDiff  = (CostType*)fastMalloc((costBufSize + (LrSize + minLrSize)*NLR)*sizeof(CostType));
394
395
    CostType* disp2cost = (CostType*)fastMalloc(width*sizeof(CostType));
396
    DispType* disp2ptr  = (DispType*)fastMalloc(width*sizeof(DispType));
397
398
    PixType*  tempBuf   = (PixType*)fastMalloc(1024+width*16*img1.channels()*sizeof(PixType));
399
400
    CostType* Cbufc=Cbuf, *Sbufc=Sbuf, *hsumBufc=hsumBuf, *pixDiffc = pixDiff, *disp2costc=disp2cost;
401
    DispType* disp2ptrc=disp2ptr; PixType* tempBufc = tempBuf;
402
403
404
    // add P2 to every C(x,y). it saves a few operations in the inner loops
405
    for( k = 0; k < width1*D; k++ )
406
        Cbuf[k] = (CostType)P2;
407
408
    for( int pass = 1; pass <= npasses; pass++ )
409
    {
410
        int x1, y1, x2, y2, dx, dy;
411
412
        if( pass == 1 )
413
        {
414
            y1 = 0; y2 = height; dy = 1;
415
            x1 = 0; x2 = width1; dx = 1;
416
        }
417
        else
418
        {
419
            y1 = height-1; y2 = -1; dy = -1;
420
            x1 = width1-1; x2 = -1; dx = -1;
421
422
        }
423
424
        CostType *Lr[NLR]={0}, *minLr[NLR]={0};
425
426
        for( k = 0; k < NLR; k++ )
427
        {
428
            // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
429
            // and will occasionally use negative indices with the arrays
430
            // we need to shift Lr[k] pointers by 1, to give the space for d=-1.
431
            // however, then the alignment will be imperfect, i.e. bad for SSE,
432
            // thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment)
433
            Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8;
434
            memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) );
435
            minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*2;
436
            memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
437
        }
438
_CrtCheckMemory;
439
        for( int y = y1; y != y2; y += dy )
440
        {
441
            int x, d;
442
            DispType* disp1ptr = disp1.ptr<DispType>(y);
443
            CostType* C = Cbuf + (!params.fullDP ? 0 : y*costBufSize);
444
            CostType* S = Sbuf + (!params.fullDP ? 0 : y*costBufSize);
445
446
            if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
447
            {
448
                int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
449
450
                for( k = dy1; k <= dy2; k++ )
451
                {
452
                    CostType* hsumAdd = hsumBuf + (min(k, height-1) % hsumBufNRows)*costBufSize;
453
454
                    if( k < height )
455
                    {
456
                        calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );
457
458
                        memset(hsumAdd, 0, D*sizeof(CostType));
459
                        for( x = 0; x <= SW2*D; x += D )
460
                        {
461
                            int scale = x == 0 ? SW2 + 1 : 1;
462
                            for( d = 0; d < D; d++ )
463
                                hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);
464
                        }
465
466
                        if( y > 0 )
467
                        {
468
                            const CostType* hsumSub = hsumBuf + (max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
469
                            const CostType* Cprev = !params.fullDP || y == 0 ? C : C - costBufSize;
470
471
                            for( x = D; x < width1*D; x += D )
472
                            {
473
                                const CostType* pixAdd = pixDiff + min(x + SW2*D, (width1-1)*D);
474
                                const CostType* pixSub = pixDiff + max(x - (SW2+1)*D, 0);
475
476
                            #if CV_SSE2
477
                                if( useSIMD )
478
                                {
479
                                    for( d = 0; d < D; d += 8 )
480
                                    {
481
                                        __m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d));
482
                                        __m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d));
483
                                        hv = _mm_adds_epi16(_mm_subs_epi16(hv,
484
                                                                           _mm_load_si128((const __m128i*)(pixSub + d))),
485
                                                            _mm_load_si128((const __m128i*)(pixAdd + d)));
486
                                        Cx = _mm_adds_epi16(_mm_subs_epi16(Cx,
487
                                                                           _mm_load_si128((const __m128i*)(hsumSub + x + d))),
488
                                                            hv);
489
                                        _mm_store_si128((__m128i*)(hsumAdd + x + d), hv);
490
                                        _mm_store_si128((__m128i*)(C + x + d), Cx);
491
                                    }
492
                                }
493
                                else
494
                            #endif
495
                                {
496
                                    for( d = 0; d < D; d++ )
497
                                    {
498
                                        int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
499
                                        C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]);
500
                                    }
501
                                }
502
                            }
503
                        }
504
                        else
505
                        {
506
                            for( x = D; x < width1*D; x += D )
507
                            {
508
                                const CostType* pixAdd = pixDiff + min(x + SW2*D, (width1-1)*D);
509
                                const CostType* pixSub = pixDiff + max(x - (SW2+1)*D, 0);
510
511
                                for( d = 0; d < D; d++ )
512
                                    hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
513
                            }
514
                        }
515
                    }
516
517
                    if( y == 0 )
518
                    {
519
                        int scale = k == 0 ? SH2 + 1 : 1;
520
                        for( x = 0; x < width1*D; x++ )
521
                            C[x] = (CostType)(C[x] + hsumAdd[x]*scale);
522
                    }
523
524
                }
525
526
                // also, clear the S buffer
527
                for( k = 0; k < width1*D; k++ )
528
                    S[k] = 0;
529
            }
530
531
            // clear the left and the right borders
532
            memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
533
            memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
534
            memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
535
            memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );
536
537
            /*
538
             [formula 13 in the paper]
539
             compute L_r(p, d) = C(p, d) +
540
             min(L_r(p-r, d),
541
             L_r(p-r, d-1) + P1,
542
             L_r(p-r, d+1) + P1,
543
             min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k)
544
             where p = (x,y), r is one of the directions.
545
             we process all the directions at once:
546
             0: r=(-dx, 0)
547
             1: r=(-1, -dy)
548
             2: r=(0, -dy)
549
             3: r=(1, -dy)
550
             4: r=(-2, -dy)
551
             5: r=(-1, -dy*2)
552
             6: r=(1, -dy*2)
553
             7: r=(2, -dy)
554
             */
555
556
            for( x = x1; x != x2; x += dx )
557
            {
558
                int xm = x*NR2, xd = xm*D2;
559
560
                int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
561
                int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;
562
563
                CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
564
                CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
565
                CostType* Lr_p2 = Lr[1] + xd + D2*2;
566
                CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;
567
568
                Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
569
                Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;
570
571
                CostType* Lr_p = Lr[0] + xd;
572
                const CostType* Cp = C + x*D;
573
                CostType* Sp = S + x*D;
574
575
            #if CV_SSE2*0
576
                if( useSIMD )
577
                {
578
                    __m128i _P1 = _mm_set1_epi16((short)P1);
579
580
                    __m128i _delta0 = _mm_set1_epi16((short)delta0);
581
                    __m128i _delta1 = _mm_set1_epi16((short)delta1);
582
                    __m128i _delta2 = _mm_set1_epi16((short)delta2);
583
                    __m128i _delta3 = _mm_set1_epi16((short)delta3);
584
                    __m128i _minL0 = _mm_set1_epi16((short)MAX_COST);
585
586
                    for( d = 0; d < D; d += 8 )
587
                    {
588
                        __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
589
                        __m128i L0, L1, L2, L3;
590
591
                        L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
592
                        L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
593
                        L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
594
                        L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));
595
596
                        L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
597
                        L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
598
599
                        L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
600
                        L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));
601
602
                        L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
603
                        L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));
604
605
                        L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
606
                        L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));
607
608
                        L0 = _mm_min_epi16(L0, _delta0);
609
                        L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
610
611
                        L1 = _mm_min_epi16(L1, _delta1);
612
                        L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);
613
614
                        L2 = _mm_min_epi16(L2, _delta2);
615
                        L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);
616
617
                        L3 = _mm_min_epi16(L3, _delta3);
618
                        L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);
619
620
                        _mm_store_si128( (__m128i*)(Lr_p + d), L0);
621
                        _mm_store_si128( (__m128i*)(Lr_p + d + D2), L1);
622
                        _mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2);
623
                        _mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3);
624
625
                        __m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
626
                        __m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
627
                        t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
628
                        _minL0 = _mm_min_epi16(_minL0, t0);
629
630
                        __m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));
631
632
                        L0 = _mm_adds_epi16(L0, L1);
633
                        L2 = _mm_adds_epi16(L2, L3);
634
                        Sval = _mm_adds_epi16(Sval, L0);
635
                        Sval = _mm_adds_epi16(Sval, L2);
636
637
                        _mm_store_si128((__m128i*)(Sp + d), Sval);
638
                    }
639
640
                    _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
641
                    _mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
642
                }
643
                else
644
            #endif
645
                {
646
                    int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST;
647
648
                    int minpos = minLr0Pos, P12=P1+P2;
649
                    for( d = 0; d < D; d++ )
650
                    {
651
                        int Cpd = Cp[d], L0, L1, L2, L3;
652
653
                            if (xd > 0)
654
                                if (d < minpos)
655
                                        L0 = min((int)Lr_p0[d+1] + P12, Cpd + min((int)Lr_p0[d], min(Lr_p0[d-1] + P1, delta0))) - delta0;
656
                                else
657
                                        L0 = min((int)Lr_p0[d+1] + P12, Cpd + min((int)Lr_p0[d], Lr_p0[d-1] + P1)) - delta0;
658
                        else
659
                                if (d < minpos)
660
                                        L0 = min((int)Lr_p0[d-1] + P12, Cpd + min((int)Lr_p0[d], Lr_p0[d+1] + P1)) - delta0;
661
                                else
662
                                        L0 = min((int)Lr_p0[d-1] + P12, Cpd + min((int)Lr_p0[d], min(Lr_p0[d+1] + P1, delta0))) - delta0;
663
664
                        L1 = Cpd + min((int)Lr_p1[d], min(Lr_p1[d-1] + P1, min(Lr_p1[d+1] + P1, delta1))) - delta1;
665
                        L2 = Cpd + min((int)Lr_p2[d], min(Lr_p2[d-1] + P1, min(Lr_p2[d+1] + P1, delta2))) - delta2;
666
                        L3 = Cpd + min((int)Lr_p3[d], min(Lr_p3[d-1] + P1, min(Lr_p3[d+1] + P1, delta3))) - delta3;
667
668
                        Lr_p[d] = (CostType)L0;
669
                        if (L0 < minL0) {
670
                            minL0 = L0;
671
                            minLr0Pos = d;
672
                        }
673
                        
674
                        Lr_p[d + D2] = (CostType)L1;
675
                        minL1 = min(minL1, L1);
676
677
                        Lr_p[d + D2*2] = (CostType)L2;
678
                        minL2 = min(minL2, L2);
679
680
                        Lr_p[d + D2*3] = (CostType)L3;
681
                        minL3 = min(minL3, L3);
682
683
                        Sp[d] = saturate_cast<CostType>(Sp[d] + L0 + L1 + L2 + L3);
684
                    }
685
                    minLr[0][xm] = (CostType)minL0;
686
                    minLr[0][xm+1] = (CostType)minL1;
687
                    minLr[0][xm+2] = (CostType)minL2;
688
                    minLr[0][xm+3] = (CostType)minL3;
689
                }
690
            }
691
692
            if( pass == npasses )
693
            {
694
                for( x = 0; x < width; x++ )
695
                {
696
                    disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED;
697
                    disp2cost[x] = MAX_COST;
698
                }
699
700
                for( x = width1 - 1; x >= 0; x-- )
701
                {
702
                    CostType* Sp = S + x*D;
703
                    int minS = MAX_COST, bestDisp = -1;
704
705
                    if( npasses == 1 )
706
                    {
707
                        int xm = x*NR2, xd = xm*D2;
708
709
                        int minL0 = MAX_COST;
710
                        int delta0 = minLr[0][xm + NR2] + P2;
711
                        CostType* Lr_p0 = Lr[0] + xd + NRD2;
712
                        Lr_p0[-1] = Lr_p0[D] = MAX_COST;
713
                        CostType* Lr_p = Lr[0] + xd;
714
715
                        const CostType* Cp = C + x*D;
716
717
                    #if CV_SSE2
718
                        if( useSIMD )
719
                        {
720
                            __m128i _P1 = _mm_set1_epi16((short)P1);
721
                            __m128i _delta0 = _mm_set1_epi16((short)delta0);
722
723
                            __m128i _minL0 = _mm_set1_epi16((short)minL0);
724
                            __m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1);
725
                            __m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8);
726
727
                            for( d = 0; d < D; d += 8 )
728
                            {
729
                                __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0;
730
731
                                L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
732
                                L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
733
                                L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
734
                                L0 = _mm_min_epi16(L0, _delta0);
735
                                L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
736
737
                                _mm_store_si128((__m128i*)(Lr_p + d), L0);
738
                                _minL0 = _mm_min_epi16(_minL0, L0);
739
                                L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d));
740
                                _mm_store_si128((__m128i*)(Sp + d), L0);
741
742
                                __m128i mask = _mm_cmpgt_epi16(_minS, L0);
743
                                _minS = _mm_min_epi16(_minS, L0);
744
                                _bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask));
745
                                _d8 = _mm_adds_epi16(_d8, _8);
746
                            }
747
748
                            short CV_DECL_ALIGNED(16) bestDispBuf[8];
749
                            _mm_store_si128((__m128i*)bestDispBuf, _bestDisp);
750
751
                            _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
752
                            _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
753
                            _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));
754
755
                            __m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
756
                            qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
757
                            qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));
758
759
                            minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
760
                            minS = (CostType)_mm_cvtsi128_si32(qS);
761
762
                            qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
763
                            qS = _mm_cmpeq_epi16(_minS, qS);
764
                            int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;
765
766
                            bestDisp = bestDispBuf[LSBTab[idx]];
767
                        }
768
                        else
769
                    #endif
770
                        {
771
                            for( d = 0; d < D; d++ )
772
                            {
773
                                int L0 = Cp[d] + min((int)Lr_p0[d], min(Lr_p0[d-1] + P1, min(Lr_p0[d+1] + P1, delta0))) - delta0;
774
775
                                Lr_p[d] = (CostType)L0;
776
                                minL0 = min(minL0, L0);
777
778
                                int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0);
779
                                if( Sval < minS )
780
                                {
781
                                    minS = Sval;
782
                                    bestDisp = d;
783
                                }
784
                            }
785
                            minLr[0][xm] = (CostType)minL0;
786
                        }
787
                    }
788
                    else
789
                    {
790
                        for( d = 0; d < D; d++ )
791
                        {
792
                            int Sval = Sp[d];
793
                            if( Sval < minS )
794
                            {
795
                                minS = Sval;
796
                                bestDisp = d;
797
                            }
798
                        }
799
                    }
800
801
                    for( d = 0; d < D; d++ )
802
                    {
803
                        if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 )
804
                            break;
805
                    }
806
                    if( d < D )
807
                        continue;
808
                    d = bestDisp;
809
                    int _x2 = x + minX1 - d - minD;
810
                    if( disp2cost[_x2] > minS )
811
                    {
812
                        disp2cost[_x2] = (CostType)minS;
813
                        disp2ptr[_x2] = (DispType)(d + minD);
814
                    }
815
816
                    if( 0 < d && d < D-1 )
817
                    {
818
                        // do subpixel quadratic interpolation:
819
                        //   fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1])
820
                        //   then find minimum of the parabola.
821
                        int denom2 = max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1);
822
                        d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2);
823
                    }
824
                    else
825
                        d *= DISP_SCALE;
826
                    disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
827
                }
828
829
                for( x = minX1; x < maxX1; x++ )
830
                {
831
                    // we round the computed disparity both towards -inf and +inf and check
832
                    // if either of the corresponding disparities in disp2 is consistent.
833
                    // This is to give the computed disparity a chance to look valid if it is.
834
                    int d1 = disp1ptr[x];
835
                    if( d1 == INVALID_DISP_SCALED )
836
                        continue;
837
                    int _d = d1 >> DISP_SHIFT;
838
                    int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT;
839
                    int _x = x - _d, x_ = x - d_;
840
                    if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff &&
841
                       0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff )
842
                        disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
843
                }
844
            }
845
846
            // now shift the cyclic buffers
847
            std::swap( Lr[0], Lr[1] );
848
            std::swap( minLr[0], minLr[1] );
849
        }
850
    }
851
    fastFree(tempBufc);
852
    fastFree(disp2ptrc);
853
    fastFree(disp2costc);
854
    fastFree(pixDiffc);
855
    fastFree(hsumBufc);
856
    fastFree(Sbufc);
857
    fastFree(Cbufc);
858
}
859
860
typedef cv::Point_<short> Point2s;
861
862
void StereoSGBM::operator ()( InputArray _left, InputArray _right,
863
                             OutputArray _disp )
864
{
865
    Mat left = _left.getMat(), right = _right.getMat();
866
    CV_Assert( left.size() == right.size() && left.type() == right.type() &&
867
              left.depth() == DataType<PixType>::depth );
868
869
    _disp.create( left.size(), CV_16S );
870
    Mat disp = _disp.getMat();
871
872
    computeDisparitySGBM( left, right, disp, *this, buffer );
873
    medianBlur(disp, disp, 3);
874
875
    if( speckleWindowSize > 0 )
876
        filterSpeckles(disp, (minDisparity - 1)*DISP_SCALE, speckleWindowSize, DISP_SCALE*speckleRange, buffer);
877
}
878
879
880
Rect getValidDisparityROI( Rect roi1, Rect roi2,
881
                          int minDisparity,
882
                          int numberOfDisparities,
883
                          int SADWindowSize )
884
{
885
    int SW2 = SADWindowSize/2;
886
    int minD = minDisparity, maxD = minDisparity + numberOfDisparities - 1;
887
888
    int xmin = max(roi1.x, roi2.x + maxD) + SW2;
889
    int xmax = min(roi1.x + roi1.width, roi2.x + roi2.width - minD) - SW2;
890
    int ymin = max(roi1.y, roi2.y) + SW2;
891
    int ymax = min(roi1.y + roi1.height, roi2.y + roi2.height) - SW2;
892
893
    Rect r(xmin, ymin, xmax - xmin, ymax - ymin);
894
895
    return r.width > 0 && r.height > 0 ? r : Rect();
896
}
897
898
}
899
900
void cv::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSize,
901
                         double _maxDiff, InputOutputArray __buf )
902
{
903
    Mat img = _img.getMat();
904
    Mat temp, &_buf = __buf.needed() ? __buf.getMatRef() : temp;
905
    CV_Assert( img.type() == CV_16SC1 );
906
907
    int newVal = cvRound(_newval);
908
    int maxDiff = cvRound(_maxDiff);
909
    int width = img.cols, height = img.rows, npixels = width*height;
910
    size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar));
911
    if( !_buf.isContinuous() || !_buf.data || _buf.cols*_buf.rows*_buf.elemSize() < bufSize )
912
        _buf.create(1, (int)bufSize, CV_8U);
913
914
    uchar* buf = _buf.data;
915
    int i, j, dstep = (int)(img.step/sizeof(short));
916
    int* labels = (int*)buf;
917
    buf += npixels*sizeof(labels[0]);
918
    Point2s* wbuf = (Point2s*)buf;
919
    buf += npixels*sizeof(wbuf[0]);
920
    uchar* rtype = (uchar*)buf;
921
    int curlabel = 0;
922
923
    // clear out label assignments
924
    memset(labels, 0, npixels*sizeof(labels[0]));
925
926
    for( i = 0; i < height; i++ )
927
    {
928
        short* ds = img.ptr<short>(i);
929
        int* ls = labels + width*i;
930
931
        for( j = 0; j < width; j++ )
932
        {
933
            if( ds[j] != newVal )   // not a bad disparity
934
            {
935
                if( ls[j] )     // has a label, check for bad label
936
                {
937
                    if( rtype[ls[j]] ) // small region, zero out disparity
938
                        ds[j] = (short)newVal;
939
                }
940
                // no label, assign and propagate
941
                else
942
                {
943
                    Point2s* ws = wbuf; // initialize wavefront
944
                    Point2s p((short)j, (short)i);  // current pixel
945
                    curlabel++; // next label
946
                    int count = 0;  // current region size
947
                    ls[j] = curlabel;
948
949
                    // wavefront propagation
950
                    while( ws >= wbuf ) // wavefront not empty
951
                    {
952
                        count++;
953
                        // put neighbors onto wavefront
954
                        short* dpp = &img.at<short>(p.y, p.x);
955
                        short dp = *dpp;
956
                        int* lpp = labels + width*p.y + p.x;
957
958
                        if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff )
959
                        {
960
                            lpp[+1] = curlabel;
961
                            *ws++ = Point2s(p.x+1, p.y);
962
                        }
963
964
                        if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff )
965
                        {
966
                            lpp[-1] = curlabel;
967
                            *ws++ = Point2s(p.x-1, p.y);
968
                        }
969
970
                        if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff )
971
                        {
972
                            lpp[+width] = curlabel;
973
                            *ws++ = Point2s(p.x, p.y+1);
974
                        }
975
976
                        if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff )
977
                        {
978
                            lpp[-width] = curlabel;
979
                            *ws++ = Point2s(p.x, p.y-1);
980
                        }
981
982
                        // pop most recent and propagate
983
                        // NB: could try least recent, maybe better convergence
984
                        p = *--ws;
985
                    }
986
987
                    // assign label type
988
                    if( count <= maxSpeckleSize )   // speckle region
989
                    {
990
                        rtype[ls[j]] = 1;   // small region label
991
                        ds[j] = (short)newVal;
992
                    }
993
                    else
994
                        rtype[ls[j]] = 0;   // large region label
995
                }
996
            }
997
        }
998
    }
999
}
1000
1001
void cv::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDisparity,
1002
                            int numberOfDisparities, int disp12MaxDiff )
1003
{
1004
    Mat disp = _disp.getMat(), cost = _cost.getMat();
1005
    int cols = disp.cols, rows = disp.rows;
1006
    int minD = minDisparity, maxD = minDisparity + numberOfDisparities;
1007
    int x, minX1 = max(maxD, 0), maxX1 = cols + min(minD, 0);
1008
    AutoBuffer<int> _disp2buf(cols*2);
1009
    int* disp2buf = _disp2buf;
1010
    int* disp2cost = disp2buf + cols;
1011
    const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT;
1012
    int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
1013
    int costType = cost.type();
1014
1015
    disp12MaxDiff *= DISP_SCALE;
1016
1017
    CV_Assert( numberOfDisparities > 0 && disp.type() == CV_16S &&
1018
              (costType == CV_16S || costType == CV_32S) &&
1019
              disp.size() == cost.size() );
1020
1021
    for( int y = 0; y < rows; y++ )
1022
    {
1023
        short* dptr = disp.ptr<short>(y);
1024
1025
        for( x = 0; x < cols; x++ )
1026
        {
1027
            disp2buf[x] = INVALID_DISP_SCALED;
1028
            disp2cost[x] = INT_MAX;
1029
        }
1030
1031
        if( costType == CV_16S )
1032
        {
1033
            const short* cptr = cost.ptr<short>(y);
1034
1035
            for( x = minX1; x < maxX1; x++ )
1036
            {
1037
                int d = dptr[x], c = cptr[x];
1038
                int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
1039
1040
                if( disp2cost[x2] > c )
1041
                {
1042
                    disp2cost[x2] = c;
1043
                    disp2buf[x2] = d;
1044
                }
1045
            }
1046
        }
1047
        else
1048
        {
1049
            const int* cptr = cost.ptr<int>(y);
1050
1051
            for( x = minX1; x < maxX1; x++ )
1052
            {
1053
                int d = dptr[x], c = cptr[x];
1054
                int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
1055
1056
                if( disp2cost[x2] < c )
1057
                {
1058
                    disp2cost[x2] = c;
1059
                    disp2buf[x2] = d;
1060
                }
1061
            }
1062
        }
1063
1064
        for( x = minX1; x < maxX1; x++ )
1065
        {
1066
            // we round the computed disparity both towards -inf and +inf and check
1067
            // if either of the corresponding disparities in disp2 is consistent.
1068
            // This is to give the computed disparity a chance to look valid if it is.
1069
            int d = dptr[x];
1070
            if( d == INVALID_DISP_SCALED )
1071
                continue;
1072
            int d0 = d >> DISP_SHIFT;
1073
            int d1 = (d + DISP_SCALE-1) >> DISP_SHIFT;
1074
            int x0 = x - d0, x1 = x - d1;
1075
            if( (0 <= x0 && x0 < cols && disp2buf[x0] > INVALID_DISP_SCALED && std::abs(disp2buf[x0] - d) > disp12MaxDiff) &&
1076
                (0 <= x1 && x1 < cols && disp2buf[x1] > INVALID_DISP_SCALED && std::abs(disp2buf[x1] - d) > disp12MaxDiff) )
1077
                dptr[x] = (short)INVALID_DISP_SCALED;
1078
        }
1079
    }
1080
}
1081
1082
CvRect cvGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity,
1083
                               int numberOfDisparities, int SADWindowSize )
1084
{
1085
    return (CvRect)cv::getValidDisparityROI( roi1, roi2, minDisparity,
1086
                                             numberOfDisparities, SADWindowSize );
1087
}
1088
1089
void cvValidateDisparity( CvArr* _disp, const CvArr* _cost, int minDisparity,
1090
                          int numberOfDisparities, int disp12MaxDiff )
1091
{
1092
    cv::Mat disp = cv::cvarrToMat(_disp), cost = cv::cvarrToMat(_cost);
1093
    cv::validateDisparity( disp, cost, minDisparity, numberOfDisparities, disp12MaxDiff );
1094
}