/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/*
 This is a variation of
 "Stereo Processing by Semiglobal Matching and Mutual Information"
 by Heiko Hirschmuller.

 We match blocks rather than individual pixels, thus the algorithm is called
 SGBM (Semi-global block matching)
 */
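
/*
 Typical usage (an illustrative sketch, not part of the implementation; it assumes the
 OpenCV 2.x headers this file belongs to, and the parameter values below are common
 suggestions, e.g. P1/P2 proportional to cn*SADWindowSize*SADWindowSize, rather than
 requirements):

     #include <opencv2/core/core.hpp>
     #include <opencv2/calib3d/calib3d.hpp>

     void runSGBM( const cv::Mat& leftRectified, const cv::Mat& rightRectified, cv::Mat& disp )
     {
         int cn = leftRectified.channels(), win = 5;
         cv::StereoSGBM sgbm( 0,              // minDisparity
                              64,             // numberOfDisparities, must be divisible by 16
                              win,            // SADWindowSize
                              8*cn*win*win,   // P1
                              32*cn*win*win,  // P2
                              1,              // disp12MaxDiff
                              63,             // preFilterCap
                              10,             // uniquenessRatio
                              100,            // speckleWindowSize
                              32,             // speckleRange
                              false );        // fullDP
         sgbm( leftRectified, rightRectified, disp ); // disp is CV_16S, scaled by DISP_SCALE = 16
     }
 */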

#include "precomp.hpp"
#include <limits.h>

namespace cv
{

typedef uchar PixType;
typedef short CostType;
typedef short DispType;

enum { NR = 16, NR2 = NR/2 };

StereoSGBM::StereoSGBM()
{
    minDisparity = numberOfDisparities = 0;
    SADWindowSize = 0;
    P1 = P2 = 0;
    disp12MaxDiff = 0;
    preFilterCap = 0;
    uniquenessRatio = 0;
    speckleWindowSize = 0;
    speckleRange = 0;
    fullDP = false;
}


StereoSGBM::StereoSGBM( int _minDisparity, int _numDisparities, int _SADWindowSize,
                        int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
                        int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
                        bool _fullDP )
{
    minDisparity = _minDisparity;
    numberOfDisparities = _numDisparities;
    SADWindowSize = _SADWindowSize;
    P1 = _P1;
    P2 = _P2;
    disp12MaxDiff = _disp12MaxDiff;
    preFilterCap = _preFilterCap;
    uniquenessRatio = _uniquenessRatio;
    speckleWindowSize = _speckleWindowSize;
    speckleRange = _speckleRange;
    fullDP = _fullDP;
}


StereoSGBM::~StereoSGBM()
{
}

/*
 For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
 and for each disparity minD<=d<maxD the function
 computes the cost (cost[(x-minX)*(maxD - minD) + (d - minD)]), depending on the difference between
 row1[x] and row2[x-d]. The subpixel algorithm from
 "Depth Discontinuities by Pixel-to-Pixel Stereo" by Stan Birchfield and C. Tomasi
 is used, hence the suffix BT.

 The temporary buffer should contain width2*2 elements.
 */
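
/*
 A scalar sketch of the Birchfield-Tomasi dissimilarity used below (illustrative only;
 pixDissimilarityBT, left and right are hypothetical names, <algorithm> is assumed for
 std::min/std::max, and the real code additionally preprocesses the rows and interleaves
 a gradient channel):

     // sampling-insensitive dissimilarity between left[x] and right[x-d]:
     // compare each value against the other row's value and its half-sample interpolants,
     // then take the smaller of the two one-sided distances.
     static int pixDissimilarityBT( const uchar* left, const uchar* right, int x, int d, int width )
     {
         int u = left[x], v = right[x-d];
         int vl = x-d > 0       ? (v + right[x-d-1])/2 : v;   // right value at x-d-1/2
         int vr = x-d < width-1 ? (v + right[x-d+1])/2 : v;   // right value at x-d+1/2
         int v0 = std::min(std::min(vl, vr), v), v1 = std::max(std::max(vl, vr), v);
         int ul = x > 0         ? (u + left[x-1])/2 : u;      // left value at x-1/2
         int ur = x < width-1   ? (u + left[x+1])/2 : u;      // left value at x+1/2
         int u0 = std::min(std::min(ul, ur), u), u1 = std::max(std::max(ul, ur), u);
         int c0 = std::max(0, std::max(u - v1, v0 - u));      // distance from u to [v0, v1]
         int c1 = std::max(0, std::max(v - u1, u0 - v));      // distance from v to [u0, u1]
         return std::min(c0, c1);
     }
 */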
static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
                             int minD, int maxD, CostType* cost,
                             PixType* buf, const PixType* tab,
                             int tabOfs, int )
{
    int x, c, width = img1.cols, cn = img1.channels();
    int minX1 = max(maxD, 0), maxX1 = width + min(minD, 0);
    int minX2 = max(minX1 - maxD, 0), maxX2 = min(maxX1 - minD, width);
    int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
    const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
    PixType *buffer = buf;
    PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;

    tab += tabOfs;

    for( c = 0; c < cn*2; c++ )
    {
        prow1[width*c] = prow1[width*c + width-1] =
        prow2[width*c] = prow2[width*c + width-1] = tab[0];
    }

    int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
    int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;

    if( cn == 1 )
    {
        for( x = 1; x < width-1; x++ )
        {
            prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
            prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];

            prow1[x+width] = row1[x];
            prow2[width-1-x+width] = row2[x];
        }
    }
    else
    {
        for( x = 1; x < width-1; x++ )
        {
            prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
            prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
            prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];

            prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
            prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
            prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];

            prow1[x+width*3] = row1[x*3];
            prow1[x+width*4] = row1[x*3+1];
            prow1[x+width*5] = row1[x*3+2];

            prow2[width-1-x+width*3] = row2[x*3];
            prow2[width-1-x+width*4] = row2[x*3+1];
            prow2[width-1-x+width*5] = row2[x*3+2];
        }
    }

    memset( cost, 0, width1*D*sizeof(cost[0]) );

    buffer -= minX2;
    cost -= minX1*D + minD; // simplify the cost indices inside the loop

#if CV_SSE2
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif

#if 1
    for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
    {
        int diff_scale = c < cn ? 0 : 2;

        // precompute
        //   v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and
        //   v1 = max(row2[x-1/2], row2[x], row2[x+1/2])
        for( x = minX2; x < maxX2; x++ )
        {
            int v = prow2[x];
            int vl = x > 0 ? (v + prow2[x-1])/2 : v;
            int vr = x < width-1 ? (v + prow2[x+1])/2 : v;
            int v0 = min(vl, vr); v0 = min(v0, v);
            int v1 = max(vl, vr); v1 = max(v1, v);
            buffer[x] = (PixType)v0;
            buffer[x + width2] = (PixType)v1;
        }

        for( x = minX1; x < maxX1; x++ )
        {
            int u = prow1[x];
            int ul = x > 0 ? (u + prow1[x-1])/2 : u;
            int ur = x < width-1 ? (u + prow1[x+1])/2 : u;
            int u0 = min(ul, ur); u0 = min(u0, u);
            int u1 = max(ul, ur); u1 = max(u1, u);

        #if CV_SSE2
            if( useSIMD )
            {
                __m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0);
                __m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128();
                __m128i ds = _mm_cvtsi32_si128(diff_scale);

                for( int d = minD; d < maxD; d += 16 )
                {
                    __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d));
                    __m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d));
                    __m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2));
                    __m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u));
                    __m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v));
                    __m128i diff = _mm_min_epu8(c0, c1);

                    c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
                    c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));

                    _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds)));
                    _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds)));
                }
            }
            else
        #endif
            {
                for( int d = minD; d < maxD; d++ )
                {
                    int v = prow2[width-x-1 + d];
                    int v0 = buffer[width-x-1 + d];
                    int v1 = buffer[width-x-1 + d + width2];
                    int c0 = max(0, u - v1); c0 = max(c0, v0 - u);
                    int c1 = max(0, v - u1); c1 = max(c1, u0 - v);

                    cost[x*D + d] = (CostType)(cost[x*D+d] + (min(c0, c1) >> diff_scale));
                }
            }
        }
    }
#else
    for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
    {
        for( x = minX1; x < maxX1; x++ )
        {
            int u = prow1[x];
        #if CV_SSE2
            if( useSIMD )
            {
                __m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128();

                for( int d = minD; d < maxD; d += 16 )
                {
                    __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d));
                    __m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u));
                    __m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
                    __m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));

                    _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z)));
                    _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z)));
                }
            }
            else
        #endif
            {
                for( int d = minD; d < maxD; d++ )
                {
                    int v = prow2[width-1-x + d];
                    cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v));
                }
            }
        }
    }
#endif
}


/*
 computes disparity for "roi" in img1 w.r.t. img2 and writes it to disp1buf.
 that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
 minD <= d < maxD.
 disp2full is the reverse disparity map, that is:
 disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)

 note that disp1buf will have the same size as the roi and
 disp2full will have the same size as img1 (or img2).
 On exit disp2buf is not the final disparity, it is an intermediate result that becomes
 final after all the tiles are processed.

 the disparity in disp1buf is written with sub-pixel accuracy
 (4 fractional bits, see StereoSGBM::DISP_SCALE),
 using quadratic interpolation, while the disparity in disp2buf
 is written as is, without interpolation.

 disp2cost also has the same size as img1 (or img2).
 It contains the minimal cost seen so far for each pixel and is used to select
 the best (minimal-cost) reverse disparity.
 */
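
/*
 Output encoding reminder (an illustrative sketch; toFloatDisparity is a hypothetical
 helper, not part of this file): disparities are stored as CV_16S fixed-point values
 with 4 fractional bits, and invalid pixels are set to (minDisparity - 1)*DISP_SCALE.

     static void toFloatDisparity( const cv::Mat& disp16, cv::Mat& dispf )
     {
         const int DISP_SCALE = 16;                        // 1 << DISP_SHIFT, with DISP_SHIFT == 4
         disp16.convertTo( dispf, CV_32F, 1./DISP_SCALE ); // true disparity = stored value / 16
         // after scaling, pixels equal to minDisparity - 1 mark positions with no reliable match
     }
 */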
static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
                                  Mat& disp1, const StereoSGBM& params,
                                  Mat& buffer )
{
#if CV_SSE2
    static const uchar LSBTab[] =
    {
        0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
    };

    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif

    const int ALIGN = 16;
    const int DISP_SHIFT = StereoSGBM::DISP_SHIFT;
    const int DISP_SCALE = StereoSGBM::DISP_SCALE;
    const CostType MAX_COST = SHRT_MAX;

    int minD = params.minDisparity, maxD = minD + params.numberOfDisparities;
    Size SADWindowSize;
    SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
    int ftzero = max(params.preFilterCap, 15) | 1;
    int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10;
    int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
    int P1 = params.P1 > 0 ? params.P1 : 2, P2 = max(params.P2 > 0 ? params.P2 : 5, P1+1);
    int k, width = disp1.cols, height = disp1.rows;
    int minX1 = max(maxD, 0), maxX1 = width + min(minD, 0);
    int D = maxD - minD, width1 = maxX1 - minX1;
    int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
    int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2;
    int npasses = params.fullDP ? 2 : 1;
    const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
    PixType clipTab[TAB_SIZE];
    int minLr0Pos = 0;

    for( k = 0; k < TAB_SIZE; k++ )
        clipTab[k] = (PixType)(min(max(k - TAB_OFS, -ftzero), ftzero) + ftzero);

    if( minX1 >= maxX1 )
    {
        disp1 = Scalar::all(INVALID_DISP_SCALED);
        return;
    }

    CV_Assert( D % 16 == 0 );

    // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
    // if you change NR, please, modify the loop as well.
    int D2 = D+16, NRD2 = NR2*D2;

    // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
    // for 8-way dynamic programming we need the current row and
    // the previous row, i.e. 2 rows in total
    const int NLR = 2;
    const int LrBorder = NLR - 1;

    // for each possible stereo match (img1(x,y) <=> img2(x-d,y))
    // we keep the pixel difference cost (C) and the summary cost over NR directions (S);
    // we also keep the partial costs for the previous line L_r(x,d) and min_k L_r(x,k)
    size_t costBufSize = width1*D;
    size_t CSBufSize = costBufSize*(params.fullDP ? height : 1);
    size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2;
    int hsumBufNRows = SH2*2 + 2;
    size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
        costBufSize*(hsumBufNRows + 1)*sizeof(CostType) +             // hsumBuf, pixDiff
        CSBufSize*2*sizeof(CostType) +                                // C, S
        width*16*img1.channels()*sizeof(PixType) +                    // temp buffer for per-pixel cost
        width*(sizeof(CostType) + sizeof(DispType)) + 1024;           // disp2cost + disp2

    /*
    if( !buffer.data || !buffer.isContinuous() ||
        buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
        buffer.create(1, (int)totalBufSize, CV_8U);
    // summary cost over different (nDirs) directions
    CostType* Cbuf = (CostType*)alignPtr(buffer.data, ALIGN);
    CostType* Sbuf = Cbuf + CSBufSize;
    CostType* hsumBuf = Sbuf + CSBufSize;
    CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;

    CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
    DispType* disp2ptr = (DispType*)(disp2cost + width);
    PixType* tempBuf = (PixType*)(disp2ptr + width);
    */
    CostType* Cbuf = (CostType*)fastMalloc(CSBufSize*sizeof(CostType));
    CostType* Sbuf = (CostType*)fastMalloc(CSBufSize*sizeof(CostType));
    CostType* hsumBuf = (CostType*)fastMalloc(costBufSize*(hsumBufNRows+1)*sizeof(CostType));
    CostType* pixDiff = (CostType*)fastMalloc((costBufSize + (LrSize + minLrSize)*NLR)*sizeof(CostType));

    CostType* disp2cost = (CostType*)fastMalloc(width*sizeof(CostType));
    DispType* disp2ptr = (DispType*)fastMalloc(width*sizeof(DispType));

    PixType* tempBuf = (PixType*)fastMalloc(1024+width*16*img1.channels()*sizeof(PixType));

    // keep the original pointers so that the buffers can be released at the end
    CostType* Cbufc=Cbuf, *Sbufc=Sbuf, *hsumBufc=hsumBuf, *pixDiffc = pixDiff, *disp2costc=disp2cost;
    DispType* disp2ptrc=disp2ptr; PixType* tempBufc = tempBuf;

    // add P2 to every C(x,y); it saves a few operations in the inner loops
    for( k = 0; k < width1*D; k++ )
        Cbuf[k] = (CostType)P2;

    for( int pass = 1; pass <= npasses; pass++ )
    {
        int x1, y1, x2, y2, dx, dy;

        if( pass == 1 )
        {
            y1 = 0; y2 = height; dy = 1;
            x1 = 0; x2 = width1; dx = 1;
        }
        else
        {
            y1 = height-1; y2 = -1; dy = -1;
            x1 = width1-1; x2 = -1; dx = -1;
        }

        CostType *Lr[NLR]={0}, *minLr[NLR]={0};

        for( k = 0; k < NLR; k++ )
        {
            // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
            // and will occasionally use negative indices with the arrays.
            // we need to shift Lr[k] pointers by 1, to give the space for d=-1.
            // however, then the alignment will be imperfect, i.e. bad for SSE,
            // thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment)
            Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8;
            memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) );
            minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*2;
            memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
        }
        // _CrtCheckMemory();  // optional MSVC debug-CRT heap check (needs <crtdbg.h>)
        for( int y = y1; y != y2; y += dy )
        {
            int x, d;
            DispType* disp1ptr = disp1.ptr<DispType>(y);
            CostType* C = Cbuf + (!params.fullDP ? 0 : y*costBufSize);
            CostType* S = Sbuf + (!params.fullDP ? 0 : y*costBufSize);

            if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any
            {
                int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;

                for( k = dy1; k <= dy2; k++ )
                {
                    CostType* hsumAdd = hsumBuf + (min(k, height-1) % hsumBufNRows)*costBufSize;

                    if( k < height )
                    {
                        calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );

                        memset(hsumAdd, 0, D*sizeof(CostType));
                        for( x = 0; x <= SW2*D; x += D )
                        {
                            int scale = x == 0 ? SW2 + 1 : 1;
                            for( d = 0; d < D; d++ )
                                hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);
                        }

                        if( y > 0 )
                        {
                            const CostType* hsumSub = hsumBuf + (max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
                            const CostType* Cprev = !params.fullDP || y == 0 ? C : C - costBufSize;

                            for( x = D; x < width1*D; x += D )
                            {
                                const CostType* pixAdd = pixDiff + min(x + SW2*D, (width1-1)*D);
                                const CostType* pixSub = pixDiff + max(x - (SW2+1)*D, 0);

                            #if CV_SSE2
                                if( useSIMD )
                                {
                                    for( d = 0; d < D; d += 8 )
                                    {
                                        __m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d));
                                        __m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d));
                                        hv = _mm_adds_epi16(_mm_subs_epi16(hv,
                                                                           _mm_load_si128((const __m128i*)(pixSub + d))),
                                                            _mm_load_si128((const __m128i*)(pixAdd + d)));
                                        Cx = _mm_adds_epi16(_mm_subs_epi16(Cx,
                                                                           _mm_load_si128((const __m128i*)(hsumSub + x + d))),
                                                            hv);
                                        _mm_store_si128((__m128i*)(hsumAdd + x + d), hv);
                                        _mm_store_si128((__m128i*)(C + x + d), Cx);
                                    }
                                }
                                else
                            #endif
                                {
                                    for( d = 0; d < D; d++ )
                                    {
                                        int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
                                        C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]);
                                    }
                                }
                            }
                        }
                        else
                        {
                            for( x = D; x < width1*D; x += D )
                            {
                                const CostType* pixAdd = pixDiff + min(x + SW2*D, (width1-1)*D);
                                const CostType* pixSub = pixDiff + max(x - (SW2+1)*D, 0);

                                for( d = 0; d < D; d++ )
                                    hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
                            }
                        }
                    }

                    if( y == 0 )
                    {
                        int scale = k == 0 ? SH2 + 1 : 1;
                        for( x = 0; x < width1*D; x++ )
                            C[x] = (CostType)(C[x] + hsumAdd[x]*scale);
                    }
                }

                // also, clear the S buffer
                for( k = 0; k < width1*D; k++ )
                    S[k] = 0;
            }

            // clear the left and the right borders
            memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
            memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
            memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
            memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );

            /*
             [formula 13 in the paper]
             compute L_r(p, d) = C(p, d) +
                 min(L_r(p-r, d),
                     L_r(p-r, d-1) + P1,
                     L_r(p-r, d+1) + P1,
                     min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k)
             where p = (x,y), r is one of the directions.
             we process all the directions at once:
             0: r=(-dx, 0)
             1: r=(-1, -dy)
             2: r=(0, -dy)
             3: r=(1, -dy)
             4: r=(-2, -dy)
             5: r=(-1, -dy*2)
             6: r=(1, -dy*2)
             7: r=(2, -dy)
             */
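            /*
             A scalar reference for the per-direction update above (illustrative sketch;
             updateLrOneDirection is a hypothetical name, std::min from <algorithm> is assumed,
             and the loop below fuses four directions, stores them interleaved and uses
             MAX_COST sentinels at d = -1 and d = D instead of the explicit range checks):

                 // Lprev = L_r(p-r, .), minLprev = min_k L_r(p-r, k), Lcur = L_r(p, .)
                 static void updateLrOneDirection( const short* Cp, const short* Lprev, int minLprev,
                                                   short* Lcur, short* minLcur, int D, int P1, int P2 )
                 {
                     int m = SHRT_MAX;
                     for( int d = 0; d < D; d++ )
                     {
                         int best = Lprev[d];
                         if( d > 0 )   best = std::min(best, Lprev[d-1] + P1);
                         if( d < D-1 ) best = std::min(best, Lprev[d+1] + P1);
                         best = std::min(best, minLprev + P2);
                         int L = Cp[d] + best - minLprev;  // subtracting min_k keeps L_r bounded
                         Lcur[d] = (short)L;
                         m = std::min(m, L);
                     }
                     *minLcur = (short)m;
                 }
             */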

            for( x = x1; x != x2; x += dx )
            {
                int xm = x*NR2, xd = xm*D2;

                int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
                int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;

                CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
                CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
                CostType* Lr_p2 = Lr[1] + xd + D2*2;
                CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;

                Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
                Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;

                CostType* Lr_p = Lr[0] + xd;
                const CostType* Cp = C + x*D;
                CostType* Sp = S + x*D;

            // note: the "*0" compiles the SIMD branch out; the scalar branch below is always used
            #if CV_SSE2*0
                if( useSIMD )
                {
                    __m128i _P1 = _mm_set1_epi16((short)P1);

                    __m128i _delta0 = _mm_set1_epi16((short)delta0);
                    __m128i _delta1 = _mm_set1_epi16((short)delta1);
                    __m128i _delta2 = _mm_set1_epi16((short)delta2);
                    __m128i _delta3 = _mm_set1_epi16((short)delta3);
                    __m128i _minL0 = _mm_set1_epi16((short)MAX_COST);

                    for( d = 0; d < D; d += 8 )
                    {
                        __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
                        __m128i L0, L1, L2, L3;

                        L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
                        L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
                        L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
                        L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));

                        L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
                        L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));

                        L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
                        L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));

                        L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
                        L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));

                        L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
                        L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));

                        L0 = _mm_min_epi16(L0, _delta0);
                        L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);

                        L1 = _mm_min_epi16(L1, _delta1);
                        L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);

                        L2 = _mm_min_epi16(L2, _delta2);
                        L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);

                        L3 = _mm_min_epi16(L3, _delta3);
                        L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);

                        _mm_store_si128( (__m128i*)(Lr_p + d), L0);
                        _mm_store_si128( (__m128i*)(Lr_p + d + D2), L1);
                        _mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2);
                        _mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3);

                        __m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
                        __m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
                        t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
                        _minL0 = _mm_min_epi16(_minL0, t0);

                        __m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));

                        L0 = _mm_adds_epi16(L0, L1);
                        L2 = _mm_adds_epi16(L2, L3);
                        Sval = _mm_adds_epi16(Sval, L0);
                        Sval = _mm_adds_epi16(Sval, L2);

                        _mm_store_si128((__m128i*)(Sp + d), Sval);
                    }

                    _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
                    _mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
                }
                else
            #endif
                {
                    int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST;

                    int minpos = minLr0Pos, P12 = P1 + P2;
                    for( d = 0; d < D; d++ )
                    {
                        int Cpd = Cp[d], L0, L1, L2, L3;

                        if (xd > 0)
                            if (d < minpos)
                                L0 = min((int)Lr_p0[d+1] + P12, Cpd + min((int)Lr_p0[d], min(Lr_p0[d-1] + P1, delta0))) - delta0;
                            else
                                L0 = min((int)Lr_p0[d+1] + P12, Cpd + min((int)Lr_p0[d], Lr_p0[d-1] + P1)) - delta0;
                        else
                            if (d < minpos)
                                L0 = min((int)Lr_p0[d-1] + P12, Cpd + min((int)Lr_p0[d], Lr_p0[d+1] + P1)) - delta0;
                            else
                                L0 = min((int)Lr_p0[d-1] + P12, Cpd + min((int)Lr_p0[d], min(Lr_p0[d+1] + P1, delta0))) - delta0;

                        L1 = Cpd + min((int)Lr_p1[d], min(Lr_p1[d-1] + P1, min(Lr_p1[d+1] + P1, delta1))) - delta1;
                        L2 = Cpd + min((int)Lr_p2[d], min(Lr_p2[d-1] + P1, min(Lr_p2[d+1] + P1, delta2))) - delta2;
                        L3 = Cpd + min((int)Lr_p3[d], min(Lr_p3[d-1] + P1, min(Lr_p3[d+1] + P1, delta3))) - delta3;

                        Lr_p[d] = (CostType)L0;
                        if (L0 < minL0) {
                            minL0 = L0;
                            minLr0Pos = d;
                        }

                        Lr_p[d + D2] = (CostType)L1;
                        minL1 = min(minL1, L1);

                        Lr_p[d + D2*2] = (CostType)L2;
                        minL2 = min(minL2, L2);

                        Lr_p[d + D2*3] = (CostType)L3;
                        minL3 = min(minL3, L3);

                        Sp[d] = saturate_cast<CostType>(Sp[d] + L0 + L1 + L2 + L3);
                    }
                    minLr[0][xm] = (CostType)minL0;
                    minLr[0][xm+1] = (CostType)minL1;
                    minLr[0][xm+2] = (CostType)minL2;
                    minLr[0][xm+3] = (CostType)minL3;
                }
            }

            if( pass == npasses )
            {
                for( x = 0; x < width; x++ )
                {
                    disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED;
                    disp2cost[x] = MAX_COST;
                }

                for( x = width1 - 1; x >= 0; x-- )
                {
                    CostType* Sp = S + x*D;
                    int minS = MAX_COST, bestDisp = -1;

                    if( npasses == 1 )
                    {
                        int xm = x*NR2, xd = xm*D2;

                        int minL0 = MAX_COST;
                        int delta0 = minLr[0][xm + NR2] + P2;
                        CostType* Lr_p0 = Lr[0] + xd + NRD2;
                        Lr_p0[-1] = Lr_p0[D] = MAX_COST;
                        CostType* Lr_p = Lr[0] + xd;

                        const CostType* Cp = C + x*D;

                    #if CV_SSE2
                        if( useSIMD )
                        {
                            __m128i _P1 = _mm_set1_epi16((short)P1);
                            __m128i _delta0 = _mm_set1_epi16((short)delta0);

                            __m128i _minL0 = _mm_set1_epi16((short)minL0);
                            __m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1);
                            __m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8);

                            for( d = 0; d < D; d += 8 )
                            {
                                __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0;

                                L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
                                L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
                                L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
                                L0 = _mm_min_epi16(L0, _delta0);
                                L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);

                                _mm_store_si128((__m128i*)(Lr_p + d), L0);
                                _minL0 = _mm_min_epi16(_minL0, L0);
                                L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d));
                                _mm_store_si128((__m128i*)(Sp + d), L0);

                                __m128i mask = _mm_cmpgt_epi16(_minS, L0);
                                _minS = _mm_min_epi16(_minS, L0);
                                _bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask));
                                _d8 = _mm_adds_epi16(_d8, _8);
                            }

                            short CV_DECL_ALIGNED(16) bestDispBuf[8];
                            _mm_store_si128((__m128i*)bestDispBuf, _bestDisp);

                            _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
                            _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
                            _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));

                            __m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
                            qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
                            qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));

                            minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
                            minS = (CostType)_mm_cvtsi128_si32(qS);

                            qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
                            qS = _mm_cmpeq_epi16(_minS, qS);
                            int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;

                            bestDisp = bestDispBuf[LSBTab[idx]];
                        }
                        else
                    #endif
                        {
                            for( d = 0; d < D; d++ )
                            {
                                int L0 = Cp[d] + min((int)Lr_p0[d], min(Lr_p0[d-1] + P1, min(Lr_p0[d+1] + P1, delta0))) - delta0;

                                Lr_p[d] = (CostType)L0;
                                minL0 = min(minL0, L0);

                                int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0);
                                if( Sval < minS )
                                {
                                    minS = Sval;
                                    bestDisp = d;
                                }
                            }
                            minLr[0][xm] = (CostType)minL0;
                        }
                    }
                    else
                    {
                        for( d = 0; d < D; d++ )
                        {
                            int Sval = Sp[d];
                            if( Sval < minS )
                            {
                                minS = Sval;
                                bestDisp = d;
                            }
                        }
                    }

                    for( d = 0; d < D; d++ )
                    {
                        if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 )
                            break;
                    }
                    if( d < D )
                        continue;
                    d = bestDisp;
                    int _x2 = x + minX1 - d - minD;
                    if( disp2cost[_x2] > minS )
                    {
                        disp2cost[_x2] = (CostType)minS;
                        disp2ptr[_x2] = (DispType)(d + minD);
                    }

                    if( 0 < d && d < D-1 )
                    {
                        // do subpixel quadratic interpolation:
                        //   fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1])
                        //   then find the minimum of the parabola
                        int denom2 = max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1);
                        d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2);
                    }
                    else
                        d *= DISP_SCALE;
                    disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
                }

                for( x = minX1; x < maxX1; x++ )
                {
                    // we round the computed disparity both towards -inf and +inf and check
                    // if either of the corresponding disparities in disp2 is consistent.
                    // This gives the computed disparity a chance to look valid if it is.
                    int d1 = disp1ptr[x];
                    if( d1 == INVALID_DISP_SCALED )
                        continue;
                    int _d = d1 >> DISP_SHIFT;
                    int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT;
                    int _x = x - _d, x_ = x - d_;
                    if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff &&
                        0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff )
                        disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
                }
            }

            // now shift the cyclic buffers
            std::swap( Lr[0], Lr[1] );
            std::swap( minLr[0], minLr[1] );
        }
    }
    fastFree(tempBufc);
    fastFree(disp2ptrc);
    fastFree(disp2costc);
    fastFree(pixDiffc);
    fastFree(hsumBufc);
    fastFree(Sbufc);
    fastFree(Cbufc);
}

typedef cv::Point_<short> Point2s;

void StereoSGBM::operator ()( InputArray _left, InputArray _right,
                              OutputArray _disp )
{
    Mat left = _left.getMat(), right = _right.getMat();
    CV_Assert( left.size() == right.size() && left.type() == right.type() &&
               left.depth() == DataType<PixType>::depth );

    _disp.create( left.size(), CV_16S );
    Mat disp = _disp.getMat();

    computeDisparitySGBM( left, right, disp, *this, buffer );
    medianBlur(disp, disp, 3);

    if( speckleWindowSize > 0 )
        filterSpeckles(disp, (minDisparity - 1)*DISP_SCALE, speckleWindowSize, DISP_SCALE*speckleRange, buffer);
}


Rect getValidDisparityROI( Rect roi1, Rect roi2,
                           int minDisparity,
                           int numberOfDisparities,
                           int SADWindowSize )
{
    int SW2 = SADWindowSize/2;
    int minD = minDisparity, maxD = minDisparity + numberOfDisparities - 1;

    int xmin = max(roi1.x, roi2.x + maxD) + SW2;
    int xmax = min(roi1.x + roi1.width, roi2.x + roi2.width - minD) - SW2;
    int ymin = max(roi1.y, roi2.y) + SW2;
    int ymax = min(roi1.y + roi1.height, roi2.y + roi2.height) - SW2;

    Rect r(xmin, ymin, xmax - xmin, ymax - ymin);

    return r.width > 0 && r.height > 0 ? r : Rect();
}

}

void cv::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSize,
                         double _maxDiff, InputOutputArray __buf )
{
    Mat img = _img.getMat();
    Mat temp, &_buf = __buf.needed() ? __buf.getMatRef() : temp;
    CV_Assert( img.type() == CV_16SC1 );

    int newVal = cvRound(_newval);
    int maxDiff = cvRound(_maxDiff);
    int width = img.cols, height = img.rows, npixels = width*height;
    size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar));
    if( !_buf.isContinuous() || !_buf.data || _buf.cols*_buf.rows*_buf.elemSize() < bufSize )
        _buf.create(1, (int)bufSize, CV_8U);

    uchar* buf = _buf.data;
    int i, j, dstep = (int)(img.step/sizeof(short));
    int* labels = (int*)buf;
    buf += npixels*sizeof(labels[0]);
    Point2s* wbuf = (Point2s*)buf;
    buf += npixels*sizeof(wbuf[0]);
    uchar* rtype = (uchar*)buf;
    int curlabel = 0;

    // clear out label assignments
    memset(labels, 0, npixels*sizeof(labels[0]));

    for( i = 0; i < height; i++ )
    {
        short* ds = img.ptr<short>(i);
        int* ls = labels + width*i;

        for( j = 0; j < width; j++ )
        {
            if( ds[j] != newVal )       // not a bad disparity
            {
                if( ls[j] )             // has a label, check it
                {
                    if( rtype[ls[j]] )  // small region, zero out disparity
                        ds[j] = (short)newVal;
                }
                // no label, assign and propagate
                else
                {
                    Point2s* ws = wbuf;             // initialize wavefront
                    Point2s p((short)j, (short)i);  // current pixel
                    curlabel++;                     // next label
                    int count = 0;                  // current region size
                    ls[j] = curlabel;

                    // wavefront propagation
                    while( ws >= wbuf )             // wavefront not empty
                    {
                        count++;
                        // put neighbors onto wavefront
                        short* dpp = &img.at<short>(p.y, p.x);
                        short dp = *dpp;
                        int* lpp = labels + width*p.y + p.x;

                        if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff )
                        {
                            lpp[+1] = curlabel;
                            *ws++ = Point2s(p.x+1, p.y);
                        }

                        if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff )
                        {
                            lpp[-1] = curlabel;
                            *ws++ = Point2s(p.x-1, p.y);
                        }

                        if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff )
                        {
                            lpp[+width] = curlabel;
                            *ws++ = Point2s(p.x, p.y+1);
                        }

                        if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff )
                        {
                            lpp[-width] = curlabel;
                            *ws++ = Point2s(p.x, p.y-1);
                        }

                        // pop the most recently pushed pixel and continue from it;
                        // the loop exits once ws drops below wbuf
                        p = *--ws;
                    }

                    // assign label type
                    if( count <= maxSpeckleSize )   // speckle region
                    {
                        rtype[ls[j]] = 1;           // small region label
                        ds[j] = (short)newVal;
                    }
                    else
                        rtype[ls[j]] = 0;           // large region label
                }
            }
        }
    }
}

void cv::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDisparity,
                            int numberOfDisparities, int disp12MaxDiff )
{
    Mat disp = _disp.getMat(), cost = _cost.getMat();
    int cols = disp.cols, rows = disp.rows;
    int minD = minDisparity, maxD = minDisparity + numberOfDisparities;
    int x, minX1 = max(maxD, 0), maxX1 = cols + min(minD, 0);
    AutoBuffer<int> _disp2buf(cols*2);
    int* disp2buf = _disp2buf;
    int* disp2cost = disp2buf + cols;
    const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT;
    int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
    int costType = cost.type();

    disp12MaxDiff *= DISP_SCALE;

    CV_Assert( numberOfDisparities > 0 && disp.type() == CV_16S &&
               (costType == CV_16S || costType == CV_32S) &&
               disp.size() == cost.size() );

    for( int y = 0; y < rows; y++ )
    {
        short* dptr = disp.ptr<short>(y);

        for( x = 0; x < cols; x++ )
        {
            disp2buf[x] = INVALID_DISP_SCALED;
            disp2cost[x] = INT_MAX;
        }

        if( costType == CV_16S )
        {
            const short* cptr = cost.ptr<short>(y);

            for( x = minX1; x < maxX1; x++ )
            {
                int d = dptr[x], c = cptr[x];
                int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);

                if( disp2cost[x2] > c )
                {
                    disp2cost[x2] = c;
                    disp2buf[x2] = d;
                }
            }
        }
        else
        {
            const int* cptr = cost.ptr<int>(y);

            for( x = minX1; x < maxX1; x++ )
            {
                int d = dptr[x], c = cptr[x];
                int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);

                if( disp2cost[x2] > c ) // keep the smallest-cost candidate, as in the CV_16S branch
                {
                    disp2cost[x2] = c;
                    disp2buf[x2] = d;
                }
            }
        }

        for( x = minX1; x < maxX1; x++ )
        {
            // we round the computed disparity both towards -inf and +inf and check
            // if either of the corresponding disparities in disp2 is consistent.
            // This gives the computed disparity a chance to look valid if it is.
            int d = dptr[x];
            if( d == INVALID_DISP_SCALED )
                continue;
            int d0 = d >> DISP_SHIFT;
            int d1 = (d + DISP_SCALE-1) >> DISP_SHIFT;
            int x0 = x - d0, x1 = x - d1;
            if( (0 <= x0 && x0 < cols && disp2buf[x0] > INVALID_DISP_SCALED && std::abs(disp2buf[x0] - d) > disp12MaxDiff) &&
                (0 <= x1 && x1 < cols && disp2buf[x1] > INVALID_DISP_SCALED && std::abs(disp2buf[x1] - d) > disp12MaxDiff) )
                dptr[x] = (short)INVALID_DISP_SCALED;
        }
    }
}

CvRect cvGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity,
                               int numberOfDisparities, int SADWindowSize )
{
    return (CvRect)cv::getValidDisparityROI( roi1, roi2, minDisparity,
                                             numberOfDisparities, SADWindowSize );
}

void cvValidateDisparity( CvArr* _disp, const CvArr* _cost, int minDisparity,
                          int numberOfDisparities, int disp12MaxDiff )
{
    cv::Mat disp = cv::cvarrToMat(_disp), cost = cv::cvarrToMat(_cost);
    cv::validateDisparity( disp, cost, minDisparity, numberOfDisparities, disp12MaxDiff );
}