color.cpp

Kevin Smith, 2012-06-27 05:04 pm

Download (140.7 kB)

 
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
//  By downloading, copying, installing or using the software you agree to this license.
6
//  If you do not agree to this license, do not download, install,
7
//  copy or use the software.
8
//
9
//
10
//                           License Agreement
11
//                For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
//   * Redistribution's of source code must retain the above copyright notice,
21
//     this list of conditions and the following disclaimer.
22
//
23
//   * Redistribution's in binary form must reproduce the above copyright notice,
24
//     this list of conditions and the following disclaimer in the documentation
25
//     and/or other materials provided with the distribution.
26
//
27
//   * The name of the copyright holders may not be used to endorse or promote products
28
//     derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42
43
/********************************* COPYRIGHT NOTICE *******************************\
44
  The function for RGB to Lab conversion is based on the MATLAB script
45
  RGB2Lab.m translated by Mark Ruzon from C code by Yossi Rubner, 23 September 1997.
46
  See the page [http://vision.stanford.edu/~ruzon/software/rgblab.html]
47
\**********************************************************************************/
48
49
/********************************* COPYRIGHT NOTICE *******************************\
50
  Original code for Bayer->BGR/RGB conversion is provided by Dirk Schaefer
51
  from MD-Mathematische Dienste GmbH. Below is the copyright notice:
52
53
    IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
54
    By downloading, copying, installing or using the software you agree
55
    to this license. If you do not agree to this license, do not download,
56
    install, copy or use the software.
57
58
    Contributors License Agreement:
59
60
      Copyright (c) 2002,
61
      MD-Mathematische Dienste GmbH
62
      Im Defdahl 5-10
63
      44141 Dortmund
64
      Germany
65
      www.md-it.de
66
67
    Redistribution and use in source and binary forms,
68
    with or without modification, are permitted provided
69
    that the following conditions are met:
70
71
    Redistributions of source code must retain
72
    the above copyright notice, this list of conditions and the following disclaimer.
73
    Redistributions in binary form must reproduce the above copyright notice,
74
    this list of conditions and the following disclaimer in the documentation
75
    and/or other materials provided with the distribution.
76
    The name of Contributor may not be used to endorse or promote products
77
    derived from this software without specific prior written permission.
78
79
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
80
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
81
    THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
82
    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE
83
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
84
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
85
    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
86
    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
87
    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
88
    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
89
    THE POSSIBILITY OF SUCH DAMAGE.
90
\**********************************************************************************/
91
92
#include "precomp.hpp"
93
#include <limits>
94
#include <iostream>
95
96
namespace cv
97
{
98
99
// Computes cubic spline coefficients for the samples (x_i = i, y_i = f[i]), i = 0..n.
//
//   f   - function samples; f[0] .. f[n] are read.
//   n   - number of spline segments.
//   tab - output, 4*n values. Segment i occupies tab[4*i .. 4*i+3] = {a, b, c, d},
//         the coefficients of a + b*t + c*t^2 + d*t^3 with t = x - i
//         (this is the form splineInterpolate evaluates).
//
// tab is also used as scratch space: the forward sweep stores two intermediate
// values per row in slots 0 and 1, and the backward sweep overwrites them with
// the final coefficients.
template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
{
    _Tp cn = 0;
    int i;
    tab[0] = tab[1] = (_Tp)0;

    // Forward sweep over rows 1 .. n-2.
    for(i = 1; i < n-1; i++)
    {
        _Tp t = 3*(f[i+1] - 2*f[i] + f[i-1]);
        _Tp l = 1/(4 - tab[(i-1)*4]);
        tab[i*4] = l; tab[i*4+1] = (t - tab[(i-1)*4+1])*l;
    }

    // The backward sweep starts at row n-1, which the forward sweep never
    // writes. Zero its two scratch slots explicitly so the result does not
    // depend on the caller having passed a zero-filled buffer.
    if( n > 1 )
        tab[(n-1)*4] = tab[(n-1)*4+1] = (_Tp)0;

    // Backward sweep: recover c for each row and derive b and d from it.
    for(i = n-1; i >= 0; i--)
    {
        _Tp c = tab[i*4+1] - tab[i*4]*cn;
        _Tp b = f[i+1] - f[i] - (cn + c*2)*(_Tp)0.3333333333333333;
        _Tp d = (cn - c)*(_Tp)0.3333333333333333;
        tab[i*4] = f[i]; tab[i*4+1] = b;
        tab[i*4+2] = c; tab[i*4+3] = d;
        cn = c;
    }
}
123
124
// Evaluates a cubic spline at x. tab holds 4 coefficients per segment and n is
// the number of segments. The segment index is floor(x) clamped to [0, n-1],
// so an x outside [0, n] is evaluated with the first or last segment's polynomial.
template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab, int n)
{
    int seg = cvFloor(x);
    if( seg < 0 )
        seg = 0;
    if( seg > n-1 )
        seg = n-1;

    const _Tp* c = tab + seg*4;
    const _Tp t = x - seg;     // offset inside the chosen segment

    // Horner evaluation of c[0] + c[1]*t + c[2]*t^2 + c[3]*t^3.
    return ((c[3]*t + c[2])*t + c[1])*t + c[0];
}
133
134
135
// Per-channel-type constants.
//   max()  - largest representable value of the channel type.
//   half() - max()/2 + 1 (128 for 8-bit, 32768 for 16-bit unsigned channels).
template<typename _Tp> struct ColorChannel
{
    typedef float worktype_f;

    static _Tp max()
    {
        return std::numeric_limits<_Tp>::max();
    }

    static _Tp half()
    {
        return (_Tp)(max()/2 + 1);
    }
};

// Floating-point channels use the [0, 1] range instead of the type's limits.
template<> struct ColorChannel<float>
{
    typedef float worktype_f;

    static float max()
    {
        return 1.f;
    }

    static float half()
    {
        return 0.5f;
    }
};
148
149
/*template<> struct ColorChannel<double>
150
{
151
    typedef double worktype_f;
152
    static double max() { return 1.; }
153
    static double half() { return 0.5; }
154
};*/
155
156
157
///////////////////////////// Top-level template function ////////////////////////////////
158
159
template<class Cvt> void CvtColorLoop(const Mat& srcmat, Mat& dstmat, const Cvt& cvt)
160
{
161
    typedef typename Cvt::channel_type _Tp;
162
    Size sz = srcmat.size();
163
    const uchar* src = srcmat.data;
164
    uchar* dst = dstmat.data;
165
    size_t srcstep = srcmat.step, dststep = dstmat.step;
166
167
    if( srcmat.isContinuous() && dstmat.isContinuous() )
168
    {
169
        sz.width *= sz.height;
170
        sz.height = 1;
171
    }
172
173
    for( ; sz.height--; src += srcstep, dst += dststep )
174
        cvt((const _Tp*)src, (_Tp*)dst, sz.width);
175
}
176
177
178
////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
179
180
template<typename _Tp> struct RGB2RGB
181
{
182
    typedef _Tp channel_type;
183
184
    RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) {}
185
    void operator()(const _Tp* src, _Tp* dst, int n) const
186
    {
187
        int scn = srccn, dcn = dstcn, bidx = blueIdx;
188
        if( dcn == 3 )
189
        {
190
            n *= 3;
191
            for( int i = 0; i < n; i += 3, src += scn )
192
            {
193
                _Tp t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
194
                dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
195
            }
196
        }
197
        else if( scn == 3 )
198
        {
199
            n *= 3;
200
            _Tp alpha = ColorChannel<_Tp>::max();
201
            for( int i = 0; i < n; i += 3, dst += 4 )
202
            {
203
                _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2];
204
                dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
205
            }
206
        }
207
        else
208
        {
209
            n *= 4;
210
            for( int i = 0; i < n; i += 4 )
211
            {
212
                _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
213
                dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3;
214
            }
215
        }
216
    }
217
218
    int srccn, dstcn, blueIdx;
219
};
220
221
/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
222
223
struct RGB5x52RGB
224
{
225
    typedef uchar channel_type;
226
227
    RGB5x52RGB(int _dstcn, int _blueIdx, int _greenBits)
228
        : dstcn(_dstcn), blueIdx(_blueIdx), greenBits(_greenBits) {}
229
230
    void operator()(const uchar* src, uchar* dst, int n) const
231
    {
232
        int dcn = dstcn, bidx = blueIdx;
233
        if( greenBits == 6 )
234
            for( int i = 0; i < n; i++, dst += dcn )
235
            {
236
                unsigned t = ((const ushort*)src)[i];
237
                dst[bidx] = (uchar)(t << 3);
238
                dst[1] = (uchar)((t >> 3) & ~3);
239
                dst[bidx ^ 2] = (uchar)((t >> 8) & ~7);
240
                if( dcn == 4 )
241
                    dst[3] = 255;
242
            }
243
        else
244
            for( int i = 0; i < n; i++, dst += dcn )
245
            {
246
                unsigned t = ((const ushort*)src)[i];
247
                dst[bidx] = (uchar)(t << 3);
248
                dst[1] = (uchar)((t >> 2) & ~7);
249
                dst[bidx ^ 2] = (uchar)((t >> 7) & ~7);
250
                if( dcn == 4 )
251
                    dst[3] = t & 0x8000 ? 255 : 0;
252
            }
253
    }
254
255
    int dstcn, blueIdx, greenBits;
256
};
257
258
259
struct RGB2RGB5x5
260
{
261
    typedef uchar channel_type;
262
263
    RGB2RGB5x5(int _srccn, int _blueIdx, int _greenBits)
264
        : srccn(_srccn), blueIdx(_blueIdx), greenBits(_greenBits) {}
265
266
    void operator()(const uchar* src, uchar* dst, int n) const
267
    {
268
        int scn = srccn, bidx = blueIdx;
269
        if( greenBits == 6 )
270
            for( int i = 0; i < n; i++, src += scn )
271
            {
272
                ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
273
            }
274
        else if( scn == 3 )
275
            for( int i = 0; i < n; i++, src += 3 )
276
            {
277
                ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7));
278
            }
279
        else
280
            for( int i = 0; i < n; i++, src += 4 )
281
            {
282
                ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|
283
                    ((src[bidx^2]&~7) << 7)|(src[3] ? 0x8000 : 0));
284
            }
285
    }
286
287
    int srccn, blueIdx, greenBits;
288
};
289
290
///////////////////////////////// Color to/from Grayscale ////////////////////////////////
291
292
template<typename _Tp>
293
struct Gray2RGB
294
{
295
    typedef _Tp channel_type;
296
297
    Gray2RGB(int _dstcn) : dstcn(_dstcn) {}
298
    void operator()(const _Tp* src, _Tp* dst, int n) const
299
    {
300
        if( dstcn == 3 )
301
            for( int i = 0; i < n; i++, dst += 3 )
302
            {
303
                dst[0] = dst[1] = dst[2] = src[i];
304
            }
305
        else
306
        {
307
            _Tp alpha = ColorChannel<_Tp>::max();
308
            for( int i = 0; i < n; i++, dst += 4 )
309
            {
310
                dst[0] = dst[1] = dst[2] = src[i];
311
                dst[3] = alpha;
312
            }
313
        }
314
    }
315
316
    int dstcn;
317
};
318
319
320
struct Gray2RGB5x5
321
{
322
    typedef uchar channel_type;
323
324
    Gray2RGB5x5(int _greenBits) : greenBits(_greenBits) {}
325
    void operator()(const uchar* src, uchar* dst, int n) const
326
    {
327
        if( greenBits == 6 )
328
            for( int i = 0; i < n; i++ )
329
            {
330
                int t = src[i];
331
                ((ushort*)dst)[i] = (ushort)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8));
332
            }
333
        else
334
            for( int i = 0; i < n; i++ )
335
            {
336
                int t = src[i] >> 3;
337
                ((ushort*)dst)[i] = (ushort)(t|(t << 5)|(t << 10));
338
            }
339
    }
340
    int greenBits;
341
};
342
343
344
#undef R2Y
#undef G2Y
#undef B2Y

// Fixed-point constants used by the integer colour converters.
enum
{
    yuv_shift  = 14,    // fractional bits of the grey / YCrCb fixed-point coefficients
    xyz_shift  = 12,    // fractional bits of the XYZ fixed-point coefficients
    R2Y        = 4899,  // 0.299 scaled by 2^yuv_shift
    G2Y        = 9617,  // 0.587 scaled by 2^yuv_shift
    B2Y        = 1868,  // 0.114 scaled by 2^yuv_shift; the three weights sum to 2^yuv_shift
    BLOCK_SIZE = 256
};
357
358
359
struct RGB5x52Gray
360
{
361
    typedef uchar channel_type;
362
363
    RGB5x52Gray(int _greenBits) : greenBits(_greenBits) {}
364
    void operator()(const uchar* src, uchar* dst, int n) const
365
    {
366
        if( greenBits == 6 )
367
            for( int i = 0; i < n; i++ )
368
            {
369
                int t = ((ushort*)src)[i];
370
                dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
371
                                           ((t >> 3) & 0xfc)*G2Y +
372
                                           ((t >> 8) & 0xf8)*R2Y, yuv_shift);
373
            }
374
        else
375
            for( int i = 0; i < n; i++ )
376
            {
377
                int t = ((ushort*)src)[i];
378
                dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
379
                                           ((t >> 2) & 0xf8)*G2Y +
380
                                           ((t >> 7) & 0xf8)*R2Y, yuv_shift);
381
            }
382
    }
383
    int greenBits;
384
};
385
386
387
template<typename _Tp> struct RGB2Gray
388
{
389
    typedef _Tp channel_type;
390
391
    RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
392
    {
393
        static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
394
        memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
395
        if(blueIdx == 0)
396
            std::swap(coeffs[0], coeffs[2]);
397
    }
398
399
    void operator()(const _Tp* src, _Tp* dst, int n) const
400
    {
401
        int scn = srccn;
402
        float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
403
        for(int i = 0; i < n; i++, src += scn)
404
            dst[i] = saturate_cast<_Tp>(src[0]*cb + src[1]*cg + src[2]*cr);
405
    }
406
    int srccn;
407
    float coeffs[3];
408
};
409
410
411
template<> struct RGB2Gray<uchar>
412
{
413
    typedef uchar channel_type;
414
415
    RGB2Gray<uchar>(int _srccn, int blueIdx, const int* coeffs) : srccn(_srccn)
416
    {
417
        const int coeffs0[] = { R2Y, G2Y, B2Y };
418
        if(!coeffs) coeffs = coeffs0;
419
420
        int b = 0, g = 0, r = (1 << (yuv_shift-1));
421
        int db = coeffs[blueIdx^2], dg = coeffs[1], dr = coeffs[blueIdx];
422
423
        for( int i = 0; i < 256; i++, b += db, g += dg, r += dr )
424
        {
425
            tab[i] = b;
426
            tab[i+256] = g;
427
            tab[i+512] = r;
428
        }
429
    }
430
    void operator()(const uchar* src, uchar* dst, int n) const
431
    {
432
        int scn = srccn;
433
        const int* _tab = tab;
434
        for(int i = 0; i < n; i++, src += scn)
435
            dst[i] = (uchar)((_tab[src[0]] + _tab[src[1]+256] + _tab[src[2]+512]) >> yuv_shift);
436
    }
437
    int srccn;
438
    int tab[256*3];
439
};
440
441
442
template<> struct RGB2Gray<ushort>
443
{
444
    typedef ushort channel_type;
445
446
    RGB2Gray<ushort>(int _srccn, int blueIdx, const int* _coeffs) : srccn(_srccn)
447
    {
448
        static const int coeffs0[] = { R2Y, G2Y, B2Y };
449
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
450
        if( blueIdx == 0 )
451
            std::swap(coeffs[0], coeffs[2]);
452
    }
453
454
    void operator()(const ushort* src, ushort* dst, int n) const
455
    {
456
        int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
457
        for(int i = 0; i < n; i++, src += scn)
458
            dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
459
    }
460
    int srccn;
461
    int coeffs[3];
462
};
463
464
465
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
466
467
template<typename _Tp> struct RGB2YCrCb_f
468
{
469
    typedef _Tp channel_type;
470
471
    RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : srccn(_srccn), blueIdx(_blueIdx)
472
    {
473
        static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
474
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
475
        if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
476
    }
477
478
    void operator()(const _Tp* src, _Tp* dst, int n) const
479
    {
480
        int scn = srccn, bidx = blueIdx;
481
        const _Tp delta = ColorChannel<_Tp>::half();
482
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
483
        n *= 3;
484
        for(int i = 0; i < n; i += 3, src += scn)
485
        {
486
            _Tp Y = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
487
            _Tp Cr = saturate_cast<_Tp>((src[bidx^2] - Y)*C3 + delta);
488
            _Tp Cb = saturate_cast<_Tp>((src[bidx] - Y)*C4 + delta);
489
            dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
490
        }
491
    }
492
    int srccn, blueIdx;
493
    float coeffs[5];
494
};
495
496
497
template<typename _Tp> struct RGB2YCrCb_i
498
{
499
    typedef _Tp channel_type;
500
501
    RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
502
        : srccn(_srccn), blueIdx(_blueIdx)
503
    {
504
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
505
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
506
        if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
507
    }
508
    void operator()(const _Tp* src, _Tp* dst, int n) const
509
    {
510
        int scn = srccn, bidx = blueIdx;
511
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
512
        int delta = ColorChannel<_Tp>::half()*(1 << yuv_shift);
513
        n *= 3;
514
        for(int i = 0; i < n; i += 3, src += scn)
515
        {
516
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
517
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
518
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
519
            dst[i] = saturate_cast<_Tp>(Y);
520
            dst[i+1] = saturate_cast<_Tp>(Cr);
521
            dst[i+2] = saturate_cast<_Tp>(Cb);
522
        }
523
    }
524
    int srccn, blueIdx;
525
    int coeffs[5];
526
};
527
528
529
template<typename _Tp> struct YCrCb2RGB_f
530
{
531
    typedef _Tp channel_type;
532
533
    YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
534
        : dstcn(_dstcn), blueIdx(_blueIdx)
535
    {
536
        static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
537
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
538
    }
539
    void operator()(const _Tp* src, _Tp* dst, int n) const
540
    {
541
        int dcn = dstcn, bidx = blueIdx;
542
        const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
543
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
544
        n *= 3;
545
        for(int i = 0; i < n; i += 3, dst += dcn)
546
        {
547
            _Tp Y = src[i];
548
            _Tp Cr = src[i+1];
549
            _Tp Cb = src[i+2];
550
551
            _Tp b = saturate_cast<_Tp>(Y + (Cb - delta)*C3);
552
            _Tp g = saturate_cast<_Tp>(Y + (Cb - delta)*C2 + (Cr - delta)*C1);
553
            _Tp r = saturate_cast<_Tp>(Y + (Cr - delta)*C0);
554
555
            dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
556
            if( dcn == 4 )
557
                dst[3] = alpha;
558
        }
559
    }
560
    int dstcn, blueIdx;
561
    float coeffs[4];
562
};
563
564
565
template<typename _Tp> struct YCrCb2RGB_i
566
{
567
    typedef _Tp channel_type;
568
569
    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
570
        : dstcn(_dstcn), blueIdx(_blueIdx)
571
    {
572
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
573
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
574
    }
575
576
    void operator()(const _Tp* src, _Tp* dst, int n) const
577
    {
578
        int dcn = dstcn, bidx = blueIdx;
579
        const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
580
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
581
        n *= 3;
582
        for(int i = 0; i < n; i += 3, dst += dcn)
583
        {
584
            _Tp Y = src[i];
585
            _Tp Cr = src[i+1];
586
            _Tp Cb = src[i+2];
587
588
            int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
589
            int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
590
            int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);
591
592
            dst[bidx] = saturate_cast<_Tp>(b);
593
            dst[1] = saturate_cast<_Tp>(g);
594
            dst[bidx^2] = saturate_cast<_Tp>(r);
595
            if( dcn == 4 )
596
                dst[3] = alpha;
597
        }
598
    }
599
    int dstcn, blueIdx;
600
    int coeffs[4];
601
};
602
603
604
////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
605
606
// Default 3x3 matrix (row-major) used by the RGB -> XYZ converters:
// rows produce X, Y, Z; columns multiply R, G, B.
static const float sRGB2XYZ_D65[9] =
{
    0.412453f, 0.357580f, 0.180423f,    // X
    0.212671f, 0.715160f, 0.072169f,    // Y
    0.019334f, 0.119193f, 0.950227f     // Z
};

// Default 3x3 matrix (row-major) used by the XYZ -> RGB converters:
// rows produce R, G, B; columns multiply X, Y, Z.
static const float XYZ2sRGB_D65[9] =
{
    3.240479f, -1.53715f, -0.498535f,   // R
    -0.969256f, 1.875991f, 0.041556f,   // G
    0.055648f, -0.204043f, 1.057311f    // B
};
619
620
template<typename _Tp> struct RGB2XYZ_f
621
{
622
    typedef _Tp channel_type;
623
624
    RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
625
    {
626
        memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
627
        if(blueIdx == 0)
628
        {
629
            std::swap(coeffs[0], coeffs[2]);
630
            std::swap(coeffs[3], coeffs[5]);
631
            std::swap(coeffs[6], coeffs[8]);
632
        }
633
    }
634
    void operator()(const _Tp* src, _Tp* dst, int n) const
635
    {
636
        int scn = srccn;
637
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
638
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
639
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
640
641
        n *= 3;
642
        for(int i = 0; i < n; i += 3, src += scn)
643
        {
644
            _Tp X = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
645
            _Tp Y = saturate_cast<_Tp>(src[0]*C3 + src[1]*C4 + src[2]*C5);
646
            _Tp Z = saturate_cast<_Tp>(src[0]*C6 + src[1]*C7 + src[2]*C8);
647
            dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
648
        }
649
    }
650
    int srccn;
651
    float coeffs[9];
652
};
653
654
655
template<typename _Tp> struct RGB2XYZ_i
656
{
657
    typedef _Tp channel_type;
658
659
    RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
660
    {
661
        static const int coeffs0[] =
662
        {
663
            1689,    1465,    739,
664
            871,     2929,    296,
665
            79,      488,     3892
666
        };
667
        for( int i = 0; i < 9; i++ )
668
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
669
        if(blueIdx == 0)
670
        {
671
            std::swap(coeffs[0], coeffs[2]);
672
            std::swap(coeffs[3], coeffs[5]);
673
            std::swap(coeffs[6], coeffs[8]);
674
        }
675
    }
676
    void operator()(const _Tp* src, _Tp* dst, int n) const
677
    {
678
        int scn = srccn;
679
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
680
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
681
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
682
        n *= 3;
683
        for(int i = 0; i < n; i += 3, src += scn)
684
        {
685
            int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
686
            int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
687
            int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
688
            dst[i] = saturate_cast<_Tp>(X); dst[i+1] = saturate_cast<_Tp>(Y);
689
            dst[i+2] = saturate_cast<_Tp>(Z);
690
        }
691
    }
692
    int srccn;
693
    int coeffs[9];
694
};
695
696
697
template<typename _Tp> struct XYZ2RGB_f
698
{
699
    typedef _Tp channel_type;
700
701
    XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
702
    : dstcn(_dstcn), blueIdx(_blueIdx)
703
    {
704
        memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));
705
        if(blueIdx == 0)
706
        {
707
            std::swap(coeffs[0], coeffs[6]);
708
            std::swap(coeffs[1], coeffs[7]);
709
            std::swap(coeffs[2], coeffs[8]);
710
        }
711
    }
712
713
    void operator()(const _Tp* src, _Tp* dst, int n) const
714
    {
715
        int dcn = dstcn;
716
        _Tp alpha = ColorChannel<_Tp>::max();
717
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
718
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
719
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
720
        n *= 3;
721
        for(int i = 0; i < n; i += 3, dst += dcn)
722
        {
723
            _Tp B = saturate_cast<_Tp>(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2);
724
            _Tp G = saturate_cast<_Tp>(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5);
725
            _Tp R = saturate_cast<_Tp>(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8);
726
            dst[0] = B; dst[1] = G; dst[2] = R;
727
            if( dcn == 4 )
728
                dst[3] = alpha;
729
        }
730
    }
731
    int dstcn, blueIdx;
732
    float coeffs[9];
733
};
734
735
736
template<typename _Tp> struct XYZ2RGB_i
737
{
738
    typedef _Tp channel_type;
739
740
    XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
741
    : dstcn(_dstcn), blueIdx(_blueIdx)
742
    {
743
        static const int coeffs0[] =
744
        {
745
            13273,  -6296,  -2042,
746
            -3970,   7684,    170,
747
              228,   -836,   4331
748
        };
749
        for(int i = 0; i < 9; i++)
750
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
751
752
        if(blueIdx == 0)
753
        {
754
            std::swap(coeffs[0], coeffs[6]);
755
            std::swap(coeffs[1], coeffs[7]);
756
            std::swap(coeffs[2], coeffs[8]);
757
        }
758
    }
759
    void operator()(const _Tp* src, _Tp* dst, int n) const
760
    {
761
        int dcn = dstcn;
762
        _Tp alpha = ColorChannel<_Tp>::max();
763
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
764
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
765
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
766
        n *= 3;
767
        for(int i = 0; i < n; i += 3, dst += dcn)
768
        {
769
            int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
770
            int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
771
            int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
772
            dst[0] = saturate_cast<_Tp>(B); dst[1] = saturate_cast<_Tp>(G);
773
            dst[2] = saturate_cast<_Tp>(R);
774
            if( dcn == 4 )
775
                dst[3] = alpha;
776
        }
777
    }
778
    int dstcn, blueIdx;
779
    int coeffs[9];
780
};
781
782
783
////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
784
785
786
struct RGB2HSV_b
787
{
788
    typedef uchar channel_type;
789
790
    RGB2HSV_b(int _srccn, int _blueIdx, int _hrange)
791
    : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange)
792
    {
793
        CV_Assert( hrange == 180 || hrange == 256 );
794
    }
795
796
    void operator()(const uchar* src, uchar* dst, int n) const
797
    {
798
        int i, bidx = blueIdx, scn = srccn;
799
        const int hsv_shift = 12;
800
801
        static int sdiv_table[256];
802
        static int hdiv_table180[256];
803
        static int hdiv_table256[256];
804
        static volatile bool initialized = false;
805
806
        int hr = hrange;
807
        const int* hdiv_table = hr == 180 ? hdiv_table180 : hdiv_table256;
808
        n *= 3;
809
810
        if( !initialized )
811
        {
812
            sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
813
            for( i = 1; i < 256; i++ )
814
            {
815
                sdiv_table[i] = saturate_cast<int>((255 << hsv_shift)/(1.*i));
816
                hdiv_table180[i] = saturate_cast<int>((180 << hsv_shift)/(6.*i));
817
                hdiv_table256[i] = saturate_cast<int>((256 << hsv_shift)/(6.*i));
818
            }
819
            initialized = true;
820
        }
821
822
        for( i = 0; i < n; i += 3, src += scn )
823
        {
824
            int b = src[bidx], g = src[1], r = src[bidx^2];
825
            int h, s, v = b;
826
            int vmin = b, diff;
827
            int vr, vg;
828
829
            CV_CALC_MAX_8U( v, g );
830
            CV_CALC_MAX_8U( v, r );
831
            CV_CALC_MIN_8U( vmin, g );
832
            CV_CALC_MIN_8U( vmin, r );
833
834
            diff = v - vmin;
835
            vr = v == r ? -1 : 0;
836
            vg = v == g ? -1 : 0;
837
838
            s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
839
            h = (vr & (g - b)) +
840
                (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
841
            h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
842
            h += h < 0 ? hr : 0;
843
844
            dst[i] = saturate_cast<uchar>(h);
845
            dst[i+1] = (uchar)s;
846
            dst[i+2] = (uchar)v;
847
        }
848
    }
849
850
    int srccn, blueIdx, hrange;
851
};
852
853
854
// Floating-point RGB -> HSV conversion.
//   _hrange - value the full hue circle (360 degrees) is scaled to.
// Output channel order is H, S, V, with V = max(r, g, b) and
// S = (max - min)/(|max| + FLT_EPSILON).
struct RGB2HSV_f
{
    typedef float channel_type;

    RGB2HSV_f(int _srccn, int _blueIdx, float _hrange)
    : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {}

    // Converts n pixels of srccn channels into n 3-channel HSV pixels.
    void operator()(const float* src, float* dst, int n) const
    {
        const int bidx = blueIdx, scn = srccn;
        const float hscale = hrange*(1.f/360.f);

        for( int k = 0; k < n; k++, src += scn, dst += 3 )
        {
            const float b = src[bidx], g = src[1], r = src[bidx^2];

            float v = r, vmin = r;
            if( v < g ) v = g;
            if( v < b ) v = b;
            if( vmin > g ) vmin = g;
            if( vmin > b ) vmin = b;

            const float spread = v - vmin;
            const float s = spread/(float)(fabs(v) + FLT_EPSILON);
            // 60 degrees per unit of normalised channel difference; the
            // epsilon keeps the division defined for grey pixels.
            const float step = (float)(60./(spread + FLT_EPSILON));

            float h;
            if( v == r )
                h = (g - b)*step;
            else if( v == g )
                h = (b - r)*step + 120.f;
            else
                h = (r - g)*step + 240.f;

            if( h < 0 )
                h += 360.f;

            dst[0] = h*hscale;
            dst[1] = s;
            dst[2] = v;
        }
    }

    int srccn, blueIdx;
    float hrange;
};
901
902
903
struct HSV2RGB_f
904
{
905
    typedef float channel_type;
906
907
    HSV2RGB_f(int _dstcn, int _blueIdx, float _hrange)
908
    : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}
909
910
    void operator()(const float* src, float* dst, int n) const
911
    {
912
        int i, bidx = blueIdx, dcn = dstcn;
913
        float _hscale = hscale;
914
        float alpha = ColorChannel<float>::max();
915
        n *= 3;
916
917
        for( i = 0; i < n; i += 3, dst += dcn )
918
        {
919
            float h = src[i], s = src[i+1], v = src[i+2];
920
            float b, g, r;
921
922
            if( s == 0 )
923
                b = g = r = v;
924
            else
925
            {
926
                static const int sector_data[][3]=
927
                    {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
928
                float tab[4];
929
                int sector;
930
                h *= _hscale;
931
                if( h < 0 )
932
                    do h += 6; while( h < 0 );
933
                else if( h >= 6 )
934
                    do h -= 6; while( h >= 6 );
935
                sector = cvFloor(h);
936
                h -= sector;
937
938
                tab[0] = v;
939
                tab[1] = v*(1.f - s);
940
                tab[2] = v*(1.f - s*h);
941
                tab[3] = v*(1.f - s*(1.f - h));
942
943
                b = tab[sector_data[sector][0]];
944
                g = tab[sector_data[sector][1]];
945
                r = tab[sector_data[sector][2]];
946
            }
947
948
            dst[bidx] = b;
949
            dst[1] = g;
950
            dst[bidx^2] = r;
951
            if( dcn == 4 )
952
                dst[3] = alpha;
953
        }
954
    }
955
956
    int dstcn, blueIdx;
957
    float hscale;
958
};
959
960
961
struct HSV2RGB_b
962
{
963
    typedef uchar channel_type;
964
965
    HSV2RGB_b(int _dstcn, int _blueIdx, int _hrange)
966
    : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
967
    {}
968
969
    void operator()(const uchar* src, uchar* dst, int n) const
970
    {
971
        int i, j, dcn = dstcn;
972
        uchar alpha = ColorChannel<uchar>::max();
973
        float buf[3*BLOCK_SIZE];
974
975
        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
976
        {
977
            int dn = std::min(n - i, (int)BLOCK_SIZE);
978
979
            for( j = 0; j < dn*3; j += 3 )
980
            {
981
                buf[j] = src[j];
982
                buf[j+1] = src[j+1]*(1.f/255.f);
983
                buf[j+2] = src[j+2]*(1.f/255.f);
984
            }
985
            cvt(buf, buf, dn);
986
987
            for( j = 0; j < dn*3; j += 3, dst += dcn )
988
            {
989
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
990
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
991
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
992
                if( dcn == 4 )
993
                    dst[3] = alpha;
994
            }
995
        }
996
    }
997
998
    int dstcn;
999
    HSV2RGB_f cvt;
1000
};
1001
1002
1003
///////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
1004
1005
// Floating-point RGB/BGR(A) -> HLS conversion functor.
// Input pixels are srccn floats wide; blue is read from index blueIdx, green from
// index 1 and red from index blueIdx^2. Each output pixel is three floats:
// hue (degrees multiplied by hrange/360), lightness and saturation.
struct RGB2HLS_f
{
    typedef float channel_type;

    RGB2HLS_f(int _srccn, int _blueIdx, float _hrange)
        : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange)
    {
    }

    // Converts n pixels from src into dst. All three inputs of a pixel are read
    // before its outputs are written.
    void operator()(const float* src, float* dst, int n) const
    {
        const int blue = blueIdx, red = blueIdx ^ 2, step = srccn;
        const float hueScale = hrange*(1.f/360.f);

        for( int px = 0; px < n; ++px, src += step, dst += 3 )
        {
            const float b = src[blue], g = src[1], r = src[red];

            float vmax = r < g ? g : r;
            vmax = vmax < b ? b : vmax;
            float vmin = r > g ? g : r;
            vmin = vmin > b ? b : vmin;

            const float range = vmax - vmin;
            const float light = (vmax + vmin)*0.5f;

            // Hue and saturation stay 0 when the three channels are (nearly) equal.
            float hue = 0.f, sat = 0.f;

            if( range > FLT_EPSILON )
            {
                if( light < 0.5f )
                    sat = range/(vmax + vmin);
                else
                    sat = range/(2 - vmax - vmin);

                const float k = 60.f/range;

                // hue in degrees, measured from the dominant channel
                if( vmax == r )
                    hue = (g - b)*k;
                else if( vmax == g )
                    hue = (b - r)*k + 120.f;
                else
                    hue = (r - g)*k + 240.f;

                if( hue < 0.f )
                    hue += 360.f;
            }

            dst[0] = hue*hueScale;
            dst[1] = light;
            dst[2] = sat;
        }
    }

    int srccn, blueIdx;
    float hrange;
};
1057
1058
1059
struct RGB2HLS_b
1060
{
1061
    typedef uchar channel_type;
1062
1063
    RGB2HLS_b(int _srccn, int _blueIdx, int _hrange)
1064
    : srccn(_srccn), cvt(3, _blueIdx, (float)_hrange) {}
1065
1066
    void operator()(const uchar* src, uchar* dst, int n) const
1067
    {
1068
        int i, j, scn = srccn;
1069
        float buf[3*BLOCK_SIZE];
1070
1071
        for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
1072
        {
1073
            int dn = std::min(n - i, (int)BLOCK_SIZE);
1074
1075
            for( j = 0; j < dn*3; j += 3, src += scn )
1076
            {
1077
                buf[j] = src[0]*(1.f/255.f);
1078
                buf[j+1] = src[1]*(1.f/255.f);
1079
                buf[j+2] = src[2]*(1.f/255.f);
1080
            }
1081
            cvt(buf, buf, dn);
1082
1083
            for( j = 0; j < dn*3; j += 3 )
1084
            {
1085
                dst[j] = saturate_cast<uchar>(buf[j]);
1086
                dst[j+1] = saturate_cast<uchar>(buf[j+1]*255.f);
1087
                dst[j+2] = saturate_cast<uchar>(buf[j+2]*255.f);
1088
            }
1089
        }
1090
    }
1091
1092
    int srccn;
1093
    RGB2HLS_f cvt;
1094
};
1095
1096
1097
struct HLS2RGB_f
1098
{
1099
    typedef float channel_type;
1100
1101
    HLS2RGB_f(int _dstcn, int _blueIdx, float _hrange)
1102
    : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}
1103
1104
    void operator()(const float* src, float* dst, int n) const
1105
    {
1106
        int i, bidx = blueIdx, dcn = dstcn;
1107
        float _hscale = hscale;
1108
        float alpha = ColorChannel<float>::max();
1109
        n *= 3;
1110
1111
        for( i = 0; i < n; i += 3, dst += dcn )
1112
        {
1113
            float h = src[i], l = src[i+1], s = src[i+2];
1114
            float b, g, r;
1115
1116
            if( s == 0 )
1117
                b = g = r = l;
1118
            else
1119
            {
1120
                static const int sector_data[][3]=
1121
                {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
1122
                float tab[4];
1123
                int sector;
1124
1125
                float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
1126
                float p1 = 2*l - p2;
1127
1128
                h *= _hscale;
1129
                if( h < 0 )
1130
                    do h += 6; while( h < 0 );
1131
                else if( h >= 6 )
1132
                    do h -= 6; while( h >= 6 );
1133
1134
                assert( 0 <= h && h < 6 );
1135
                sector = cvFloor(h);
1136
                h -= sector;
1137
1138
                tab[0] = p2;
1139
                tab[1] = p1;
1140
                tab[2] = p1 + (p2 - p1)*(1-h);
1141
                tab[3] = p1 + (p2 - p1)*h;
1142
1143
                b = tab[sector_data[sector][0]];
1144
                g = tab[sector_data[sector][1]];
1145
                r = tab[sector_data[sector][2]];
1146
            }
1147
1148
            dst[bidx] = b;
1149
            dst[1] = g;
1150
            dst[bidx^2] = r;
1151
            if( dcn == 4 )
1152
                dst[3] = alpha;
1153
        }
1154
    }
1155
1156
    int dstcn, blueIdx;
1157
    float hscale;
1158
};
1159
1160
1161
struct HLS2RGB_b
1162
{
1163
    typedef uchar channel_type;
1164
1165
    HLS2RGB_b(int _dstcn, int _blueIdx, int _hrange)
1166
    : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
1167
    {}
1168
1169
    void operator()(const uchar* src, uchar* dst, int n) const
1170
    {
1171
        int i, j, dcn = dstcn;
1172
        uchar alpha = ColorChannel<uchar>::max();
1173
        float buf[3*BLOCK_SIZE];
1174
1175
        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
1176
        {
1177
            int dn = std::min(n - i, (int)BLOCK_SIZE);
1178
1179
            for( j = 0; j < dn*3; j += 3 )
1180
            {
1181
                buf[j] = src[j];
1182
                buf[j+1] = src[j+1]*(1.f/255.f);
1183
                buf[j+2] = src[j+2]*(1.f/255.f);
1184
            }
1185
            cvt(buf, buf, dn);
1186
1187
            for( j = 0; j < dn*3; j += 3, dst += dcn )
1188
            {
1189
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
1190
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
1191
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
1192
                if( dcn == 4 )
1193
                    dst[3] = alpha;
1194
            }
1195
        }
1196
    }
1197
1198
    int dstcn;
1199
    HLS2RGB_f cvt;
1200
};
1201
1202
1203
///////////////////////////////////// RGB <-> L*a*b* /////////////////////////////////////
1204
1205
static const float D65[] = { 0.950456f, 1.f, 1.088754f };
1206
1207
enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 };
1208
static float LabCbrtTab[LAB_CBRT_TAB_SIZE*4];
1209
static const float LabCbrtTabScale = LAB_CBRT_TAB_SIZE/1.5f;
1210
1211
static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4];
1212
static const float GammaTabScale = (float)GAMMA_TAB_SIZE;
1213
1214
static ushort sRGBGammaTab_b[256], linearGammaTab_b[256];
1215
#undef lab_shift
1216
#define lab_shift xyz_shift
1217
#define gamma_shift 3
1218
#define lab_shift2 (lab_shift + gamma_shift)
1219
#define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift))
1220
static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B];
1221
1222
static void initLabTabs()
1223
{
1224
    static bool initialized = false;
1225
    if(!initialized)
1226
    {
1227
        float f[LAB_CBRT_TAB_SIZE+1], g[GAMMA_TAB_SIZE+1], ig[GAMMA_TAB_SIZE+1], scale = 1.f/LabCbrtTabScale;
1228
        int i;
1229
        for(i = 0; i <= LAB_CBRT_TAB_SIZE; i++)
1230
        {
1231
            float x = i*scale;
1232
            f[i] = x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x);
1233
        }
1234
        splineBuild(f, LAB_CBRT_TAB_SIZE, LabCbrtTab);
1235
1236
        scale = 1.f/GammaTabScale;
1237
        for(i = 0; i <= GAMMA_TAB_SIZE; i++)
1238
        {
1239
            float x = i*scale;
1240
            g[i] = x <= 0.04045f ? x*(1.f/12.92f) : (float)pow((double)(x + 0.055)*(1./1.055), 2.4);
1241
            ig[i] = x <= 0.0031308 ? x*12.92f : (float)(1.055*pow((double)x, 1./2.4) - 0.055);
1242
        }
1243
        splineBuild(g, GAMMA_TAB_SIZE, sRGBGammaTab);
1244
        splineBuild(ig, GAMMA_TAB_SIZE, sRGBInvGammaTab);
1245
1246
        for(i = 0; i < 256; i++)
1247
        {
1248
            float x = i*(1.f/255.f);
1249
            sRGBGammaTab_b[i] = saturate_cast<ushort>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)pow((double)(x + 0.055)*(1./1.055), 2.4)));
1250
            linearGammaTab_b[i] = (ushort)(i*(1 << gamma_shift));
1251
        }
1252
1253
        for(i = 0; i < LAB_CBRT_TAB_SIZE_B; i++)
1254
        {
1255
            float x = i*(1.f/(255.f*(1 << gamma_shift)));
1256
            LabCbrtTab_b[i] = saturate_cast<ushort>((1 << lab_shift2)*(x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x)));
1257
        }
1258
        initialized = true;
1259
    }
1260
}
1261
1262
struct RGB2Lab_b
1263
{
1264
    typedef uchar channel_type;
1265
1266
    RGB2Lab_b(int _srccn, int blueIdx, const float* _coeffs,
1267
              const float* _whitept, bool _srgb)
1268
    : srccn(_srccn), srgb(_srgb)
1269
    {
1270
        static volatile int _3 = 3;
1271
        initLabTabs();
1272
1273
        if(!_coeffs) _coeffs = sRGB2XYZ_D65;
1274
        if(!_whitept) _whitept = D65;
1275
        float scale[] =
1276
        {
1277
            (1 << lab_shift)/_whitept[0],
1278
            (float)(1 << lab_shift),
1279
            (1 << lab_shift)/_whitept[2]
1280
        };
1281
1282
        for( int i = 0; i < _3; i++ )
1283
        {
1284
            coeffs[i*3+(blueIdx^2)] = cvRound(_coeffs[i*3]*scale[i]);
1285
            coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]);
1286
            coeffs[i*3+blueIdx] = cvRound(_coeffs[i*3+2]*scale[i]);
1287
1288
            CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
1289
                      coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) );
1290
        }
1291
    }
1292
1293
    void operator()(const uchar* src, uchar* dst, int n) const
1294
    {
1295
        const int Lscale = (116*255+50)/100;
1296
        const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100);
1297
        const ushort* tab = srgb ? sRGBGammaTab_b : linearGammaTab_b;
1298
        int i, scn = srccn;
1299
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
1300
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
1301
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
1302
        n *= 3;
1303
1304
        for( i = 0; i < n; i += 3, src += scn )
1305
        {
1306
            int R = tab[src[0]], G = tab[src[1]], B = tab[src[2]];
1307
            int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)];
1308
            int fY = LabCbrtTab_b[CV_DESCALE(R*C3 + G*C4 + B*C5, lab_shift)];
1309
            int fZ = LabCbrtTab_b[CV_DESCALE(R*C6 + G*C7 + B*C8, lab_shift)];
1310
1311
            int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );
1312
            int a = CV_DESCALE( 500*(fX - fY) + 128*(1 << lab_shift2), lab_shift2 );
1313
            int b = CV_DESCALE( 200*(fY - fZ) + 128*(1 << lab_shift2), lab_shift2 );
1314
1315
            dst[i] = saturate_cast<uchar>(L);
1316
            dst[i+1] = saturate_cast<uchar>(a);
1317
            dst[i+2] = saturate_cast<uchar>(b);
1318
        }
1319
    }
1320
1321
    int srccn;
1322
    int coeffs[9];
1323
    bool srgb;
1324
};
1325
1326
1327
struct RGB2Lab_f
1328
{
1329
    typedef float channel_type;
1330
1331
    RGB2Lab_f(int _srccn, int blueIdx, const float* _coeffs,
1332
              const float* _whitept, bool _srgb)
1333
    : srccn(_srccn), srgb(_srgb)
1334
    {
1335
        volatile int _3 = 3;
1336
        initLabTabs();
1337
1338
        if(!_coeffs) _coeffs = sRGB2XYZ_D65;
1339
        if(!_whitept) _whitept = D65;
1340
        float scale[] = { LabCbrtTabScale/_whitept[0], LabCbrtTabScale, LabCbrtTabScale/_whitept[2] };
1341
1342
        for( int i = 0; i < _3; i++ )
1343
        {
1344
            coeffs[i*3+(blueIdx^2)] = _coeffs[i*3]*scale[i];
1345
            coeffs[i*3+1] = _coeffs[i*3+1]*scale[i];
1346
            coeffs[i*3+blueIdx] = _coeffs[i*3+2]*scale[i];
1347
            CV_Assert( coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
1348
                       coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 1.5f*LabCbrtTabScale );
1349
        }
1350
    }
1351
1352
    void operator()(const float* src, float* dst, int n) const
1353
    {
1354
        int i, scn = srccn;
1355
        float gscale = GammaTabScale;
1356
        const float* gammaTab = srgb ? sRGBGammaTab : 0;
1357
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
1358
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
1359
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
1360
        n *= 3;
1361
1362
        for( i = 0; i < n; i += 3, src += scn )
1363
        {
1364
            float R = src[0], G = src[1], B = src[2];
1365
            if( gammaTab )
1366
            {
1367
                R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
1368
                G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
1369
                B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
1370
            }
1371
            float fX = splineInterpolate(R*C0 + G*C1 + B*C2, LabCbrtTab, LAB_CBRT_TAB_SIZE);
1372
            float fY = splineInterpolate(R*C3 + G*C4 + B*C5, LabCbrtTab, LAB_CBRT_TAB_SIZE);
1373
            float fZ = splineInterpolate(R*C6 + G*C7 + B*C8, LabCbrtTab, LAB_CBRT_TAB_SIZE);
1374
1375
            float L = 116.f*fY - 16.f;
1376
            float a = 500.f*(fX - fY);
1377
            float b = 200.f*(fY - fZ);
1378
1379
            dst[i] = L; dst[i+1] = a; dst[i+2] = b;
1380
        }
1381
    }
1382
1383
    int srccn;
1384
    float coeffs[9];
1385
    bool srgb;
1386
};
1387
1388
1389
struct Lab2RGB_f
1390
{
1391
    typedef float channel_type;
1392
1393
    Lab2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
1394
               const float* _whitept, bool _srgb )
1395
    : dstcn(_dstcn), srgb(_srgb)
1396
    {
1397
        initLabTabs();
1398
1399
        if(!_coeffs) _coeffs = XYZ2sRGB_D65;
1400
        if(!_whitept) _whitept = D65;
1401
1402
        for( int i = 0; i < 3; i++ )
1403
        {
1404
            coeffs[i+(blueIdx^2)*3] = _coeffs[i]*_whitept[i];
1405
            coeffs[i+3] = _coeffs[i+3]*_whitept[i];
1406
            coeffs[i+blueIdx*3] = _coeffs[i+6]*_whitept[i];
1407
        }
1408
    }
1409
1410
    void operator()(const float* src, float* dst, int n) const
1411
    {
1412
        int i, dcn = dstcn;
1413
        const float* gammaTab = srgb ? sRGBInvGammaTab : 0;
1414
        float gscale = GammaTabScale;
1415
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
1416
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
1417
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
1418
        float alpha = ColorChannel<float>::max();
1419
        n *= 3;
1420
1421
        for( i = 0; i < n; i += 3, dst += dcn )
1422
        {
1423
            float L = src[i], a = src[i+1], b = src[i+2];
1424
            float Y = (L + 16.f)*(1.f/116.f);
1425
            float X = (Y + a*0.002f);
1426
            float Z = (Y - b*0.005f);
1427
            Y = Y*Y*Y;
1428
            X = X*X*X;
1429
            Z = Z*Z*Z;
1430
1431
            float R = X*C0 + Y*C1 + Z*C2;
1432
            float G = X*C3 + Y*C4 + Z*C5;
1433
            float B = X*C6 + Y*C7 + Z*C8;
1434
1435
            if( gammaTab )
1436
            {
1437
                R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
1438
                G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
1439
                B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
1440
            }
1441
1442
            dst[0] = R; dst[1] = G; dst[2] = B;
1443
            if( dcn == 4 )
1444
                dst[3] = alpha;
1445
        }
1446
    }
1447
1448
    int dstcn;
1449
    float coeffs[9];
1450
    bool srgb;
1451
};
1452
1453
1454
struct Lab2RGB_b
1455
{
1456
    typedef uchar channel_type;
1457
1458
    Lab2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
1459
               const float* _whitept, bool _srgb )
1460
    : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb ) {}
1461
1462
    void operator()(const uchar* src, uchar* dst, int n) const
1463
    {
1464
        int i, j, dcn = dstcn;
1465
        uchar alpha = ColorChannel<uchar>::max();
1466
        float buf[3*BLOCK_SIZE];
1467
1468
        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
1469
        {
1470
            int dn = std::min(n - i, (int)BLOCK_SIZE);
1471
1472
            for( j = 0; j < dn*3; j += 3 )
1473
            {
1474
                buf[j] = src[j]*(100.f/255.f);
1475
                buf[j+1] = (float)(src[j+1] - 128);
1476
                buf[j+2] = (float)(src[j+2] - 128);
1477
            }
1478
            cvt(buf, buf, dn);
1479
1480
            for( j = 0; j < dn*3; j += 3, dst += dcn )
1481
            {
1482
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
1483
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
1484
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
1485
                if( dcn == 4 )
1486
                    dst[3] = alpha;
1487
            }
1488
        }
1489
    }
1490
1491
    int dstcn;
1492
    Lab2RGB_f cvt;
1493
};
1494
1495
1496
///////////////////////////////////// RGB <-> L*u*v* /////////////////////////////////////
1497
1498
struct RGB2Luv_f
1499
{
1500
    typedef float channel_type;
1501
1502
    RGB2Luv_f( int _srccn, int blueIdx, const float* _coeffs,
1503
               const float* whitept, bool _srgb )
1504
    : srccn(_srccn), srgb(_srgb)
1505
    {
1506
        volatile int i;
1507
        initLabTabs();
1508
1509
        if(!_coeffs) _coeffs = sRGB2XYZ_D65;
1510
        if(!whitept) whitept = D65;
1511
1512
        for( i = 0; i < 3; i++ )
1513
        {
1514
            coeffs[i*3] = _coeffs[i*3];
1515
            coeffs[i*3+1] = _coeffs[i*3+1];
1516
            coeffs[i*3+2] = _coeffs[i*3+2];
1517
            if( blueIdx == 0 )
1518
                std::swap(coeffs[i*3], coeffs[i*3+2]);
1519
            CV_Assert( coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
1520
                      coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 1.5f );
1521
        }
1522
1523
        float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
1524
        un = 4*whitept[0]*d;
1525
        vn = 9*whitept[1]*d;
1526
1527
        CV_Assert(whitept[1] == 1.f);
1528
    }
1529
1530
    void operator()(const float* src, float* dst, int n) const
1531
    {
1532
        int i, scn = srccn;
1533
        float gscale = GammaTabScale;
1534
        const float* gammaTab = srgb ? sRGBGammaTab : 0;
1535
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
1536
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
1537
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
1538
        float _un = 13*un, _vn = 13*vn;
1539
        n *= 3;
1540
1541
        for( i = 0; i < n; i += 3, src += scn )
1542
        {
1543
            float R = src[0], G = src[1], B = src[2];
1544
            if( gammaTab )
1545
            {
1546
                R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
1547
                G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
1548
                B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
1549
            }
1550
1551
            float X = R*C0 + G*C1 + B*C2;
1552
            float Y = R*C3 + G*C4 + B*C5;
1553
            float Z = R*C6 + G*C7 + B*C8;
1554
1555
            float L = splineInterpolate(Y*LabCbrtTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE);
1556
            L = 116.f*L - 16.f;
1557
1558
            float d = (4*13) / std::max(X + 15 * Y + 3 * Z, FLT_EPSILON);
1559
            float u = L*(X*d - _un);
1560
            float v = L*((9*0.25f)*Y*d - _vn);
1561
1562
            dst[i] = L; dst[i+1] = u; dst[i+2] = v;
1563
        }
1564
    }
1565
1566
    int srccn;
1567
    float coeffs[9], un, vn;
1568
    bool srgb;
1569
};
1570
1571
1572
struct Luv2RGB_f
1573
{
1574
    typedef float channel_type;
1575
1576
    Luv2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
1577
              const float* whitept, bool _srgb )
1578
    : dstcn(_dstcn), srgb(_srgb)
1579
    {
1580
        initLabTabs();
1581
1582
        if(!_coeffs) _coeffs = XYZ2sRGB_D65;
1583
        if(!whitept) whitept = D65;
1584
1585
        for( int i = 0; i < 3; i++ )
1586
        {
1587
            coeffs[i+(blueIdx^2)*3] = _coeffs[i];
1588
            coeffs[i+3] = _coeffs[i+3];
1589
            coeffs[i+blueIdx*3] = _coeffs[i+6];
1590
        }
1591
1592
        float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
1593
        un = 4*whitept[0]*d;
1594
        vn = 9*whitept[1]*d;
1595
1596
        CV_Assert(whitept[1] == 1.f);
1597
    }
1598
1599
    void operator()(const float* src, float* dst, int n) const
1600
    {
1601
        int i, dcn = dstcn;
1602
        const float* gammaTab = srgb ? sRGBInvGammaTab : 0;
1603
        float gscale = GammaTabScale;
1604
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
1605
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
1606
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
1607
        float alpha = ColorChannel<float>::max();
1608
        float _un = un, _vn = vn;
1609
        n *= 3;
1610
1611
        for( i = 0; i < n; i += 3, dst += dcn )
1612
        {
1613
            float L = src[i], u = src[i+1], v = src[i+2], d, X, Y, Z;
1614
            Y = (L + 16.f) * (1.f/116.f);
1615
            Y = Y*Y*Y;
1616
            d = (1.f/13.f)/L;
1617
            u = u*d + _un;
1618
            v = v*d + _vn;
1619
            float iv = 1.f/v;
1620
            X = 2.25f * u * Y * iv ;
1621
            Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv;
1622
1623
            float R = X*C0 + Y*C1 + Z*C2;
1624
            float G = X*C3 + Y*C4 + Z*C5;
1625
            float B = X*C6 + Y*C7 + Z*C8;
1626
1627
            if( gammaTab )
1628
            {
1629
                R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
1630
                G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
1631
                B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
1632
            }
1633
1634
            dst[0] = R; dst[1] = G; dst[2] = B;
1635
            if( dcn == 4 )
1636
                dst[3] = alpha;
1637
        }
1638
    }
1639
1640
    int dstcn;
1641
    float coeffs[9], un, vn;
1642
    bool srgb;
1643
};
1644
1645
1646
struct RGB2Luv_b
1647
{
1648
    typedef uchar channel_type;
1649
1650
    RGB2Luv_b( int _srccn, int blueIdx, const float* _coeffs,
1651
               const float* _whitept, bool _srgb )
1652
    : srccn(_srccn), cvt(3, blueIdx, _coeffs, _whitept, _srgb) {}
1653
1654
    void operator()(const uchar* src, uchar* dst, int n) const
1655
    {
1656
        int i, j, scn = srccn;
1657
        float buf[3*BLOCK_SIZE];
1658
1659
        for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
1660
        {
1661
            int dn = std::min(n - i, (int)BLOCK_SIZE);
1662
1663
            for( j = 0; j < dn*3; j += 3, src += scn )
1664
            {
1665
                buf[j] = src[0]*(1.f/255.f);
1666
                buf[j+1] = (float)(src[1]*(1.f/255.f));
1667
                buf[j+2] = (float)(src[2]*(1.f/255.f));
1668
            }
1669
            cvt(buf, buf, dn);
1670
1671
            for( j = 0; j < dn*3; j += 3 )
1672
            {
1673
                dst[j] = saturate_cast<uchar>(buf[j]*2.55f);
1674
                dst[j+1] = saturate_cast<uchar>(buf[j+1]*0.72033898305084743f + 96.525423728813564f);
1675
                dst[j+2] = saturate_cast<uchar>(buf[j+2]*0.99609375f + 139.453125f);
1676
            }
1677
        }
1678
    }
1679
1680
    int srccn;
1681
    RGB2Luv_f cvt;
1682
};
1683
1684
1685
struct Luv2RGB_b
1686
{
1687
    typedef uchar channel_type;
1688
1689
    Luv2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
1690
               const float* _whitept, bool _srgb )
1691
    : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb ) {}
1692
1693
    void operator()(const uchar* src, uchar* dst, int n) const
1694
    {
1695
        int i, j, dcn = dstcn;
1696
        uchar alpha = ColorChannel<uchar>::max();
1697
        float buf[3*BLOCK_SIZE];
1698
1699
        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
1700
        {
1701
            int dn = std::min(n - i, (int)BLOCK_SIZE);
1702
1703
            for( j = 0; j < dn*3; j += 3 )
1704
            {
1705
                buf[j] = src[j]*(100.f/255.f);
1706
                buf[j+1] = (float)(src[j+1]*1.388235294117647f - 134.f);
1707
                buf[j+2] = (float)(src[j+2]*1.003921568627451f - 140.f);
1708
            }
1709
            cvt(buf, buf, dn);
1710
1711
            for( j = 0; j < dn*3; j += 3, dst += dcn )
1712
            {
1713
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
1714
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
1715
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
1716
                if( dcn == 4 )
1717
                    dst[3] = alpha;
1718
            }
1719
        }
1720
    }
1721
1722
    int dstcn;
1723
    Luv2RGB_f cvt;
1724
};
1725
1726
1727
//////////////////////////// Bayer Pattern -> RGB conversion /////////////////////////////
1728
1729
// Stub Bayer interpolator: both methods ignore their arguments, touch no memory
// and return 0.
// NOTE(review): the return value presumably tells the caller how much of the row
// was handled, so 0 would leave everything to the caller's own code - confirm.
template<typename T>
class SIMDBayerStubInterpolator_
{
public:
    int bayer2Gray(const T* /*bayer*/, int /*bayer_step*/, T* /*dst*/,
                   int /*width*/, int /*bcoeff*/, int /*gcoeff*/, int /*rcoeff*/) const
    { return 0; }

    int bayer2RGB(const T*, int, T*, int, int) const
    { return 0; }
};
1743
1744
#if CV_SSE2
1745
class SIMDBayerInterpolator_8u
1746
{
1747
public:
1748
    SIMDBayerInterpolator_8u()
1749
    {
1750
        use_simd = checkHardwareSupport(CV_CPU_SSE2);
1751
    }
1752
1753
    int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
1754
                   int width, int bcoeff, int gcoeff, int rcoeff) const
1755
    {
1756
        if( !use_simd )
1757
            return 0;
1758
1759
        __m128i _b2y = _mm_set1_epi16((short)(rcoeff*2));
1760
        __m128i _g2y = _mm_set1_epi16((short)(gcoeff*2));
1761
        __m128i _r2y = _mm_set1_epi16((short)(bcoeff*2));
1762
        const uchar* bayer_end = bayer + width;
1763
1764
        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
1765
        {
1766
            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
1767
            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
1768
            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
1769
1770
            __m128i b1 = _mm_add_epi16(_mm_srli_epi16(_mm_slli_epi16(r0, 8), 7),
1771
                                       _mm_srli_epi16(_mm_slli_epi16(r2, 8), 7));
1772
            __m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));
1773
            b1 = _mm_slli_epi16(_mm_srli_si128(b1, 2), 1);
1774
1775
            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 7), _mm_srli_epi16(r2, 7));
1776
            __m128i g1 = _mm_srli_epi16(_mm_slli_epi16(r1, 8), 7);
1777
            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
1778
            g1 = _mm_slli_epi16(_mm_srli_si128(g1, 2), 2);
1779
1780
            r0 = _mm_srli_epi16(r1, 8);
1781
            r1 = _mm_slli_epi16(_mm_add_epi16(r0, _mm_srli_si128(r0, 2)), 2);
1782
            r0 = _mm_slli_epi16(r0, 3);
1783
1784
            g0 = _mm_add_epi16(_mm_mulhi_epi16(b0, _b2y), _mm_mulhi_epi16(g0, _g2y));
1785
            g1 = _mm_add_epi16(_mm_mulhi_epi16(b1, _b2y), _mm_mulhi_epi16(g1, _g2y));
1786
            g0 = _mm_add_epi16(g0, _mm_mulhi_epi16(r0, _r2y));
1787
            g1 = _mm_add_epi16(g1, _mm_mulhi_epi16(r1, _r2y));
1788
            g0 = _mm_srli_epi16(g0, 2);
1789
            g1 = _mm_srli_epi16(g1, 2);
1790
            g0 = _mm_packus_epi16(g0, g0);
1791
            g1 = _mm_packus_epi16(g1, g1);
1792
            g0 = _mm_unpacklo_epi8(g0, g1);
1793
            _mm_storeu_si128((__m128i*)dst, g0);
1794
        }
1795
1796
        return (int)(bayer - (bayer_end - width));
1797
    }
1798
1799
    int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
1800
    {
1801
        if( !use_simd )
1802
            return 0;
1803
        /*
1804
         B G B G | B G B G | B G B G | B G B G
1805
         G R G R | G R G R | G R G R | G R G R
1806
         B G B G | B G B G | B G B G | B G B G
1807
         */
1808
        __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
1809
        __m128i mask = _mm_set1_epi16(blue < 0 ? -1 : 0), z = _mm_setzero_si128();
1810
        __m128i masklo = _mm_set1_epi16(0x00ff);
1811
        const uchar* bayer_end = bayer + width;
1812
1813
        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
1814
        {
1815
            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
1816
            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
1817
            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
1818
1819
            __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklo), _mm_and_si128(r2, masklo));
1820
            __m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));
1821
            b1 = _mm_srli_si128(b1, 2);
1822
            b1 = _mm_srli_epi16(_mm_add_epi16(b1, delta1), 1);
1823
            b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
1824
            b0 = _mm_packus_epi16(b0, b1);
1825
1826
            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_srli_epi16(r2, 8));
1827
            __m128i g1 = _mm_and_si128(r1, masklo);
1828
            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
1829
            g1 = _mm_srli_si128(g1, 2);
1830
            g0 = _mm_srli_epi16(_mm_add_epi16(g0, delta2), 2);
1831
            g0 = _mm_packus_epi16(g0, g1);
1832
1833
            r0 = _mm_srli_epi16(r1, 8);
1834
            r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
1835
            r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
1836
            r0 = _mm_packus_epi16(r0, r1);
1837
1838
            b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
1839
            b0 = _mm_xor_si128(b0, b1);
1840
            r0 = _mm_xor_si128(r0, b1);
1841
1842
            // b1 g1 b1 g1 ...
1843
            b1 = _mm_unpackhi_epi8(b0, g0);
1844
            // b0 g0 b2 g2 b4 g4 ....
1845
            b0 = _mm_unpacklo_epi8(b0, g0);
1846
1847
            // r1 0 r3 0 ...
1848
            r1 = _mm_unpackhi_epi8(r0, z);
1849
            // r0 0 r2 0 r4 0 ...
1850
            r0 = _mm_unpacklo_epi8(r0, z);
1851
1852
            // 0 b0 g0 r0 0 b2 g2 r2 0 ...
1853
            g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
1854
            // 0 b8 g8 r8 0 b10 g10 r10 0 ...
1855
            g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);
1856
1857
            // b1 g1 r1 0 b3 g3 r3 ....
1858
            r0 = _mm_unpacklo_epi16(b1, r1);
1859
            // b9 g9 r9 0 ...
1860
            r1 = _mm_unpackhi_epi16(b1, r1);
1861
1862
            b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
1863
            b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);
1864
1865
            _mm_storel_epi64((__m128i*)(dst-1+0), b0);
1866
            _mm_storel_epi64((__m128i*)(dst-1+6*1), _mm_srli_si128(b0, 8));
1867
            _mm_storel_epi64((__m128i*)(dst-1+6*2), b1);
1868
            _mm_storel_epi64((__m128i*)(dst-1+6*3), _mm_srli_si128(b1, 8));
1869
1870
            g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
1871
            g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);
1872
1873
            _mm_storel_epi64((__m128i*)(dst-1+6*4), g0);
1874
            _mm_storel_epi64((__m128i*)(dst-1+6*5), _mm_srli_si128(g0, 8));
1875
1876
            _mm_storel_epi64((__m128i*)(dst-1+6*6), g1);
1877
        }
1878
1879
        return (int)(bayer - (bayer_end - width));
1880
    }
1881
1882
    bool use_simd;
1883
};
1884
#else
1885
typedef SIMDBayerStubInterpolator_<uchar> SIMDBayerInterpolator_8u;
1886
#endif
1887
1888
template<typename T, class SIMDInterpolator>
1889
static void Bayer2Gray_( const Mat& srcmat, Mat& dstmat, int code )
1890
{
1891
    SIMDInterpolator vecOp;
1892
    const int R2Y = 4899;
1893
    const int G2Y = 9617;
1894
    const int B2Y = 1868;
1895
    const int SHIFT = 14;
1896
1897
    const T* bayer0 = (const T*)srcmat.data;
1898
    int bayer_step = (int)(srcmat.step/sizeof(T));
1899
    T* dst0 = (T*)dstmat.data;
1900
    int dst_step = (int)(dstmat.step/sizeof(T));
1901
    Size size = srcmat.size();
1902
    int bcoeff = B2Y, rcoeff = R2Y;
1903
    int start_with_green = code == CV_BayerGB2GRAY || code == CV_BayerGR2GRAY;
1904
    bool brow = true;
1905
1906
    if( code != CV_BayerBG2GRAY && code != CV_BayerGB2GRAY )
1907
    {
1908
        brow = false;
1909
        std::swap(bcoeff, rcoeff);
1910
    }
1911
1912
    dst0 += dst_step + 1;
1913
    size.height -= 2;
1914
    size.width -= 2;
1915
1916
    for( ; size.height-- > 0; bayer0 += bayer_step, dst0 += dst_step )
1917
    {
1918
        unsigned t0, t1, t2;
1919
        const T* bayer = bayer0;
1920
        T* dst = dst0;
1921
        const T* bayer_end = bayer + size.width;
1922
1923
        if( size.width <= 0 )
1924
        {
1925
            dst[-1] = dst[size.width] = 0;
1926
            continue;
1927
        }
1928
1929
        if( start_with_green )
1930
        {
1931
            t0 = (bayer[1] + bayer[bayer_step*2+1])*rcoeff;
1932
            t1 = (bayer[bayer_step] + bayer[bayer_step+2])*bcoeff;
1933
            t2 = bayer[bayer_step+1]*(2*G2Y);
1934
1935
            dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
1936
            bayer++;
1937
            dst++;
1938
        }
1939
1940
        int delta = vecOp.bayer2Gray(bayer, bayer_step, dst, size.width, bcoeff, G2Y, rcoeff);
1941
        bayer += delta;
1942
        dst += delta;
1943
1944
        for( ; bayer <= bayer_end - 2; bayer += 2, dst += 2 )
1945
        {
1946
            t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
1947
            t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
1948
            t2 = bayer[bayer_step+1]*(4*bcoeff);
1949
            dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);
1950
1951
            t0 = (bayer[2] + bayer[bayer_step*2+2])*rcoeff;
1952
            t1 = (bayer[bayer_step+1] + bayer[bayer_step+3])*bcoeff;
1953
            t2 = bayer[bayer_step+2]*(2*G2Y);
1954
            dst[1] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
1955
        }
1956
1957
        if( bayer < bayer_end )
1958
        {
1959
            t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
1960
            t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
1961
            t2 = bayer[bayer_step+1]*(4*bcoeff);
1962
            dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);
1963
            bayer++;
1964
            dst++;
1965
        }
1966
1967
        dst0[-1] = dst0[0];
1968
        dst0[size.width] = dst0[size.width-1];
1969
1970
        brow = !brow;
1971
        std::swap(bcoeff, rcoeff);
1972
        start_with_green = !start_with_green;
1973
    }
1974
1975
    size = dstmat.size();
1976
    dst0 = (T*)dstmat.data;
1977
    if( size.height > 2 )
1978
        for( int i = 0; i < size.width; i++ )
1979
        {
1980
            dst0[i] = dst0[i + dst_step];
1981
            dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
1982
        }
1983
    else
1984
        for( int i = 0; i < size.width; i++ )
1985
        {
1986
            dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
1987
        }
1988
}
1989
1990
template<typename T, class SIMDInterpolator>
1991
static void Bayer2RGB_( const Mat& srcmat, Mat& dstmat, int code )
1992
{
1993
    SIMDInterpolator vecOp;
1994
    const T* bayer0 = (const T*)srcmat.data;
1995
    int bayer_step = (int)(srcmat.step/sizeof(T));
1996
    T* dst0 = (T*)dstmat.data;
1997
    int dst_step = (int)(dstmat.step/sizeof(T));
1998
    Size size = srcmat.size();
1999
    int blue = code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ? -1 : 1;
2000
    int start_with_green = code == CV_BayerGB2BGR || code == CV_BayerGR2BGR;
2001
2002
    dst0 += dst_step + 3 + 1;
2003
    size.height -= 2;
2004
    size.width -= 2;
2005
2006
    for( ; size.height-- > 0; bayer0 += bayer_step, dst0 += dst_step )
2007
    {
2008
        int t0, t1;
2009
        const T* bayer = bayer0;
2010
        T* dst = dst0;
2011
        const T* bayer_end = bayer + size.width;
2012
2013
        if( size.width <= 0 )
2014
        {
2015
            dst[-4] = dst[-3] = dst[-2] = dst[size.width*3-1] =
2016
            dst[size.width*3] = dst[size.width*3+1] = 0;
2017
            continue;
2018
        }
2019
2020
        if( start_with_green )
2021
        {
2022
            t0 = (bayer[1] + bayer[bayer_step*2+1] + 1) >> 1;
2023
            t1 = (bayer[bayer_step] + bayer[bayer_step+2] + 1) >> 1;
2024
            dst[-blue] = (T)t0;
2025
            dst[0] = bayer[bayer_step+1];
2026
            dst[blue] = (T)t1;
2027
            bayer++;
2028
            dst += 3;
2029
        }
2030
2031
        int delta = vecOp.bayer2RGB(bayer, bayer_step, dst, size.width, blue);
2032
        bayer += delta;
2033
        dst += delta*3;
2034
2035
        if( blue > 0 )
2036
        {
2037
            for( ; bayer <= bayer_end - 2; bayer += 2, dst += 6 )
2038
            {
2039
                t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
2040
                      bayer[bayer_step*2+2] + 2) >> 2;
2041
                t1 = (bayer[1] + bayer[bayer_step] +
2042
                      bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
2043
                dst[-1] = (T)t0;
2044
                dst[0] = (T)t1;
2045
                dst[1] = bayer[bayer_step+1];
2046
2047
                t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
2048
                t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
2049
                dst[2] = (T)t0;
2050
                dst[3] = bayer[bayer_step+2];
2051
                dst[4] = (T)t1;
2052
            }
2053
        }
2054
        else
2055
        {
2056
            for( ; bayer <= bayer_end - 2; bayer += 2, dst += 6 )
2057
            {
2058
                t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
2059
                      bayer[bayer_step*2+2] + 2) >> 2;
2060
                t1 = (bayer[1] + bayer[bayer_step] +
2061
                      bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
2062
                dst[1] = (T)t0;
2063
                dst[0] = (T)t1;
2064
                dst[-1] = bayer[bayer_step+1];
2065
2066
                t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
2067
                t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
2068
                dst[4] = (T)t0;
2069
                dst[3] = bayer[bayer_step+2];
2070
                dst[2] = (T)t1;
2071
            }
2072
        }
2073
2074
        if( bayer < bayer_end )
2075
        {
2076
            t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
2077
                  bayer[bayer_step*2+2] + 2) >> 2;
2078
            t1 = (bayer[1] + bayer[bayer_step] +
2079
                  bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
2080
            dst[-blue] = (T)t0;
2081
            dst[0] = (T)t1;
2082
            dst[blue] = bayer[bayer_step+1];
2083
            bayer++;
2084
            dst += 3;
2085
        }
2086
2087
        dst0[-4] = dst0[-1];
2088
        dst0[-3] = dst0[0];
2089
        dst0[-2] = dst0[1];
2090
        dst0[size.width*3-1] = dst0[size.width*3-4];
2091
        dst0[size.width*3] = dst0[size.width*3-3];
2092
        dst0[size.width*3+1] = dst0[size.width*3-2];
2093
2094
        blue = -blue;
2095
        start_with_green = !start_with_green;
2096
    }
2097
2098
    size = dstmat.size();
2099
    dst0 = (T*)dstmat.data;
2100
    if( size.height > 2 )
2101
        for( int i = 0; i < size.width*3; i++ )
2102
        {
2103
            dst0[i] = dst0[i + dst_step];
2104
            dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
2105
        }
2106
    else
2107
        for( int i = 0; i < size.width*3; i++ )
2108
        {
2109
            dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
2110
        }
2111
}
2112
2113
2114
/////////////////// Demosaicing using Variable Number of Gradients ///////////////////////
2115
2116
static void Bayer2RGB_VNG_8u( const Mat& srcmat, Mat& dstmat, int code )
2117
{
2118
    const uchar* bayer = srcmat.data;
2119
    int bstep = (int)srcmat.step;
2120
    uchar* dst = dstmat.data;
2121
    int dststep = (int)dstmat.step;
2122
    Size size = srcmat.size();
2123
2124
    int blueIdx = code == CV_BayerBG2BGR_VNG || code == CV_BayerGB2BGR_VNG ? 0 : 2;
2125
    bool greenCell0 = code != CV_BayerBG2BGR_VNG && code != CV_BayerRG2BGR_VNG;
2126
2127
    // for too small images use the simple interpolation algorithm
2128
    if( MIN(size.width, size.height) < 8 )
2129
    {
2130
        Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>( srcmat, dstmat, code );
2131
        return;
2132
    }
2133
2134
    const int brows = 3, bcn = 7;
2135
    int N = size.width, N2 = N*2, N3 = N*3, N4 = N*4, N5 = N*5, N6 = N*6, N7 = N*7;
2136
    int i, bufstep = N7*bcn;
2137
    cv::AutoBuffer<ushort> _buf(bufstep*brows);
2138
    ushort* buf = (ushort*)_buf;
2139
2140
    bayer += bstep*2;
2141
2142
#if CV_SSE2
2143
    bool haveSSE = cv::checkHardwareSupport(CV_CPU_SSE2);
2144
    #define _mm_absdiff_epu16(a,b) _mm_adds_epu16(_mm_subs_epu16(a, b), _mm_subs_epu16(b, a))
2145
#endif
2146
2147
    for( int y = 2; y < size.height - 4; y++ )
2148
    {
2149
        uchar* dstrow = dst + dststep*y + 6;
2150
        const uchar* srow;
2151
2152
        for( int dy = (y == 2 ? -1 : 1); dy <= 1; dy++ )
2153
        {
2154
            ushort* brow = buf + ((y + dy - 1)%brows)*bufstep + 1;
2155
            srow = bayer + (y+dy)*bstep + 1;
2156
2157
            for( i = 0; i < bcn; i++ )
2158
                brow[N*i-1] = brow[(N-2) + N*i] = 0;
2159
2160
            i = 1;
2161
2162
#if CV_SSE2
2163
            if( haveSSE )
2164
            {
2165
                __m128i z = _mm_setzero_si128();
2166
                for( ; i <= N-9; i += 8, srow += 8, brow += 8 )
2167
                {
2168
                    __m128i s1, s2, s3, s4, s6, s7, s8, s9;
2169
2170
                    s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1-bstep)),z);
2171
                    s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-bstep)),z);
2172
                    s3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1-bstep)),z);
2173
2174
                    s4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1)),z);
2175
                    s6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1)),z);
2176
2177
                    s7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1+bstep)),z);
2178
                    s8 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+bstep)),z);
2179
                    s9 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1+bstep)),z);
2180
2181
                    __m128i b0, b1, b2, b3, b4, b5, b6;
2182
2183
                    b0 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s2,s8),1),
2184
                                        _mm_adds_epu16(_mm_absdiff_epu16(s1, s7),
2185
                                                       _mm_absdiff_epu16(s3, s9)));
2186
                    b1 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s4,s6),1),
2187
                                        _mm_adds_epu16(_mm_absdiff_epu16(s1, s3),
2188
                                                       _mm_absdiff_epu16(s7, s9)));
2189
                    b2 = _mm_slli_epi16(_mm_absdiff_epu16(s3,s7),1);
2190
                    b3 = _mm_slli_epi16(_mm_absdiff_epu16(s1,s9),1);
2191
2192
                    _mm_storeu_si128((__m128i*)brow, b0);
2193
                    _mm_storeu_si128((__m128i*)(brow + N), b1);
2194
                    _mm_storeu_si128((__m128i*)(brow + N2), b2);
2195
                    _mm_storeu_si128((__m128i*)(brow + N3), b3);
2196
2197
                    b4 = _mm_adds_epu16(b2,_mm_adds_epu16(_mm_absdiff_epu16(s2, s4),
2198
                                                          _mm_absdiff_epu16(s6, s8)));
2199
                    b5 = _mm_adds_epu16(b3,_mm_adds_epu16(_mm_absdiff_epu16(s2, s6),
2200
                                                          _mm_absdiff_epu16(s4, s8)));
2201
                    b6 = _mm_adds_epu16(_mm_adds_epu16(s2, s4), _mm_adds_epu16(s6, s8));
2202
                    b6 = _mm_srli_epi16(b6, 1);
2203
2204
                    _mm_storeu_si128((__m128i*)(brow + N4), b4);
2205
                    _mm_storeu_si128((__m128i*)(brow + N5), b5);
2206
                    _mm_storeu_si128((__m128i*)(brow + N6), b6);
2207
                }
2208
            }
2209
#endif
2210
2211
            for( ; i < N-1; i++, srow++, brow++ )
2212
            {
2213
                brow[0] = (ushort)(std::abs(srow[-1-bstep] - srow[-1+bstep]) +
2214
                                   std::abs(srow[-bstep] - srow[+bstep])*2 +
2215
                                   std::abs(srow[1-bstep] - srow[1+bstep]));
2216
                brow[N] = (ushort)(std::abs(srow[-1-bstep] - srow[1-bstep]) +
2217
                                   std::abs(srow[-1] - srow[1])*2 +
2218
                                   std::abs(srow[-1+bstep] - srow[1+bstep]));
2219
                brow[N2] = (ushort)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2);
2220
                brow[N3] = (ushort)(std::abs(srow[-1-bstep] - srow[1+bstep])*2);
2221
                brow[N4] = (ushort)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) +
2222
                                    std::abs(srow[+bstep] - srow[1]));
2223
                brow[N5] = (ushort)(brow[N3] + std::abs(srow[-bstep] - srow[1]) +
2224
                                    std::abs(srow[+bstep] - srow[-1]));
2225
                brow[N6] = (ushort)((srow[-bstep] + srow[-1] + srow[1] + srow[+bstep])>>1);
2226
            }
2227
        }
2228
2229
        const ushort* brow0 = buf + ((y - 2) % brows)*bufstep + 2;
2230
        const ushort* brow1 = buf + ((y - 1) % brows)*bufstep + 2;
2231
        const ushort* brow2 = buf + (y % brows)*bufstep + 2;
2232
        static const float scale[] = { 0.f, 0.5f, 0.25f, 0.1666666666667f, 0.125f, 0.1f, 0.08333333333f, 0.0714286f, 0.0625f };
2233
        srow = bayer + y*bstep + 2;
2234
        bool greenCell = greenCell0;
2235
2236
        i = 2;
2237
#if CV_SSE2
2238
        int limit = !haveSSE ? N-2 : greenCell ? std::min(3, N-2) : 2;
2239
#else
2240
        int limit = N - 2;
2241
#endif
2242
2243
        do
2244
        {
2245
            for( ; i < limit; i++, srow++, brow0++, brow1++, brow2++, dstrow += 3 )
2246
            {
2247
                int gradN = brow0[0] + brow1[0];
2248
                int gradS = brow1[0] + brow2[0];
2249
                int gradW = brow1[N-1] + brow1[N];
2250
                int gradE = brow1[N] + brow1[N+1];
2251
                int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);
2252
                int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);
2253
                int R, G, B;
2254
2255
                if( !greenCell )
2256
                {
2257
                    int gradNE = brow0[N4+1] + brow1[N4];
2258
                    int gradSW = brow1[N4] + brow2[N4-1];
2259
                    int gradNW = brow0[N5-1] + brow1[N5];
2260
                    int gradSE = brow1[N5] + brow2[N5+1];
2261
2262
                    minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);
2263
                    maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);
2264
                    int T = minGrad + maxGrad/2;
2265
2266
                    int Rs = 0, Gs = 0, Bs = 0, ng = 0;
2267
                    if( gradN < T )
2268
                    {
2269
                        Rs += srow[-bstep*2] + srow[0];
2270
                        Gs += srow[-bstep]*2;
2271
                        Bs += srow[-bstep-1] + srow[-bstep+1];
2272
                        ng++;
2273
                    }
2274
                    if( gradS < T )
2275
                    {
2276
                        Rs += srow[bstep*2] + srow[0];
2277
                        Gs += srow[bstep]*2;
2278
                        Bs += srow[bstep-1] + srow[bstep+1];
2279
                        ng++;
2280
                    }
2281
                    if( gradW < T )
2282
                    {
2283
                        Rs += srow[-2] + srow[0];
2284
                        Gs += srow[-1]*2;
2285
                        Bs += srow[-bstep-1] + srow[bstep-1];
2286
                        ng++;
2287
                    }
2288
                    if( gradE < T )
2289
                    {
2290
                        Rs += srow[2] + srow[0];
2291
                        Gs += srow[1]*2;
2292
                        Bs += srow[-bstep+1] + srow[bstep+1];
2293
                        ng++;
2294
                    }
2295
                    if( gradNE < T )
2296
                    {
2297
                        Rs += srow[-bstep*2+2] + srow[0];
2298
                        Gs += brow0[N6+1];
2299
                        Bs += srow[-bstep+1]*2;
2300
                        ng++;
2301
                    }
2302
                    if( gradSW < T )
2303
                    {
2304
                        Rs += srow[bstep*2-2] + srow[0];
2305
                        Gs += brow2[N6-1];
2306
                        Bs += srow[bstep-1]*2;
2307
                        ng++;
2308
                    }
2309
                    if( gradNW < T )
2310
                    {
2311
                        Rs += srow[-bstep*2-2] + srow[0];
2312
                        Gs += brow0[N6-1];
2313
                        Bs += srow[-bstep+1]*2;
2314
                        ng++;
2315
                    }
2316
                    if( gradSE < T )
2317
                    {
2318
                        Rs += srow[bstep*2+2] + srow[0];
2319
                        Gs += brow2[N6+1];
2320
                        Bs += srow[-bstep+1]*2;
2321
                        ng++;
2322
                    }
2323
                    R = srow[0];
2324
                    G = R + cvRound((Gs - Rs)*scale[ng]);
2325
                    B = R + cvRound((Bs - Rs)*scale[ng]);
2326
                }
2327
                else
2328
                {
2329
                    int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];
2330
                    int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];
2331
                    int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];
2332
                    int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];
2333
2334
                    minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);
2335
                    maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);
2336
                    int T = minGrad + maxGrad/2;
2337
2338
                    int Rs = 0, Gs = 0, Bs = 0, ng = 0;
2339
                    if( gradN < T )
2340
                    {
2341
                        Rs += srow[-bstep*2-1] + srow[-bstep*2+1];
2342
                        Gs += srow[-bstep*2] + srow[0];
2343
                        Bs += srow[-bstep]*2;
2344
                        ng++;
2345
                    }
2346
                    if( gradS < T )
2347
                    {
2348
                        Rs += srow[bstep*2-1] + srow[bstep*2+1];
2349
                        Gs += srow[bstep*2] + srow[0];
2350
                        Bs += srow[bstep]*2;
2351
                        ng++;
2352
                    }
2353
                    if( gradW < T )
2354
                    {
2355
                        Rs += srow[-1]*2;
2356
                        Gs += srow[-2] + srow[0];
2357
                        Bs += srow[-bstep-2]+srow[bstep-2];
2358
                        ng++;
2359
                    }
2360
                    if( gradE < T )
2361
                    {
2362
                        Rs += srow[1]*2;
2363
                        Gs += srow[2] + srow[0];
2364
                        Bs += srow[-bstep+2]+srow[bstep+2];
2365
                        ng++;
2366
                    }
2367
                    if( gradNE < T )
2368
                    {
2369
                        Rs += srow[-bstep*2+1] + srow[1];
2370
                        Gs += srow[-bstep+1]*2;
2371
                        Bs += srow[-bstep] + srow[-bstep+2];
2372
                        ng++;
2373
                    }
2374
                    if( gradSW < T )
2375
                    {
2376
                        Rs += srow[bstep*2-1] + srow[-1];
2377
                        Gs += srow[bstep-1]*2;
2378
                        Bs += srow[bstep] + srow[bstep-2];
2379
                        ng++;
2380
                    }
2381
                    if( gradNW < T )
2382
                    {
2383
                        Rs += srow[-bstep*2-1] + srow[-1];
2384
                        Gs += srow[-bstep-1]*2;
2385
                        Bs += srow[-bstep-2]+srow[-bstep];
2386
                        ng++;
2387
                    }
2388
                    if( gradSE < T )
2389
                    {
2390
                        Rs += srow[bstep*2+1] + srow[1];
2391
                        Gs += srow[bstep+1]*2;
2392
                        Bs += srow[bstep+2]+srow[bstep];
2393
                        ng++;
2394
                    }
2395
                    G = srow[0];
2396
                    R = G + cvRound((Rs - Gs)*scale[ng]);
2397
                    B = G + cvRound((Bs - Gs)*scale[ng]);
2398
                }
2399
                dstrow[blueIdx] = CV_CAST_8U(B);
2400
                dstrow[1] = CV_CAST_8U(G);
2401
                dstrow[blueIdx^2] = CV_CAST_8U(R);
2402
                greenCell = !greenCell;
2403
            }
2404
2405
#if CV_SSE2
2406
            if( !haveSSE )
2407
                break;
2408
2409
            __m128i emask    = _mm_set1_epi32(0x0000ffff),
2410
                    omask    = _mm_set1_epi32(0xffff0000),
2411
                                        smask    = _mm_set1_epi16(0x7fff), //Get rid of sign bit in u16's
2412
                    z        = _mm_setzero_si128();
2413
            __m128 _0_5      = _mm_set1_ps(0.5f);
2414
2415
            #define _mm_merge_epi16(a, b) _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)) //(aA_aA_aA_aA) * (bB_bB_bB_bB) => (bA_bA_bA_bA)
2416
            #define _mm_cvtloepi16_ps(a)  _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16))   //(1,2,3,4,5,6,7,8) => (1f,2f,3f,4f)
2417
            #define _mm_cvthiepi16_ps(a)  _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16))   //(1,2,3,4,5,6,7,8) => (5f,6f,7f,8f)
2418
            #define _mm_loadl_u8_s16(ptr, offset) _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)((ptr) + (offset))), z) //load 8 uchars to 8 shorts
2419
2420
            // process 8 pixels at once
2421
            for( ; i <= N - 10; i += 8, srow += 8, brow0 += 8, brow1 += 8, brow2 += 8 )
2422
            {
2423
                //int gradN = brow0[0] + brow1[0];
2424
                __m128i gradN = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow0), _mm_loadu_si128((__m128i*)brow1));
2425
2426
                //int gradS = brow1[0] + brow2[0];
2427
                __m128i gradS = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow1), _mm_loadu_si128((__m128i*)brow2));
2428
2429
                //int gradW = brow1[N-1] + brow1[N];
2430
                __m128i gradW = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N-1)), _mm_loadu_si128((__m128i*)(brow1+N)));
2431
2432
                //int gradE = brow1[N+1] + brow1[N];
2433
                __m128i gradE = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N+1)), _mm_loadu_si128((__m128i*)(brow1+N)));
2434
2435
                //int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);
2436
                //int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);
2437
                __m128i minGrad = _mm_min_epi16(_mm_min_epi16(gradN, gradS), _mm_min_epi16(gradW, gradE));
2438
                __m128i maxGrad = _mm_max_epi16(_mm_max_epi16(gradN, gradS), _mm_max_epi16(gradW, gradE));
2439
2440
                __m128i grad0, grad1;
2441
2442
                //int gradNE = brow0[N4+1] + brow1[N4];
2443
                //int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];
2444
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N4+1)), _mm_loadu_si128((__m128i*)(brow1+N4)));
2445
                grad1 = _mm_adds_epi16( _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N2)), _mm_loadu_si128((__m128i*)(brow0+N2+1))),
2446
                                        _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2+1))));
2447
                __m128i gradNE = _mm_merge_epi16(grad0, grad1);
2448
2449
                //int gradSW = brow1[N4] + brow2[N4-1];
2450
                //int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];
2451
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N4-1)), _mm_loadu_si128((__m128i*)(brow1+N4)));
2452
                grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N2)), _mm_loadu_si128((__m128i*)(brow2+N2-1))),
2453
                                       _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2-1))));
2454
                __m128i gradSW = _mm_merge_epi16(grad0, grad1);
2455
2456
                minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNE), gradSW);
2457
                maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNE), gradSW);
2458
2459
                //int gradNW = brow0[N5-1] + brow1[N5];
2460
                //int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];
2461
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N5-1)), _mm_loadu_si128((__m128i*)(brow1+N5)));
2462
                grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N3)), _mm_loadu_si128((__m128i*)(brow0+N3-1))),
2463
                                       _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3-1))));
2464
                __m128i gradNW = _mm_merge_epi16(grad0, grad1);
2465
2466
                //int gradSE = brow1[N5] + brow2[N5+1];
2467
                //int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];
2468
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N5+1)), _mm_loadu_si128((__m128i*)(brow1+N5)));
2469
                grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N3)), _mm_loadu_si128((__m128i*)(brow2+N3+1))),
2470
                                       _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3+1))));
2471
                __m128i gradSE = _mm_merge_epi16(grad0, grad1);
2472
2473
                minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNW), gradSE);
2474
                maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNW), gradSE);
2475
2476
                //int T = minGrad + maxGrad/2;
2477
                __m128i T = _mm_adds_epi16(_mm_srli_epi16(maxGrad, 1), minGrad);
2478
2479
                __m128i RGs = z, GRs = z, Bs = z, ng = z;
2480
2481
                __m128i x0  = _mm_loadl_u8_s16(srow, +0          );
2482
                __m128i x1  = _mm_loadl_u8_s16(srow, -1 - bstep  );
2483
                __m128i x2  = _mm_loadl_u8_s16(srow, -1 - bstep*2);
2484
                __m128i x3  = _mm_loadl_u8_s16(srow,    - bstep  );
2485
                __m128i x4  = _mm_loadl_u8_s16(srow, +1 - bstep*2);
2486
                __m128i x5  = _mm_loadl_u8_s16(srow, +1 - bstep  );
2487
                __m128i x6  = _mm_loadl_u8_s16(srow, +2 - bstep  );
2488
                __m128i x7  = _mm_loadl_u8_s16(srow, +1          );
2489
                __m128i x8  = _mm_loadl_u8_s16(srow, +2 + bstep  );
2490
                __m128i x9  = _mm_loadl_u8_s16(srow, +1 + bstep  );
2491
                __m128i x10 = _mm_loadl_u8_s16(srow, +1 + bstep*2);
2492
                __m128i x11 = _mm_loadl_u8_s16(srow,    + bstep  );
2493
                __m128i x12 = _mm_loadl_u8_s16(srow, -1 + bstep*2);
2494
                __m128i x13 = _mm_loadl_u8_s16(srow, -1 + bstep  );
2495
                __m128i x14 = _mm_loadl_u8_s16(srow, -2 + bstep  );
2496
                __m128i x15 = _mm_loadl_u8_s16(srow, -1          );
2497
                __m128i x16 = _mm_loadl_u8_s16(srow, -2 - bstep  );
2498
2499
                __m128i t0, t1, mask;
2500
2501
                // gradN ***********************************************
2502
                mask = _mm_cmpgt_epi16(T, gradN); // mask = T>gradN
2503
                ng = _mm_sub_epi16(ng, mask);     // ng += (T>gradN)
2504
2505
                t0 = _mm_slli_epi16(x3, 1);                                 // srow[-bstep]*2
2506
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2), x0);  // srow[-bstep*2] + srow[0]
2507
2508
                // RGs += (srow[-bstep*2] + srow[0]) * (T>gradN)
2509
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
2510
                // GRs += {srow[-bstep]*2; (srow[-bstep*2-1] + srow[-bstep*2+1])} * (T>gradN)
2511
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x2,x4)), mask));
2512
                // Bs  += {(srow[-bstep-1]+srow[-bstep+1]); srow[-bstep]*2 } * (T>gradN)
2513
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x5), t0), mask));
2514
2515
                // gradNE **********************************************
2516
                mask = _mm_cmpgt_epi16(T, gradNE); // mask = T>gradNE
2517
                ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradNE)
2518
2519
                t0 = _mm_slli_epi16(x5, 1);                                    // srow[-bstep+1]*2
2520
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2+2), x0);   // srow[-bstep*2+2] + srow[0]
2521
2522
                // RGs += {(srow[-bstep*2+2] + srow[0]); srow[-bstep+1]*2} * (T>gradNE)
2523
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
2524
                // GRs += {brow0[N6+1]; (srow[-bstep*2+1] + srow[1])} * (T>gradNE)
2525
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6+1)), _mm_adds_epi16(x4,x7)), mask));
2526
                // Bs  += {srow[-bstep+1]*2; (srow[-bstep] + srow[-bstep+2])}  * (T>gradNE)
2527
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x3,x6)), mask));
2528
2529
                // gradE ***********************************************
2530
                mask = _mm_cmpgt_epi16(T, gradE);  // mask = T>gradE
2531
                ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradE)
2532
2533
                t0 = _mm_slli_epi16(x7, 1);                         // srow[1]*2
2534
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, 2), x0); // srow[2] + srow[0]
2535
2536
                // RGs += (srow[2] + srow[0]) * (T>gradE)
2537
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
2538
                // GRs += (srow[1]*2) * (T>gradE)
2539
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));
2540
                // Bs  += {(srow[-bstep+1]+srow[bstep+1]); (srow[-bstep+2]+srow[bstep+2])} * (T>gradE)
2541
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x5,x9), _mm_adds_epi16(x6,x8)), mask));
2542
2543
                // gradSE **********************************************
2544
                mask = _mm_cmpgt_epi16(T, gradSE);  // mask = T>gradSE
2545
                ng = _mm_sub_epi16(ng, mask);       // ng += (T>gradSE)
2546
2547
                t0 = _mm_slli_epi16(x9, 1);                                 // srow[bstep+1]*2
2548
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2+2), x0); // srow[bstep*2+2] + srow[0]
2549
2550
                // RGs += {(srow[bstep*2+2] + srow[0]); srow[bstep+1]*2} * (T>gradSE)
2551
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
2552
                // GRs += {brow2[N6+1]; (srow[1]+srow[bstep*2+1])} * (T>gradSE)
2553
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6+1)), _mm_adds_epi16(x7,x10)), mask));
2554
                // Bs  += {srow[-bstep+1]*2; (srow[bstep+2]+srow[bstep])} * (T>gradSE)
2555
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1), _mm_adds_epi16(x8,x11)), mask));
2556
2557
                // gradS ***********************************************
2558
                mask = _mm_cmpgt_epi16(T, gradS);  // mask = T>gradS
2559
                ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradS)
2560
2561
                t0 = _mm_slli_epi16(x11, 1);                             // srow[bstep]*2
2562
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,bstep*2), x0); // srow[bstep*2]+srow[0]
2563
2564
                // RGs += (srow[bstep*2]+srow[0]) * (T>gradS)
2565
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
2566
                // GRs += {srow[bstep]*2; (srow[bstep*2+1]+srow[bstep*2-1])} * (T>gradS)
2567
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x10,x12)), mask));
2568
                // Bs  += {(srow[bstep+1]+srow[bstep-1]); srow[bstep]*2} * (T>gradS)
2569
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x9,x13), t0), mask));
2570
2571
                // gradSW **********************************************
2572
                mask = _mm_cmpgt_epi16(T, gradSW);  // mask = T>gradSW
2573
                ng = _mm_sub_epi16(ng, mask);       // ng += (T>gradSW)
2574
2575
                t0 = _mm_slli_epi16(x13, 1);                                // srow[bstep-1]*2
2576
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2-2), x0); // srow[bstep*2-2]+srow[0]
2577
2578
                // RGs += {(srow[bstep*2-2]+srow[0]); srow[bstep-1]*2} * (T>gradSW)
2579
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
2580
                // GRs += {brow2[N6-1]; (srow[bstep*2-1]+srow[-1])} * (T>gradSW)
2581
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6-1)), _mm_adds_epi16(x12,x15)), mask));
2582
                // Bs  += {srow[bstep-1]*2; (srow[bstep]+srow[bstep-2])} * (T>gradSW)
2583
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x11,x14)), mask));
2584
2585
                // gradW ***********************************************
2586
                mask = _mm_cmpgt_epi16(T, gradW);  // mask = T>gradW
2587
                ng = _mm_sub_epi16(ng, mask);      // ng += (T>gradW)
2588
2589
                t0 = _mm_slli_epi16(x15, 1);                         // srow[-1]*2
2590
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -2), x0); // srow[-2]+srow[0]
2591
2592
                // RGs += (srow[-2]+srow[0]) * (T>gradW)
2593
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
2594
                // GRs += (srow[-1]*2) * (T>gradW)
2595
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));
2596
                // Bs  += {(srow[-bstep-1]+srow[bstep-1]); (srow[bstep-2]+srow[-bstep-2])} * (T>gradW)
2597
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x13), _mm_adds_epi16(x14,x16)), mask));
2598
2599
                // gradNW **********************************************
2600
                mask = _mm_cmpgt_epi16(T, gradNW);  // mask = T>gradNW
2601
                ng = _mm_sub_epi16(ng, mask);       // ng += (T>gradNW)
2602
2603
                t0 = _mm_slli_epi16(x1, 1);                                 // srow[-bstep-1]*2
2604
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,-bstep*2-2), x0); // srow[-bstep*2-2]+srow[0]
2605
2606
                // RGs += {(srow[-bstep*2-2]+srow[0]); srow[-bstep-1]*2} * (T>gradNW)
2607
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
2608
                // GRs += {brow0[N6-1]; (srow[-bstep*2-1]+srow[-1])} * (T>gradNW)
2609
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6-1)), _mm_adds_epi16(x2,x15)), mask));
2610
                // Bs  += {srow[-bstep-1]*2; (srow[-bstep]+srow[-bstep-2])} * (T>gradNW)
2611
                Bs  = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1),_mm_adds_epi16(x3,x16)), mask));
2612
2613
                __m128 ngf0, ngf1;
2614
                ngf0 = _mm_div_ps(_0_5, _mm_cvtloepi16_ps(ng));
2615
                ngf1 = _mm_div_ps(_0_5, _mm_cvthiepi16_ps(ng));
2616
2617
                // now interpolate r, g & b
2618
                t0 = _mm_sub_epi16(GRs, RGs);
2619
                t1 = _mm_sub_epi16(Bs, RGs);
2620
2621
                t0 = _mm_add_epi16(x0, _mm_packs_epi32(
2622
                                                       _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t0), ngf0)),
2623
                                                       _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t0), ngf1))));
2624
2625
                t1 = _mm_add_epi16(x0, _mm_packs_epi32(
2626
                                                       _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t1), ngf0)),
2627
                                                       _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t1), ngf1))));
2628
2629
                x1 = _mm_merge_epi16(x0, t0);
2630
                x2 = _mm_merge_epi16(t0, x0);
2631
2632
                uchar R[8], G[8], B[8];
2633
                                
2634
                                //Make sure there is no sign bit in the 16 bit values so they can saturate correctly
2635
                x1 = _mm_and_si128(x1, smask);
2636
                                x2 = _mm_and_si128(x2, smask);
2637
                                t1 = _mm_and_si128(t1, smask);
2638
2639
                _mm_storel_epi64(blueIdx ? (__m128i*)B : (__m128i*)R, _mm_packus_epi16(x1, z));
2640
                _mm_storel_epi64((__m128i*)G, _mm_packus_epi16(x2, z));
2641
                _mm_storel_epi64(blueIdx ? (__m128i*)R : (__m128i*)B, _mm_packus_epi16(t1, z));
2642
2643
                for( int j = 0; j < 8; j++, dstrow += 3 )
2644
                {
2645
                    dstrow[0] = B[j]; dstrow[1] = G[j]; dstrow[2] = R[j];
2646
                }
2647
            }
2648
#endif
2649
2650
            limit = N - 2;
2651
        }
2652
        while( i < N - 2 );
2653
2654
        for( i = 0; i < 6; i++ )
2655
        {
2656
            dst[dststep*y + 5 - i] = dst[dststep*y + 8 - i];
2657
            dst[dststep*y + (N - 2)*3 + i] = dst[dststep*y + (N - 3)*3 + i];
2658
        }
2659
2660
        greenCell0 = !greenCell0;
2661
        blueIdx ^= 2;
2662
    }
2663
2664
    for( i = 0; i < size.width*3; i++ )
2665
    {
2666
        dst[i] = dst[i + dststep] = dst[i + dststep*2];
2667
        dst[i + dststep*(size.height-4)] =
2668
        dst[i + dststep*(size.height-3)] =
2669
        dst[i + dststep*(size.height-2)] =
2670
        dst[i + dststep*(size.height-1)] = dst[i + dststep*(size.height-5)];
2671
    }
2672
}
2673
2674
///////////////////////////////////// YUV420 -> RGB /////////////////////////////////////
2675
2676
// Fixed-point coefficients for the ITU-R BT.601 YUV -> RGB conversion used by the
// YUV420 invokers below. Each coefficient is the floating-point factor from
//   R = 1.164(Y - 16) + 1.596(V - 128)
//   G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
//   B = 1.164(Y - 16)                  + 2.018(U - 128)
// scaled by 2^ITUR_BT_601_SHIFT, so results are recovered with a right shift.
const int ITUR_BT_601_SHIFT = 20;       // number of fractional bits
const int ITUR_BT_601_CY    = 1220542;  // luma gain            (1.164)
const int ITUR_BT_601_CVR   = 1673527;  // V contribution to R  (1.596)
const int ITUR_BT_601_CVG   = -852492;  // V contribution to G  (-0.813)
const int ITUR_BT_601_CUG   = -409993;  // U contribution to G  (-0.391)
const int ITUR_BT_601_CUB   = 2116026;  // U contribution to B  (2.018)
2682
2683
template<int bIdx, int uIdx>
2684
struct YUV420sp2RGB888Invoker
2685
{
2686
    Mat* dst;
2687
    const uchar* my1, *muv;
2688
    int width, stride;
2689
2690
    YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
2691
        : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
2692
2693
    void operator()(const BlockedRange& range) const
2694
    {
2695
        int rangeBegin = range.begin() * 2;
2696
        int rangeEnd = range.end() * 2;
2697
2698
        //R = 1.164(Y - 16) + 1.596(V - 128)
2699
        //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
2700
        //B = 1.164(Y - 16)                  + 2.018(U - 128)
2701
2702
        //R = (1220542(Y - 16) + 1673527(V - 128)                  + (1 << 19)) >> 20
2703
        //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
2704
        //B = (1220542(Y - 16)                  + 2116026(U - 128) + (1 << 19)) >> 20
2705
2706
        const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;
2707
2708
#ifdef HAVE_TEGRA_OPTIMIZATION
2709
        if(tegra::cvtYUV4202RGB(bIdx, uIdx, 3, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
2710
            return;
2711
#endif
2712
2713
        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
2714
        {
2715
            uchar* row1 = dst->ptr<uchar>(j);
2716
            uchar* row2 = dst->ptr<uchar>(j + 1);
2717
            const uchar* y2 = y1 + stride;
2718
2719
            for (int i = 0; i < width; i += 2, row1 += 6, row2 += 6)
2720
            {
2721
                int u = int(uv[i + 0 + uIdx]) - 128;
2722
                int v = int(uv[i + 1 - uIdx]) - 128;
2723
2724
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
2725
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
2726
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
2727
2728
                int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
2729
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
2730
                row1[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
2731
                row1[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
2732
2733
                int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
2734
                row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
2735
                row1[4]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
2736
                row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
2737
2738
                int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
2739
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
2740
                row2[1]      = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
2741
                row2[bIdx]   = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
2742
2743
                int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
2744
                row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
2745
                row2[4]      = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
2746
                row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
2747
            }
2748
        }
2749
    }
2750
};
2751
2752
template<int bIdx, int uIdx>
2753
struct YUV420sp2RGBA8888Invoker
2754
{
2755
    Mat* dst;
2756
    const uchar* my1, *muv;
2757
    int width, stride;
2758
2759
    YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
2760
        : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
2761
2762
    void operator()(const BlockedRange& range) const
2763
    {
2764
        int rangeBegin = range.begin() * 2;
2765
        int rangeEnd = range.end() * 2;
2766
2767
        //R = 1.164(Y - 16) + 1.596(V - 128)
2768
        //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
2769
        //B = 1.164(Y - 16)                  + 2.018(U - 128)
2770
2771
        //R = (1220542(Y - 16) + 1673527(V - 128)                  + (1 << 19)) >> 20
2772
        //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
2773
        //B = (1220542(Y - 16)                  + 2116026(U - 128) + (1 << 19)) >> 20
2774
2775
        const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;
2776
2777
#ifdef HAVE_TEGRA_OPTIMIZATION
2778
        if(tegra::cvtYUV4202RGB(bIdx, uIdx, 4, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
2779
            return;
2780
#endif
2781
2782
        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
2783
        {
2784
            uchar* row1 = dst->ptr<uchar>(j);
2785
            uchar* row2 = dst->ptr<uchar>(j + 1);
2786
            const uchar* y2 = y1 + stride;
2787
2788
            for (int i = 0; i < width; i += 2, row1 += 8, row2 += 8)
2789
            {
2790
                int u = int(uv[i + 0 + uIdx]) - 128;
2791
                int v = int(uv[i + 1 - uIdx]) - 128;
2792
2793
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
2794
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
2795
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
2796
2797
                int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
2798
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
2799
                row1[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
2800
                row1[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
2801
                row1[3]      = uchar(0xff);
2802
2803
                int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
2804
                row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
2805
                row1[5]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
2806
                row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
2807
                row1[7]      = uchar(0xff);
2808
2809
                int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
2810
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
2811
                row2[1]      = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
2812
                row2[bIdx]   = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
2813
                row2[3]      = uchar(0xff);
2814
2815
                int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
2816
                row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
2817
                row2[5]      = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
2818
                row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
2819
                row2[7]      = uchar(0xff);
2820
            }
2821
        }
2822
    }
2823
};
2824
2825
template<int bIdx>
2826
struct YUV420p2RGB888Invoker
2827
{
2828
    Mat* dst;
2829
    const uchar* my1, *mu, *mv;
2830
    int width, stride;
2831
    int ustepIdx, vstepIdx;
2832
2833
    YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
2834
        : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
2835
2836
    void operator()(const BlockedRange& range) const
2837
    {
2838
        const int rangeBegin = range.begin() * 2;
2839
        const int rangeEnd = range.end() * 2;
2840
2841
        size_t uvsteps[2] = {width/2, stride - width/2};
2842
        int usIdx = ustepIdx, vsIdx = vstepIdx;
2843
2844
        const uchar* y1 = my1 + rangeBegin * stride;
2845
        const uchar* u1 = mu + (range.begin() / 2) * stride;
2846
        const uchar* v1 = mv + (range.begin() / 2) * stride;
2847
2848
        if(range.begin() % 2 == 1)
2849
        {
2850
            u1 += uvsteps[(usIdx++) & 1];
2851
            v1 += uvsteps[(vsIdx++) & 1];
2852
        }
2853
2854
        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
2855
        {
2856
            uchar* row1 = dst->ptr<uchar>(j);
2857
            uchar* row2 = dst->ptr<uchar>(j + 1);
2858
            const uchar* y2 = y1 + stride;
2859
2860
            for (int i = 0; i < width / 2; i += 1, row1 += 6, row2 += 6)
2861
            {
2862
                int u = int(u1[i]) - 128;
2863
                int v = int(v1[i]) - 128;
2864
2865
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
2866
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
2867
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
2868
2869
                int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
2870
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
2871
                row1[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
2872
                row1[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
2873
2874
                int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
2875
                row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
2876
                row1[4]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
2877
                row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
2878
2879
                int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
2880
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
2881
                row2[1]      = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
2882
                row2[bIdx]   = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
2883
2884
                int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
2885
                row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
2886
                row2[4]      = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
2887
                row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
2888
            }
2889
        }
2890
    }
2891
};
2892
2893
template<int bIdx>
2894
struct YUV420p2RGBA8888Invoker
2895
{
2896
    Mat* dst;
2897
    const uchar* my1, *mu, *mv;
2898
    int width, stride;
2899
    int ustepIdx, vstepIdx;
2900
2901
    YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
2902
        : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
2903
2904
    void operator()(const BlockedRange& range) const
2905
    {
2906
        int rangeBegin = range.begin() * 2;
2907
        int rangeEnd = range.end() * 2;
2908
2909
        size_t uvsteps[2] = {width/2, stride - width/2};
2910
        int usIdx = ustepIdx, vsIdx = vstepIdx;
2911
2912
        const uchar* y1 = my1 + rangeBegin * stride;
2913
        const uchar* u1 = mu + (range.begin() / 2) * stride;
2914
        const uchar* v1 = mv + (range.begin() / 2) * stride;
2915
2916
        if(range.begin() % 2 == 1)
2917
        {
2918
            u1 += uvsteps[(usIdx++) & 1];
2919
            v1 += uvsteps[(vsIdx++) & 1];
2920
        }
2921
2922
        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
2923
        {
2924
            uchar* row1 = dst->ptr<uchar>(j);
2925
            uchar* row2 = dst->ptr<uchar>(j + 1);
2926
            const uchar* y2 = y1 + stride;
2927
2928
            for (int i = 0; i < width / 2; i += 1, row1 += 8, row2 += 8)
2929
            {
2930
                int u = int(u1[i]) - 128;
2931
                int v = int(v1[i]) - 128;
2932
2933
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
2934
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
2935
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
2936
2937
                int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
2938
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
2939
                row1[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
2940
                row1[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
2941
                row1[3]      = uchar(0xff);
2942
2943
                int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
2944
                row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
2945
                row1[5]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
2946
                row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
2947
                row1[7]      = uchar(0xff);
2948
2949
                int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
2950
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
2951
                row2[1]      = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
2952
                row2[bIdx]   = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
2953
                row2[3]      = uchar(0xff);
2954
2955
                int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
2956
                row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
2957
                row2[5]      = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
2958
                row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
2959
                row2[7]      = uchar(0xff);
2960
            }
2961
        }
2962
    }
2963
};
2964
2965
// Threshold compared against _dst.total() by the cvtYUV420* helpers below:
// when HAVE_TBB is defined, destinations with at least this many elements are
// converted through parallel_for, smaller ones are converted with a direct call.
#define MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION (320*240)
2966
2967
template<int bIdx, int uIdx>
2968
inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
2969
{
2970
    YUV420sp2RGB888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1,  _uv);
2971
#ifdef HAVE_TBB
2972
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
2973
        parallel_for(BlockedRange(0, _dst.rows/2), converter);
2974
    else
2975
#endif
2976
        converter(BlockedRange(0, _dst.rows/2));
2977
}
2978
2979
template<int bIdx, int uIdx>
2980
inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
2981
{
2982
    YUV420sp2RGBA8888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1,  _uv);
2983
#ifdef HAVE_TBB
2984
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
2985
        parallel_for(BlockedRange(0, _dst.rows/2), converter);
2986
    else
2987
#endif
2988
        converter(BlockedRange(0, _dst.rows/2));
2989
}
2990
2991
template<int bIdx>
2992
inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
2993
{
2994
    YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1,  _u, _v, ustepIdx, vstepIdx);
2995
#ifdef HAVE_TBB
2996
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
2997
        parallel_for(BlockedRange(0, _dst.rows/2), converter);
2998
    else
2999
#endif
3000
        converter(BlockedRange(0, _dst.rows/2));
3001
}
3002
3003
template<int bIdx>
3004
inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
3005
{
3006
    YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1,  _u, _v, ustepIdx, vstepIdx);
3007
#ifdef HAVE_TBB
3008
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
3009
        parallel_for(BlockedRange(0, _dst.rows/2), converter);
3010
    else
3011
#endif
3012
        converter(BlockedRange(0, _dst.rows/2));
3013
}
3014
3015
///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
3016
3017
template<int bIdx, int uIdx, int yIdx>
3018
struct YUV422toRGB888Invoker
3019
{
3020
    Mat* dst;
3021
    const uchar* src;
3022
    int width, stride;
3023
3024
    YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
3025
        : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
3026
3027
    void operator()(const BlockedRange& range) const
3028
    {
3029
        int rangeBegin = range.begin();
3030
        int rangeEnd = range.end();
3031
3032
        const int uidx = 1 - yIdx + uIdx * 2;
3033
        const int vidx = (2 + uidx) % 4;
3034
        const uchar* yuv_src = src + rangeBegin * stride;
3035
3036
        for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
3037
        {
3038
            uchar* row = dst->ptr<uchar>(j);
3039
3040
            for (int i = 0; i < 2 * width; i += 4, row += 6)
3041
            {
3042
                int u = int(yuv_src[i + uidx]) - 128;
3043
                int v = int(yuv_src[i + vidx]) - 128;
3044
3045
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
3046
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
3047
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
3048
3049
                int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
3050
                row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
3051
                row[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
3052
                row[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
3053
3054
                int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
3055
                row[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
3056
                row[4]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
3057
                row[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
3058
            }
3059
        }
3060
    }
3061
};
3062
3063
template<int bIdx, int uIdx, int yIdx>
3064
struct YUV422toRGBA8888Invoker
3065
{
3066
    Mat* dst;
3067
    const uchar* src;
3068
    int width, stride;
3069
3070
    YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
3071
        : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
3072
3073
    void operator()(const BlockedRange& range) const
3074
    {
3075
        int rangeBegin = range.begin();
3076
        int rangeEnd = range.end();
3077
3078
        const int uidx = 1 - yIdx + uIdx * 2;
3079
        const int vidx = (2 + uidx) % 4;
3080
        const uchar* yuv_src = src + rangeBegin * stride;
3081
3082
        for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
3083
        {
3084
            uchar* row = dst->ptr<uchar>(j);
3085
3086
            for (int i = 0; i < 2 * width; i += 4, row += 8)
3087
            {
3088
                int u = int(yuv_src[i + uidx]) - 128;
3089
                int v = int(yuv_src[i + vidx]) - 128;
3090
3091
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
3092
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
3093
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;
3094
3095
                int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
3096
                row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
3097
                row[1]      = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
3098
                row[bIdx]   = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
3099
                row[3]      = uchar(0xff);
3100
3101
                int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
3102
                row[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
3103
                row[5]      = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
3104
                row[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
3105
                row[7]      = uchar(0xff);
3106
            }
3107
        }
3108
    }
3109
};
3110
3111
// Threshold compared against _dst.total() by the cvtYUV422* helpers below:
// when HAVE_TBB is defined, destinations with at least this many elements are
// converted through parallel_for, smaller ones are converted with a direct call.
#define MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION (320*240)
3112
3113
template<int bIdx, int uIdx, int yIdx>
3114
inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv)
3115
{
3116
    YUV422toRGB888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
3117
#ifdef HAVE_TBB
3118
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
3119
        parallel_for(BlockedRange(0, _dst.rows), converter);
3120
    else
3121
#endif
3122
        converter(BlockedRange(0, _dst.rows));
3123
}
3124
3125
template<int bIdx, int uIdx, int yIdx>
3126
inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv)
3127
{
3128
    YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
3129
#ifdef HAVE_TBB
3130
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
3131
        parallel_for(BlockedRange(0, _dst.rows), converter);
3132
    else
3133
#endif
3134
        converter(BlockedRange(0, _dst.rows));
3135
}
3136
3137
}//namespace cv
3138
3139
//////////////////////////////////////////////////////////////////////////////////////////
3140
//                                   The main function                                  //
3141
//////////////////////////////////////////////////////////////////////////////////////////
3142
3143
void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
3144
{
3145
    Mat src = _src.getMat(), dst;
3146
    Size sz = src.size();
3147
    int scn = src.channels(), depth = src.depth(), bidx;
3148
3149
    CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32F );
3150
3151
    switch( code )
3152
    {
3153
        case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR:
3154
        case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
3155
            CV_Assert( scn == 3 || scn == 4 );
3156
            dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3;
3157
            bidx = code == CV_BGR2BGRA || code == CV_BGRA2BGR ? 0 : 2;
3158
3159
            _dst.create( sz, CV_MAKETYPE(depth, dcn));
3160
            dst = _dst.getMat();
3161
3162
            if( depth == CV_8U )
3163
            {
3164
#ifdef HAVE_TEGRA_OPTIMIZATION
3165
                if(!tegra::cvtBGR2RGB(src, dst, bidx))
3166
#endif
3167
                    CvtColorLoop(src, dst, RGB2RGB<uchar>(scn, dcn, bidx));
3168
            }
3169
            else if( depth == CV_16U )
3170
                CvtColorLoop(src, dst, RGB2RGB<ushort>(scn, dcn, bidx));
3171
            else
3172
                CvtColorLoop(src, dst, RGB2RGB<float>(scn, dcn, bidx));
3173
            break;
3174
3175
        case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
3176
        case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
3177
            CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
3178
            _dst.create(sz, CV_8UC2);
3179
            dst = _dst.getMat();
3180
3181
#ifdef HAVE_TEGRA_OPTIMIZATION
3182
            if(code == CV_BGR2BGR565 || code == CV_BGRA2BGR565 || code == CV_RGB2BGR565  || code == CV_RGBA2BGR565)
3183
                if(tegra::cvtRGB2RGB565(src, dst, code == CV_RGB2BGR565 || code == CV_RGBA2BGR565 ? 0 : 2))
3184
                    break;
3185
#endif
3186
3187
            CvtColorLoop(src, dst, RGB2RGB5x5(scn,
3188
                      code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
3189
                      code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,
3190
                      code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
3191
                      code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5 // green bits
3192
                                              ));
3193
            break;
3194
3195
        case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
3196
        case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
3197
            if(dcn <= 0) dcn = (code==CV_BGR5652BGRA || code==CV_BGR5552BGRA || code==CV_BGR5652RGBA || code==CV_BGR5552RGBA) ? 4 : 3;
3198
            CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );
3199
            _dst.create(sz, CV_MAKETYPE(depth, dcn));
3200
            dst = _dst.getMat();
3201
3202
            CvtColorLoop(src, dst, RGB5x52RGB(dcn,
3203
                      code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
3204
                      code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2, // blue idx
3205
                      code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
3206
                      code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5 // green bits
3207
                      ));
3208
            break;
3209
3210
        case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:
3211
            CV_Assert( scn == 3 || scn == 4 );
3212
            _dst.create(sz, CV_MAKETYPE(depth, 1));
3213
            dst = _dst.getMat();
3214
3215
            bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
3216
3217
            if( depth == CV_8U )
3218
            {
3219
#ifdef HAVE_TEGRA_OPTIMIZATION
3220
                if(!tegra::cvtRGB2Gray(src, dst, bidx))
3221
#endif
3222
                CvtColorLoop(src, dst, RGB2Gray<uchar>(scn, bidx, 0));
3223
            }
3224
            else if( depth == CV_16U )
3225
                CvtColorLoop(src, dst, RGB2Gray<ushort>(scn, bidx, 0));
3226
            else
3227
                CvtColorLoop(src, dst, RGB2Gray<float>(scn, bidx, 0));
3228
            break;
3229
3230
        case CV_BGR5652GRAY: case CV_BGR5552GRAY:
3231
            CV_Assert( scn == 2 && depth == CV_8U );
3232
            _dst.create(sz, CV_8UC1);
3233
            dst = _dst.getMat();
3234
3235
            CvtColorLoop(src, dst, RGB5x52Gray(code == CV_BGR5652GRAY ? 6 : 5));
3236
            break;
3237
3238
        case CV_GRAY2BGR: case CV_GRAY2BGRA:
3239
            if( dcn <= 0 ) dcn = (code==CV_GRAY2BGRA) ? 4 : 3;
3240
            CV_Assert( scn == 1 && (dcn == 3 || dcn == 4));
3241
            _dst.create(sz, CV_MAKETYPE(depth, dcn));
3242
            dst = _dst.getMat();
3243
3244
            if( depth == CV_8U )
3245
            {
3246
#ifdef HAVE_TEGRA_OPTIMIZATION
3247
                if(!tegra::cvtGray2RGB(src, dst))
3248
#endif
3249
                CvtColorLoop(src, dst, Gray2RGB<uchar>(dcn));
3250
            }
3251
            else if( depth == CV_16U )
3252
                CvtColorLoop(src, dst, Gray2RGB<ushort>(dcn));
3253
            else
3254
                CvtColorLoop(src, dst, Gray2RGB<float>(dcn));
3255
            break;
3256
3257
        case CV_GRAY2BGR565: case CV_GRAY2BGR555:
3258
            CV_Assert( scn == 1 && depth == CV_8U );
3259
            _dst.create(sz, CV_8UC2);
3260
            dst = _dst.getMat();
3261
3262
            CvtColorLoop(src, dst, Gray2RGB5x5(code == CV_GRAY2BGR565 ? 6 : 5));
3263
            break;
3264
3265
        case CV_BGR2YCrCb: case CV_RGB2YCrCb:
3266
        case CV_BGR2YUV: case CV_RGB2YUV:
3267
            {
3268
            CV_Assert( scn == 3 || scn == 4 );
3269
            bidx = code == CV_BGR2YCrCb || code == CV_RGB2YUV ? 0 : 2;
3270
            static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
3271
            static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };
3272
            const float* coeffs_f = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_f;
3273
            const int* coeffs_i = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_i;
3274
3275
            _dst.create(sz, CV_MAKETYPE(depth, 3));
3276
            dst = _dst.getMat();
3277
3278
            if( depth == CV_8U )
3279
            {
3280
#ifdef HAVE_TEGRA_OPTIMIZATION
3281
                if((code == CV_RGB2YCrCb || code == CV_BGR2YCrCb) && tegra::cvtRGB2YCrCb(src, dst, bidx))
3282
                    break;
3283
#endif
3284
                CvtColorLoop(src, dst, RGB2YCrCb_i<uchar>(scn, bidx, coeffs_i));
3285
            }
3286
            else if( depth == CV_16U )
3287
                CvtColorLoop(src, dst, RGB2YCrCb_i<ushort>(scn, bidx, coeffs_i));
3288
            else
3289
                CvtColorLoop(src, dst, RGB2YCrCb_f<float>(scn, bidx, coeffs_f));
3290
            }
3291
            break;
3292
3293
        case CV_YCrCb2BGR: case CV_YCrCb2RGB:
3294
        case CV_YUV2BGR: case CV_YUV2RGB:
3295
            {
3296
            if( dcn <= 0 ) dcn = 3;
3297
            CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
3298
            bidx = code == CV_YCrCb2BGR || code == CV_YUV2RGB ? 0 : 2;
3299
            static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };
3300
            static const int yuv_i[] = { 33292, -6472, -9519, 18678 };
3301
            const float* coeffs_f = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_f;
3302
            const int* coeffs_i = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_i;
3303
3304
            _dst.create(sz, CV_MAKETYPE(depth, dcn));
3305
            dst = _dst.getMat();
3306
3307
            if( depth == CV_8U )
3308
                CvtColorLoop(src, dst, YCrCb2RGB_i<uchar>(dcn, bidx, coeffs_i));
3309
            else if( depth == CV_16U )
3310
                CvtColorLoop(src, dst, YCrCb2RGB_i<ushort>(dcn, bidx, coeffs_i));
3311
            else
3312
                CvtColorLoop(src, dst, YCrCb2RGB_f<float>(dcn, bidx, coeffs_f));
3313
            }
3314
            break;
3315
3316
        case CV_BGR2XYZ: case CV_RGB2XYZ:
3317
            CV_Assert( scn == 3 || scn == 4 );
3318
            bidx = code == CV_BGR2XYZ ? 0 : 2;
3319
3320
            _dst.create(sz, CV_MAKETYPE(depth, 3));
3321
            dst = _dst.getMat();
3322
3323
            if( depth == CV_8U )
3324
                CvtColorLoop(src, dst, RGB2XYZ_i<uchar>(scn, bidx, 0));
3325
            else if( depth == CV_16U )
3326
                CvtColorLoop(src, dst, RGB2XYZ_i<ushort>(scn, bidx, 0));
3327
            else
3328
                CvtColorLoop(src, dst, RGB2XYZ_f<float>(scn, bidx, 0));
3329
            break;
3330
3331
        case CV_XYZ2BGR: case CV_XYZ2RGB:
3332
            if( dcn <= 0 ) dcn = 3;
3333
            CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
3334
            bidx = code == CV_XYZ2BGR ? 0 : 2;
3335
3336
            _dst.create(sz, CV_MAKETYPE(depth, dcn));
3337
            dst = _dst.getMat();
3338
3339
            if( depth == CV_8U )
3340
                CvtColorLoop(src, dst, XYZ2RGB_i<uchar>(dcn, bidx, 0));
3341
            else if( depth == CV_16U )
3342
                CvtColorLoop(src, dst, XYZ2RGB_i<ushort>(dcn, bidx, 0));
3343
            else
3344
                CvtColorLoop(src, dst, XYZ2RGB_f<float>(dcn, bidx, 0));
3345
            break;
3346
3347
        case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
3348
        case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:
3349
            {
3350
            CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
3351
            bidx = code == CV_BGR2HSV || code == CV_BGR2HLS ||
3352
                code == CV_BGR2HSV_FULL || code == CV_BGR2HLS_FULL ? 0 : 2;
3353
            int hrange = depth == CV_32F ? 360 : code == CV_BGR2HSV || code == CV_RGB2HSV ||
3354
                code == CV_BGR2HLS || code == CV_RGB2HLS ? 180 : 256;
3355
3356
            _dst.create(sz, CV_MAKETYPE(depth, 3));
3357
            dst = _dst.getMat();
3358
3359
            if( code == CV_BGR2HSV || code == CV_RGB2HSV ||
3360
                code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL )
3361
            {
3362
#ifdef HAVE_TEGRA_OPTIMIZATION
3363
                if(tegra::cvtRGB2HSV(src, dst, bidx, hrange))
3364
                    break;
3365
#endif
3366
                if( depth == CV_8U )
3367
                    CvtColorLoop(src, dst, RGB2HSV_b(scn, bidx, hrange));
3368
                else
3369
                    CvtColorLoop(src, dst, RGB2HSV_f(scn, bidx, (float)hrange));
3370
            }
3371
            else
3372
            {
3373
                if( depth == CV_8U )
3374
                    CvtColorLoop(src, dst, RGB2HLS_b(scn, bidx, hrange));
3375
                else
3376
                    CvtColorLoop(src, dst, RGB2HLS_f(scn, bidx, (float)hrange));
3377
            }
3378
            }
3379
            break;
3380
3381
        case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
3382
        case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL:
3383
            {
3384
            if( dcn <= 0 ) dcn = 3;
3385
            CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
3386
            bidx = code == CV_HSV2BGR || code == CV_HLS2BGR ||
3387
                code == CV_HSV2BGR_FULL || code == CV_HLS2BGR_FULL ? 0 : 2;
3388
            int hrange = depth == CV_32F ? 360 : code == CV_HSV2BGR || code == CV_HSV2RGB ||
3389
                code == CV_HLS2BGR || code == CV_HLS2RGB ? 180 : 255;
3390
3391
            _dst.create(sz, CV_MAKETYPE(depth, dcn));
3392
            dst = _dst.getMat();
3393
3394
            if( code == CV_HSV2BGR || code == CV_HSV2RGB ||
3395
                code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL )
3396
            {
3397
                if( depth == CV_8U )
3398
                    CvtColorLoop(src, dst, HSV2RGB_b(dcn, bidx, hrange));
3399
                else
3400
                    CvtColorLoop(src, dst, HSV2RGB_f(dcn, bidx, (float)hrange));
3401
            }
3402
            else
3403
            {
3404
                if( depth == CV_8U )
3405
                    CvtColorLoop(src, dst, HLS2RGB_b(dcn, bidx, hrange));
3406
                else
3407
                    CvtColorLoop(src, dst, HLS2RGB_f(dcn, bidx, (float)hrange));
3408
            }
3409
            }
3410
            break;
3411
3412
        case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab:
3413
        case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv:
3414
            {
3415
            CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
3416
            bidx = code == CV_BGR2Lab || code == CV_BGR2Luv ||
3417
                   code == CV_LBGR2Lab || code == CV_LBGR2Luv ? 0 : 2;
3418
            bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab ||
3419
                        code == CV_BGR2Luv || code == CV_RGB2Luv;
3420
3421
            _dst.create(sz, CV_MAKETYPE(depth, 3));
3422
            dst = _dst.getMat();
3423
3424
            if( code == CV_BGR2Lab || code == CV_RGB2Lab ||
3425
                code == CV_LBGR2Lab || code == CV_LRGB2Lab )
3426
            {
3427
                if( depth == CV_8U )
3428
                    CvtColorLoop(src, dst, RGB2Lab_b(scn, bidx, 0, 0, srgb));
3429
                else
3430
                    CvtColorLoop(src, dst, RGB2Lab_f(scn, bidx, 0, 0, srgb));
3431
            }
3432
            else
3433
            {
3434
                if( depth == CV_8U )
3435
                    CvtColorLoop(src, dst, RGB2Luv_b(scn, bidx, 0, 0, srgb));
3436
                else
3437
                    CvtColorLoop(src, dst, RGB2Luv_f(scn, bidx, 0, 0, srgb));
3438
            }
3439
            }
3440
            break;
3441
3442
        case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB:
3443
        case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB:
3444
            {
3445
            if( dcn <= 0 ) dcn = 3;
3446
            CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
3447
            bidx = code == CV_Lab2BGR || code == CV_Luv2BGR ||
3448
                   code == CV_Lab2LBGR || code == CV_Luv2LBGR ? 0 : 2;
3449
            bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB ||
3450
                    code == CV_Luv2BGR || code == CV_Luv2RGB;
3451
3452
            _dst.create(sz, CV_MAKETYPE(depth, dcn));
3453
            dst = _dst.getMat();
3454
3455
            if( code == CV_Lab2BGR || code == CV_Lab2RGB ||
3456
                code == CV_Lab2LBGR || code == CV_Lab2LRGB )
3457
            {
3458
                if( depth == CV_8U )
3459
                    CvtColorLoop(src, dst, Lab2RGB_b(dcn, bidx, 0, 0, srgb));
3460
                else
3461
                    CvtColorLoop(src, dst, Lab2RGB_f(dcn, bidx, 0, 0, srgb));
3462
            }
3463
            else
3464
            {
3465
                if( depth == CV_8U )
3466
                    CvtColorLoop(src, dst, Luv2RGB_b(dcn, bidx, 0, 0, srgb));
3467
                else
3468
                    CvtColorLoop(src, dst, Luv2RGB_f(dcn, bidx, 0, 0, srgb));
3469
            }
3470
            }
3471
            break;
3472
3473
        case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:
3474
            if(dcn <= 0) dcn = 1;
3475
            CV_Assert( scn == 1 && dcn == 1 );
3476
3477
            _dst.create(sz, depth);
3478
            dst = _dst.getMat();
3479
3480
            if( depth == CV_8U )
3481
                Bayer2Gray_<uchar, SIMDBayerInterpolator_8u>(src, dst, code);
3482
            else if( depth == CV_16U )
3483
                Bayer2Gray_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code);
3484
            else
3485
                CV_Error(CV_StsUnsupportedFormat, "Bayer->Gray demosaicing only supports 8u and 16u types");
3486
            break;
3487
3488
        case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:
3489
        case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG:
3490
            if(dcn <= 0) dcn = 3;
3491
            CV_Assert( scn == 1 && dcn == 3 );
3492
3493
            _dst.create(sz, CV_MAKETYPE(depth, dcn));
3494
            dst = _dst.getMat();
3495
3496
            if( code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ||
3497
                code == CV_BayerRG2BGR || code == CV_BayerGR2BGR )
3498
            {
3499
                if( depth == CV_8U )
3500
                    Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>(src, dst, code);
3501
                else if( depth == CV_16U )
3502
                    Bayer2RGB_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code);
3503
                else
3504
                    CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB demosaicing only supports 8u and 16u types");
3505
            }
3506
            else
3507
            {
3508
                CV_Assert( depth == CV_8U );
3509
                Bayer2RGB_VNG_8u(src, dst, code);
3510
            }
3511
            break;
3512
        case CV_YUV2BGR_NV21:  case CV_YUV2RGB_NV21:  case CV_YUV2BGR_NV12:  case CV_YUV2RGB_NV12:
3513
        case CV_YUV2BGRA_NV21: case CV_YUV2RGBA_NV21: case CV_YUV2BGRA_NV12: case CV_YUV2RGBA_NV12:
3514
            {
3515
                // http://www.fourcc.org/yuv.php#NV21 == yuv420sp -> a plane of 8 bit Y samples followed by an interleaved V/U plane containing 8 bit 2x2 subsampled chroma samples
3516
                // http://www.fourcc.org/yuv.php#NV12 -> a plane of 8 bit Y samples followed by an interleaved U/V plane containing 8 bit 2x2 subsampled colour difference samples
3517
3518
                if (dcn <= 0) dcn = (code==CV_YUV420sp2BGRA || code==CV_YUV420sp2RGBA || code==CV_YUV2BGRA_NV12 || code==CV_YUV2RGBA_NV12) ? 4 : 3;
3519
                const int bIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2BGR_NV12 || code==CV_YUV2BGRA_NV12) ? 0 : 2;
3520
                const int uIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2RGB_NV21 || code==CV_YUV2RGBA_NV21) ? 1 : 0;
3521
3522
                CV_Assert( dcn == 3 || dcn == 4 );
3523
                CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
3524
3525
                Size dstSz(sz.width, sz.height * 2 / 3);
3526
                _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
3527
                dst = _dst.getMat();
3528
3529
                int srcstep = (int)src.step;
3530
                const uchar* y = src.ptr();
3531
                const uchar* uv = y + srcstep * dstSz.height;
3532
3533
                switch(dcn*100 + bIdx * 10 + uIdx)
3534
                {
3535
                    case 300: cvtYUV420sp2RGB<0, 0> (dst, srcstep, y, uv); break;
3536
                    case 301: cvtYUV420sp2RGB<0, 1> (dst, srcstep, y, uv); break;
3537
                    case 320: cvtYUV420sp2RGB<2, 0> (dst, srcstep, y, uv); break;
3538
                    case 321: cvtYUV420sp2RGB<2, 1> (dst, srcstep, y, uv); break;
3539
                    case 400: cvtYUV420sp2RGBA<0, 0>(dst, srcstep, y, uv); break;
3540
                    case 401: cvtYUV420sp2RGBA<0, 1>(dst, srcstep, y, uv); break;
3541
                    case 420: cvtYUV420sp2RGBA<2, 0>(dst, srcstep, y, uv); break;
3542
                    case 421: cvtYUV420sp2RGBA<2, 1>(dst, srcstep, y, uv); break;
3543
                    default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
3544
                };
3545
            }
3546
            break;
3547
        case CV_YUV2BGR_YV12: case CV_YUV2RGB_YV12: case CV_YUV2BGRA_YV12: case CV_YUV2RGBA_YV12:
3548
        case CV_YUV2BGR_IYUV: case CV_YUV2RGB_IYUV: case CV_YUV2BGRA_IYUV: case CV_YUV2RGBA_IYUV:
3549
            {
3550
                //http://www.fourcc.org/yuv.php#YV12 == yuv420p -> It comprises an NxM Y plane followed by (N/2)x(M/2) V and U planes.
3551
                //http://www.fourcc.org/yuv.php#IYUV == I420 -> It comprises an NxM Y plane followed by (N/2)x(M/2) U and V planes.
3552
3553
                if (dcn <= 0) dcn = (code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12 || code==CV_YUV2RGBA_IYUV || code==CV_YUV2BGRA_IYUV) ? 4 : 3;
3554
                const int bIdx = (code==CV_YUV2BGR_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2BGR_IYUV || code==CV_YUV2BGRA_IYUV) ? 0 : 2;
3555
                const int uIdx  = (code==CV_YUV2BGR_YV12 || code==CV_YUV2RGB_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12) ? 1 : 0;
3556
3557
                CV_Assert( dcn == 3 || dcn == 4 );
3558
                CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
3559
3560
                Size dstSz(sz.width, sz.height * 2 / 3);
3561
                _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
3562
                dst = _dst.getMat();
3563
3564
                int srcstep = (int)src.step;
3565
                const uchar* y = src.ptr();
3566
                const uchar* u = y + srcstep * dstSz.height;
3567
                const uchar* v = y + srcstep * (dstSz.height + dstSz.height/4) + (dstSz.width/2) * ((dstSz.height % 4)/2);
3568
3569
                int ustepIdx = 0;
3570
                int vstepIdx = dstSz.height % 4 == 2 ? 1 : 0;
3571
3572
                if(uIdx == 1) { std::swap(u ,v), std::swap(ustepIdx, vstepIdx); };
3573
3574
                switch(dcn*10 + bIdx)
3575
                {
3576
                    case 30: cvtYUV420p2RGB<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
3577
                    case 32: cvtYUV420p2RGB<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
3578
                    case 40: cvtYUV420p2RGBA<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
3579
                    case 42: cvtYUV420p2RGBA<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
3580
                    default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
3581
                };
3582
            }
3583
            break;
3584
        case CV_YUV2GRAY_420:
3585
            {
3586
                if (dcn <= 0) dcn = 1;
3587
3588
                CV_Assert( dcn == 1 );
3589
                CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
3590
3591
                Size dstSz(sz.width, sz.height * 2 / 3);
3592
                _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
3593
                dst = _dst.getMat();
3594
3595
                src(Range(0, dstSz.height), Range::all()).copyTo(dst);
3596
            }
3597
            break;
3598
        case CV_YUV2RGB_UYVY: case CV_YUV2BGR_UYVY: case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY:
3599
        case CV_YUV2RGB_YUY2: case CV_YUV2BGR_YUY2: case CV_YUV2RGB_YVYU: case CV_YUV2BGR_YVYU:
3600
        case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2: case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU:
3601
            {
3602
                //http://www.fourcc.org/yuv.php#UYVY
3603
                //http://www.fourcc.org/yuv.php#YUY2
3604
                //http://www.fourcc.org/yuv.php#YVYU
3605
3606
                if (dcn <= 0) dcn = (code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2RGBA_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 4 : 3;
3607
                const int bIdx = (code==CV_YUV2BGR_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2BGR_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2BGR_YVYU || code==CV_YUV2BGRA_YVYU) ? 0 : 2;
3608
                const int ycn  = (code==CV_YUV2RGB_UYVY || code==CV_YUV2BGR_UYVY || code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY) ? 1 : 0;
3609
                const int uIdx = (code==CV_YUV2RGB_YVYU || code==CV_YUV2BGR_YVYU || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 1 : 0;
3610
3611
                CV_Assert( dcn == 3 || dcn == 4 );
3612
                CV_Assert( scn == 2 && depth == CV_8U );
3613
3614
                _dst.create(sz, CV_8UC(dcn));
3615
                dst = _dst.getMat();
3616
3617
                switch(dcn*1000 + bIdx*100 + uIdx*10 + ycn)
3618
                {
3619
                    case 3000: cvtYUV422toRGB<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3620
                    case 3001: cvtYUV422toRGB<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3621
                    case 3010: cvtYUV422toRGB<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3622
                    case 3011: cvtYUV422toRGB<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3623
                    case 3200: cvtYUV422toRGB<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3624
                    case 3201: cvtYUV422toRGB<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3625
                    case 3210: cvtYUV422toRGB<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3626
                    case 3211: cvtYUV422toRGB<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3627
                    case 4000: cvtYUV422toRGBA<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3628
                    case 4001: cvtYUV422toRGBA<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3629
                    case 4010: cvtYUV422toRGBA<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3630
                    case 4011: cvtYUV422toRGBA<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3631
                    case 4200: cvtYUV422toRGBA<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3632
                    case 4201: cvtYUV422toRGBA<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3633
                    case 4210: cvtYUV422toRGBA<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
3634
                    case 4211: cvtYUV422toRGBA<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
3635
                    default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
3636
                };
3637
            }
3638
            break;
3639
        case CV_YUV2GRAY_UYVY: case CV_YUV2GRAY_YUY2:
3640
            {
3641
                if (dcn <= 0) dcn = 1;
3642
3643
                CV_Assert( dcn == 1 );
3644
                CV_Assert( scn == 2 && depth == CV_8U );
3645
3646
                extractChannel(_src, _dst, code == CV_YUV2GRAY_UYVY ? 1 : 0);
3647
            }
3648
            break;
3649
        default:
3650
            CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
3651
    }
3652
}
3653
3654
/* C-API entry point for color conversion.
 *
 * Converts both CvArr arguments with cv::cvarrToMat and forwards the work to
 * cv::cvtColor, requesting as many destination channels as dstarr already has.
 *
 *   srcarr - source array
 *   dstarr - destination array; its depth must equal the source depth
 *   code   - color conversion code, passed through to cv::cvtColor unchanged
 *
 * Raises (via CV_Assert) when the depths differ, or when after the conversion
 * the working destination no longer refers to the same data pointer as dstarr.
 */
CV_IMPL void
cvCvtColor( const CvArr* srcarr, CvArr* dstarr, int code )
{
    const cv::Mat src = cv::cvarrToMat(srcarr);
    // dst0 is kept untouched so the data pointer can be compared afterwards.
    const cv::Mat dst0 = cv::cvarrToMat(dstarr);
    cv::Mat dst = dst0;

    CV_Assert( src.depth() == dst.depth() );

    // The destination's own channel count is used as the requested dcn.
    const int requestedChannels = dst.channels();
    cv::cvtColor(src, dst, code, requestedChannels);

    // The result must have been written into the buffer that dstarr describes,
    // not into a different one.
    CV_Assert( dst.data == dst0.data );
}
3663
3664
3665
/* End of file. */