        CV_Error( CV_StsBadArg, "Unknown threshold type" );
    }

    /* Optimisation for ARM NEON */
#if defined __GNUC__ && CV_NEON
    if( CPU_HAS_NEON_FEATURE )
    {
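        /* XOR with 0x80 biases unsigned bytes into the signed range, so the
           signed compare vcgtq_s8 below implements the unsigned test
           src > thresh (the same trick the SSE2 path uses, since SSE2 has no
           unsigned byte compare). */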
        uint8x16_t _x80 = vmovq_n_u8('\x80');
        uint8x16_t thresh_u = vmovq_n_u8(thresh);
        int8x16_t thresh_s = vmovq_n_s8((int8_t)(thresh ^ 0x80));
        uint8x16_t maxval_ = vmovq_n_u8(maxval);
        j_scalar = roi.width & -8;

        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = (const uchar*)(_src.data + _src.step*i);
            uchar* dst = (uchar*)(_dst.data + _dst.step*i);

            switch( type )
            {
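            /* THRESH_BINARY: dst = src > thresh ? maxval : 0 */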
            case THRESH_BINARY:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    uint8x16_t v0, v1;
                    v0 = vld1q_u8( (uint8_t const*)(src + j) );
                    v1 = vld1q_u8( (uint8_t const*)(src + j + 16) );
                    v0 = vcgtq_s8( vreinterpretq_s8_u8(veorq_u8(v0, _x80)), thresh_s );
                    v1 = vcgtq_s8( vreinterpretq_s8_u8(veorq_u8(v1, _x80)), thresh_s );
                    v0 = vandq_u8( v0, maxval_ );
                    v1 = vandq_u8( v1, maxval_ );
                    vst1q_u8( (uint8_t*)(dst + j), v0 );
                    vst1q_u8( (uint8_t*)(dst + j + 16), v1 );
                }

                /* 8-pixel tail: 64-bit vectors so nothing is read or written
                   past the end of the row */
                for( ; j <= roi.width - 8; j += 8 )
                {
                    uint8x8_t v0 = vld1_u8( (const uint8_t*)(src + j) );
                    v0 = vcgt_s8( vreinterpret_s8_u8(veor_u8(v0, vget_low_u8(_x80))), vget_low_s8(thresh_s) );
                    v0 = vand_u8( v0, vget_low_u8(maxval_) );
                    vst1_u8( (uint8_t*)(dst + j), v0 );
                }
                break;

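            /* THRESH_BINARY_INV: dst = src > thresh ? 0 : maxval */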
            case THRESH_BINARY_INV:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    uint8x16_t v0, v1;
                    v0 = vld1q_u8( (uint8_t const*)(src + j) );
                    v1 = vld1q_u8( (uint8_t const*)(src + j + 16) );
                    v0 = vcgtq_s8( vreinterpretq_s8_u8(veorq_u8(v0, _x80)), thresh_s );
                    v1 = vcgtq_s8( vreinterpretq_s8_u8(veorq_u8(v1, _x80)), thresh_s );
                    v0 = vandq_u8( vmvnq_u8(v0), maxval_ );
                    v1 = vandq_u8( vmvnq_u8(v1), maxval_ );
                    vst1q_u8( (uint8_t*)(dst + j), v0 );
                    vst1q_u8( (uint8_t*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    uint8x8_t v0 = vld1_u8( (const uint8_t*)(src + j) );
                    v0 = vcgt_s8( vreinterpret_s8_u8(veor_u8(v0, vget_low_u8(_x80))), vget_low_s8(thresh_s) );
                    v0 = vand_u8( vmvn_u8(v0), vget_low_u8(maxval_) );
                    vst1_u8( (uint8_t*)(dst + j), v0 );
                }
                break;

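            /* THRESH_TRUNC: dst = min(src, thresh); the saturating subtraction
               src -| max(src - thresh, 0) yields the minimum without a compare */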
            case THRESH_TRUNC:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    uint8x16_t v0, v1;
                    v0 = vld1q_u8( (uint8_t const*)(src + j) );
                    v1 = vld1q_u8( (uint8_t const*)(src + j + 16) );
                    v0 = vqsubq_u8( v0, vqsubq_u8( v0, thresh_u ) );
                    v1 = vqsubq_u8( v1, vqsubq_u8( v1, thresh_u ) );
                    vst1q_u8( (uint8_t*)(dst + j), v0 );
                    vst1q_u8( (uint8_t*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    uint8x8_t v0 = vld1_u8( (const uint8_t*)(src + j) );
                    v0 = vqsub_u8( v0, vqsub_u8( v0, vget_low_u8(thresh_u) ) );
                    vst1_u8( (uint8_t*)(dst + j), v0 );
                }
                break;

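            /* THRESH_TOZERO: dst = src > thresh ? src : 0 */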
            case THRESH_TOZERO:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    uint8x16_t v0, v1;
                    v0 = vld1q_u8( (uint8_t const*)(src + j) );
                    v1 = vld1q_u8( (uint8_t const*)(src + j + 16) );
                    v0 = vandq_u8( v0, vcgtq_s8( vreinterpretq_s8_u8(veorq_u8(v0, _x80)), thresh_s ) );
                    v1 = vandq_u8( v1, vcgtq_s8( vreinterpretq_s8_u8(veorq_u8(v1, _x80)), thresh_s ) );
                    vst1q_u8( (uint8_t*)(dst + j), v0 );
                    vst1q_u8( (uint8_t*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    uint8x8_t v0 = vld1_u8( (const uint8_t*)(src + j) );
                    v0 = vand_u8( v0, vcgt_s8( vreinterpret_s8_u8(veor_u8(v0, vget_low_u8(_x80))), vget_low_s8(thresh_s) ) );
                    vst1_u8( (uint8_t*)(dst + j), v0 );
                }
                break;

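            /* THRESH_TOZERO_INV: dst = src > thresh ? 0 : src */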
            case THRESH_TOZERO_INV:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    uint8x16_t v0, v1;
                    v0 = vld1q_u8( (uint8_t const*)(src + j) );
                    v1 = vld1q_u8( (uint8_t const*)(src + j + 16) );
                    v0 = vandq_u8( vmvnq_u8( vcgtq_s8( vreinterpretq_s8_u8(veorq_u8(v0, _x80)), thresh_s ) ), v0 );
                    v1 = vandq_u8( vmvnq_u8( vcgtq_s8( vreinterpretq_s8_u8(veorq_u8(v1, _x80)), thresh_s ) ), v1 );
                    vst1q_u8( (uint8_t*)(dst + j), v0 );
                    vst1q_u8( (uint8_t*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    uint8x8_t v0 = vld1_u8( (const uint8_t*)(src + j) );
                    v0 = vand_u8( vmvn_u8( vcgt_s8( vreinterpret_s8_u8(veor_u8(v0, vget_low_u8(_x80))), vget_low_s8(thresh_s) ) ), v0 );
                    vst1_u8( (uint8_t*)(dst + j), v0 );
                }
                break;

            }
        }
    }
#endif

#if CV_SSE2
    if( checkHardwareSupport(CV_CPU_SSE2) )
    {